1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
21 #include "llvm/ADT/Optional.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/Support/Debug.h"
37 
38 #define DEBUG_TYPE "aarch64-isel"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define GET_GLOBALISEL_PREDICATE_BITSET
45 #include "AArch64GenGlobalISel.inc"
46 #undef GET_GLOBALISEL_PREDICATE_BITSET
47 
48 class AArch64InstructionSelector : public InstructionSelector {
49 public:
50  AArch64InstructionSelector(const AArch64TargetMachine &TM,
51  const AArch64Subtarget &STI,
52  const AArch64RegisterBankInfo &RBI);
53 
54  bool select(MachineInstr &I) override;
55  static const char *getName() { return DEBUG_TYPE; }
56 
57  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
58  CodeGenCoverage &CoverageInfo) override {
59  InstructionSelector::setupMF(MF, KB, CoverageInfo);
60 
61  // hasFnAttribute() is expensive to call on every BRCOND selection, so
62  // cache it here for each run of the selector.
63  ProduceNonFlagSettingCondBr =
64  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
65  }
66 
67 private:
68  /// tblgen-erated 'select' implementation, used as the initial selector for
69  /// the patterns that don't require complex C++.
70  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
71 
72  // A lowering phase that runs before any selection attempts.
73 
74  void preISelLower(MachineInstr &I) const;
75 
76  // An early selection function that runs before the selectImpl() call.
77  bool earlySelect(MachineInstr &I) const;
78 
79  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
80 
81  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
82  void contractCrossBankCopyIntoStore(MachineInstr &I,
83  MachineRegisterInfo &MRI) const;
84 
85  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
86  MachineRegisterInfo &MRI) const;
87  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
88  MachineRegisterInfo &MRI) const;
89 
90  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
91  MachineRegisterInfo &MRI) const;
92 
93  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
94  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
95 
96  // Helper to generate an equivalent of scalar_to_vector into a new register,
97  // returned via 'Dst'.
98  MachineInstr *emitScalarToVector(unsigned EltSize,
99  const TargetRegisterClass *DstRC,
100  Register Scalar,
101  MachineIRBuilder &MIRBuilder) const;
102 
103  /// Emit a lane insert into \p DstReg, or a new vector register if None is
104  /// provided.
105  ///
106  /// The lane inserted into is defined by \p LaneIdx. The vector source
107  /// register is given by \p SrcReg. The register containing the element is
108  /// given by \p EltReg.
109  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
110  Register EltReg, unsigned LaneIdx,
111  const RegisterBank &RB,
112  MachineIRBuilder &MIRBuilder) const;
113  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
114  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
115  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
116  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
117 
118  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
119  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
120  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
121  bool selectSplitVectorUnmerge(MachineInstr &I,
122  MachineRegisterInfo &MRI) const;
123  bool selectIntrinsicWithSideEffects(MachineInstr &I,
124  MachineRegisterInfo &MRI) const;
125  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
126  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
127  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
128  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
129  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
130  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
131  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
132 
133  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
134  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
135  MachineIRBuilder &MIRBuilder) const;
136 
137  // Emit a vector concat operation.
138  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
139  Register Op2,
140  MachineIRBuilder &MIRBuilder) const;
141  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
142  MachineOperand &Predicate,
143  MachineIRBuilder &MIRBuilder) const;
144  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
145  MachineIRBuilder &MIRBuilder) const;
146  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
147  MachineIRBuilder &MIRBuilder) const;
148  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
149  MachineIRBuilder &MIRBuilder) const;
150  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
151  const RegisterBank &DstRB, LLT ScalarTy,
152  Register VecReg, unsigned LaneIdx,
153  MachineIRBuilder &MIRBuilder) const;
154 
155  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
156  /// materialized using a FMOV instruction, then update MI and return it.
157  /// Otherwise, do nothing and return a nullptr.
158  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
159  MachineRegisterInfo &MRI) const;
160 
161  /// Emit a CSet for a compare.
162  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
163  MachineIRBuilder &MIRBuilder) const;
164 
165  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
166  // We use these manually instead of using the importer since it doesn't
167  // support SDNodeXForm.
168  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
169  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
170  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
171  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
172 
173  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
174  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
175  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
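  // As an illustration: an arithmetic immediate is a 12-bit unsigned value
  // optionally shifted left by 12, so select12BitValueWithLeftShift can render
  // 0x123000 as #0x123 with a shift of 12 for the ADD/SUB immediate forms.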
176 
177  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
178  unsigned Size) const;
179 
180  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
181  return selectAddrModeUnscaled(Root, 1);
182  }
183  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
184  return selectAddrModeUnscaled(Root, 2);
185  }
186  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
187  return selectAddrModeUnscaled(Root, 4);
188  }
189  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
190  return selectAddrModeUnscaled(Root, 8);
191  }
192  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
193  return selectAddrModeUnscaled(Root, 16);
194  }
195 
196  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
197  unsigned Size) const;
198  template <int Width>
199  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
200  return selectAddrModeIndexed(Root, Width / 8);
201  }
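  // These match the base + scaled unsigned 12-bit immediate addressing mode.
  // E.g. for a 64-bit access, "ldr x0, [x1, #16]" is representable because 16
  // is a multiple of 8 and 16/8 fits in 12 bits.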
202 
203  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
204  const MachineRegisterInfo &MRI) const;
205  ComplexRendererFns
206  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
207  unsigned SizeInBytes) const;
208  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
209  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
210  unsigned SizeInBytes) const;
211  template <int Width>
212  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
213  return selectAddrModeXRO(Root, Width / 8);
214  }
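  // XRO is the register-offset form, e.g. "ldr x0, [x1, x2, lsl #3]" for an
  // 8-byte access, where the offset register may be scaled by the access size.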
215 
216  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
217 
218  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
219  return selectShiftedRegister(Root);
220  }
221 
222  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
223  // TODO: selectShiftedRegister should allow for rotates on logical shifts.
224  // For now, make them the same. The only difference between the two is that
225  // logical shifts are allowed to fold in rotates. Otherwise, these are
226  // functionally the same.
227  return selectShiftedRegister(Root);
228  }
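  // As an illustration, a shifted-register operand lets a pattern like
  // "G_ADD %a, (G_SHL %b, 3)" select to "add x0, x1, x2, lsl #3"; logical
  // instructions (AND/ORR/EOR/...) may additionally fold a rotate.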
229 
230  /// Instructions that accept extend modifiers like UXTW expect the register
231  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
232  /// subregister copy if necessary. Return either ExtReg, or the result of the
233  /// new copy.
234  Register narrowExtendRegIfNeeded(Register ExtReg,
235  MachineIRBuilder &MIB) const;
236  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
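  // E.g. an extended-register operand such as "add x0, x1, w2, uxtw #2"
  // extends a 32-bit source register, which is why the register being
  // extended may first need to be narrowed to a GPR32.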
237 
238  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
239  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
240  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
241 
242  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
243  void materializeLargeCMVal(MachineInstr &I, const Value *V,
244  unsigned OpFlags) const;
245 
246  // Optimization methods.
247  bool tryOptVectorShuffle(MachineInstr &I) const;
248  bool tryOptVectorDup(MachineInstr &MI) const;
249  bool tryOptSelect(MachineInstr &MI) const;
250  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251  MachineOperand &Predicate,
252  MachineIRBuilder &MIRBuilder) const;
253 
254  /// Return true if \p MI is a load or store of \p NumBytes bytes.
255  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
256 
257  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
258  /// register zeroed out. In other words, the result of MI has been explicitly
259  /// zero extended.
260  bool isDef32(const MachineInstr &MI) const;
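  // E.g. most 32-bit (W-register) instructions implicitly zero bits [63:32]
  // of the enclosing X register, so a plain SUBREG_TO_REG is enough to widen
  // their results.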
261 
262  const AArch64TargetMachine &TM;
263  const AArch64Subtarget &STI;
264  const AArch64InstrInfo &TII;
265  const AArch64RegisterInfo &TRI;
266  const AArch64RegisterBankInfo &RBI;
267 
268  bool ProduceNonFlagSettingCondBr = false;
269 
270 #define GET_GLOBALISEL_PREDICATES_DECL
271 #include "AArch64GenGlobalISel.inc"
272 #undef GET_GLOBALISEL_PREDICATES_DECL
273 
274 // We declare the temporaries used by selectImpl() in the class to minimize the
275 // cost of constructing placeholder values.
276 #define GET_GLOBALISEL_TEMPORARIES_DECL
277 #include "AArch64GenGlobalISel.inc"
278 #undef GET_GLOBALISEL_TEMPORARIES_DECL
279 };
280 
281 } // end anonymous namespace
282 
283 #define GET_GLOBALISEL_IMPL
284 #include "AArch64GenGlobalISel.inc"
285 #undef GET_GLOBALISEL_IMPL
286 
287 AArch64InstructionSelector::AArch64InstructionSelector(
288  const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
289  const AArch64RegisterBankInfo &RBI)
290  : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
291  TRI(*STI.getRegisterInfo()), RBI(RBI),
292 #define GET_GLOBALISEL_PREDICATES_INIT
293 #include "AArch64GenGlobalISel.inc"
294 #undef GET_GLOBALISEL_PREDICATES_INIT
295 #define GET_GLOBALISEL_TEMPORARIES_INIT
296 #include "AArch64GenGlobalISel.inc"
297 #undef GET_GLOBALISEL_TEMPORARIES_INIT
298 {
299 }
300 
301 // FIXME: This should be target-independent, inferred from the types declared
302 // for each class in the bank.
303 static const TargetRegisterClass *
304 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
305  const RegisterBankInfo &RBI,
306  bool GetAllRegSet = false) {
307  if (RB.getID() == AArch64::GPRRegBankID) {
308  if (Ty.getSizeInBits() <= 32)
309  return GetAllRegSet ? &AArch64::GPR32allRegClass
310  : &AArch64::GPR32RegClass;
311  if (Ty.getSizeInBits() == 64)
312  return GetAllRegSet ? &AArch64::GPR64allRegClass
313  : &AArch64::GPR64RegClass;
314  return nullptr;
315  }
316 
317  if (RB.getID() == AArch64::FPRRegBankID) {
318  if (Ty.getSizeInBits() <= 16)
319  return &AArch64::FPR16RegClass;
320  if (Ty.getSizeInBits() == 32)
321  return &AArch64::FPR32RegClass;
322  if (Ty.getSizeInBits() == 64)
323  return &AArch64::FPR64RegClass;
324  if (Ty.getSizeInBits() == 128)
325  return &AArch64::FPR128RegClass;
326  return nullptr;
327  }
328 
329  return nullptr;
330 }
331 
332 /// Given a register bank, and size in bits, return the smallest register class
333 /// that can represent that combination.
334 static const TargetRegisterClass *
335 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
336  bool GetAllRegSet = false) {
337  unsigned RegBankID = RB.getID();
338 
339  if (RegBankID == AArch64::GPRRegBankID) {
340  if (SizeInBits <= 32)
341  return GetAllRegSet ? &AArch64::GPR32allRegClass
342  : &AArch64::GPR32RegClass;
343  if (SizeInBits == 64)
344  return GetAllRegSet ? &AArch64::GPR64allRegClass
345  : &AArch64::GPR64RegClass;
346  }
347 
348  if (RegBankID == AArch64::FPRRegBankID) {
349  switch (SizeInBits) {
350  default:
351  return nullptr;
352  case 8:
353  return &AArch64::FPR8RegClass;
354  case 16:
355  return &AArch64::FPR16RegClass;
356  case 32:
357  return &AArch64::FPR32RegClass;
358  case 64:
359  return &AArch64::FPR64RegClass;
360  case 128:
361  return &AArch64::FPR128RegClass;
362  }
363  }
364 
365  return nullptr;
366 }
367 
368 /// Returns the correct subregister to use for a given register class.
369 static bool getSubRegForClass(const TargetRegisterClass *RC,
370  const TargetRegisterInfo &TRI, unsigned &SubReg) {
371  switch (TRI.getRegSizeInBits(*RC)) {
372  case 8:
373  SubReg = AArch64::bsub;
374  break;
375  case 16:
376  SubReg = AArch64::hsub;
377  break;
378  case 32:
379  if (RC != &AArch64::FPR32RegClass)
380  SubReg = AArch64::sub_32;
381  else
382  SubReg = AArch64::ssub;
383  break;
384  case 64:
385  SubReg = AArch64::dsub;
386  break;
387  default:
388  LLVM_DEBUG(
389  dbgs() << "Couldn't find appropriate subregister for register class.");
390  return false;
391  }
392 
393  return true;
394 }
395 
396 /// Check whether \p I is a currently unsupported binary operation:
397 /// - it has an unsized type
398 /// - an operand is not a vreg
399 /// - its operands are not all in the same bank
400 /// These are checks that should someday live in the verifier, but right now,
401 /// these are mostly limitations of the aarch64 selector.
402 static bool unsupportedBinOp(const MachineInstr &I,
403  const AArch64RegisterBankInfo &RBI,
404  const MachineRegisterInfo &MRI,
405  const AArch64RegisterInfo &TRI) {
406  LLT Ty = MRI.getType(I.getOperand(0).getReg());
407  if (!Ty.isValid()) {
408  LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
409  return true;
410  }
411 
412  const RegisterBank *PrevOpBank = nullptr;
413  for (auto &MO : I.operands()) {
414  // FIXME: Support non-register operands.
415  if (!MO.isReg()) {
416  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
417  return true;
418  }
419 
420  // FIXME: Can generic operations have physical register operands? If
421  // so, this will need to be taught about that, and we'll need to get the
422  // bank out of the minimal class for the register.
423  // Either way, this needs to be documented (and possibly verified).
424  if (!Register::isVirtualRegister(MO.getReg())) {
425  LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
426  return true;
427  }
428 
429  const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
430  if (!OpBank) {
431  LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
432  return true;
433  }
434 
435  if (PrevOpBank && OpBank != PrevOpBank) {
436  LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
437  return true;
438  }
439  PrevOpBank = OpBank;
440  }
441  return false;
442 }
443 
444 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
445 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
446 /// and of size \p OpSize.
447 /// \returns \p GenericOpc if the combination is unsupported.
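/// For instance, selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32)
/// yields AArch64::LSLVWr, the 32-bit variable-shift-left instruction.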
448 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
449  unsigned OpSize) {
450  switch (RegBankID) {
451  case AArch64::GPRRegBankID:
452  if (OpSize == 32) {
453  switch (GenericOpc) {
454  case TargetOpcode::G_SHL:
455  return AArch64::LSLVWr;
456  case TargetOpcode::G_LSHR:
457  return AArch64::LSRVWr;
458  case TargetOpcode::G_ASHR:
459  return AArch64::ASRVWr;
460  default:
461  return GenericOpc;
462  }
463  } else if (OpSize == 64) {
464  switch (GenericOpc) {
465  case TargetOpcode::G_GEP:
466  return AArch64::ADDXrr;
467  case TargetOpcode::G_SHL:
468  return AArch64::LSLVXr;
469  case TargetOpcode::G_LSHR:
470  return AArch64::LSRVXr;
471  case TargetOpcode::G_ASHR:
472  return AArch64::ASRVXr;
473  default:
474  return GenericOpc;
475  }
476  }
477  break;
478  case AArch64::FPRRegBankID:
479  switch (OpSize) {
480  case 32:
481  switch (GenericOpc) {
482  case TargetOpcode::G_FADD:
483  return AArch64::FADDSrr;
484  case TargetOpcode::G_FSUB:
485  return AArch64::FSUBSrr;
486  case TargetOpcode::G_FMUL:
487  return AArch64::FMULSrr;
488  case TargetOpcode::G_FDIV:
489  return AArch64::FDIVSrr;
490  default:
491  return GenericOpc;
492  }
493  case 64:
494  switch (GenericOpc) {
495  case TargetOpcode::G_FADD:
496  return AArch64::FADDDrr;
497  case TargetOpcode::G_FSUB:
498  return AArch64::FSUBDrr;
499  case TargetOpcode::G_FMUL:
500  return AArch64::FMULDrr;
501  case TargetOpcode::G_FDIV:
502  return AArch64::FDIVDrr;
503  case TargetOpcode::G_OR:
504  return AArch64::ORRv8i8;
505  default:
506  return GenericOpc;
507  }
508  }
509  break;
510  }
511  return GenericOpc;
512 }
513 
514 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
515 /// appropriate for the (value) register bank \p RegBankID and of memory access
516 /// size \p OpSize. This returns the variant with the base+unsigned-immediate
517 /// addressing mode (e.g., LDRXui).
518 /// \returns \p GenericOpc if the combination is unsupported.
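/// For instance, a 32-bit G_LOAD whose value lives on the GPR bank maps to
/// AArch64::LDRWui ("ldr w0, [xN, #imm]"), while the same load on the FPR
/// bank maps to AArch64::LDRSui.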
519 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
520  unsigned OpSize) {
521  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
522  switch (RegBankID) {
523  case AArch64::GPRRegBankID:
524  switch (OpSize) {
525  case 8:
526  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
527  case 16:
528  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
529  case 32:
530  return isStore ? AArch64::STRWui : AArch64::LDRWui;
531  case 64:
532  return isStore ? AArch64::STRXui : AArch64::LDRXui;
533  }
534  break;
535  case AArch64::FPRRegBankID:
536  switch (OpSize) {
537  case 8:
538  return isStore ? AArch64::STRBui : AArch64::LDRBui;
539  case 16:
540  return isStore ? AArch64::STRHui : AArch64::LDRHui;
541  case 32:
542  return isStore ? AArch64::STRSui : AArch64::LDRSui;
543  case 64:
544  return isStore ? AArch64::STRDui : AArch64::LDRDui;
545  }
546  break;
547  }
548  return GenericOpc;
549 }
550 
551 #ifndef NDEBUG
552 /// Helper function that verifies that we have a valid copy at the end of
553 /// selectCopy. Verifies that the source and dest have the expected sizes and
554 /// then returns true.
555 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
556  const MachineRegisterInfo &MRI,
557  const TargetRegisterInfo &TRI,
558  const RegisterBankInfo &RBI) {
559  const Register DstReg = I.getOperand(0).getReg();
560  const Register SrcReg = I.getOperand(1).getReg();
561  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
562  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
563 
564  // Make sure the size of the source and dest line up.
565  assert(
566  (DstSize == SrcSize ||
567  // Copies are a means to set up initial types; the number of
568  // bits may not exactly match.
569  (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
570  // Copies are a means to copy bits around; as long as we stay
571  // on the same register class, that's fine. Otherwise, that
572  // means we need some SUBREG_TO_REG or AND & co.
573  (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
574  "Copy with different width?!");
575 
576  // Check the size of the destination.
577  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
578  "GPRs cannot get more than 64-bit width values");
579 
580  return true;
581 }
582 #endif
583 
584 /// Helper function for selectCopy. Inserts a subregister copy from
585 /// \p *From to \p *To, linking it up to \p I.
586 ///
587 /// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
588 ///
589 /// CopyReg (From class) = COPY SrcReg
590 /// SubRegCopy (To class) = COPY CopyReg:SubReg
591 /// Dst = COPY SubRegCopy
592 static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
593  const RegisterBankInfo &RBI, Register SrcReg,
594  const TargetRegisterClass *From,
595  const TargetRegisterClass *To,
596  unsigned SubReg) {
597  MachineIRBuilder MIB(I);
598  auto Copy = MIB.buildCopy({From}, {SrcReg});
599  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
600  .addReg(Copy.getReg(0), 0, SubReg);
601  MachineOperand &RegOp = I.getOperand(1);
602  RegOp.setReg(SubRegCopy.getReg(0));
603 
604  // It's possible that the destination register won't be constrained. Make
605  // sure that happens.
606  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
607  RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
608 
609  return true;
610 }
611 
612 /// Helper function to get the source and destination register classes for a
613 /// copy. Returns a std::pair containing the source register class for the
614 /// copy, and the destination register class for the copy. If a register class
615 /// cannot be determined, then it will be nullptr.
616 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
617 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
618  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
619  const RegisterBankInfo &RBI) {
620  Register DstReg = I.getOperand(0).getReg();
621  Register SrcReg = I.getOperand(1).getReg();
622  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
623  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
624  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
625  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
626 
627  // Special casing for cross-bank copies of s1s. We can technically represent
628  // a 1-bit value with any size of register. The minimum size for a GPR is 32
629  // bits. So, we need to put the FPR on 32 bits as well.
630  //
631  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
632  // then we can pull it into the helpers that get the appropriate class for a
633  // register bank. Or make a new helper that carries along some constraint
634  // information.
635  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
636  SrcSize = DstSize = 32;
637 
638  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
639  getMinClassForRegBank(DstRegBank, DstSize, true)};
640 }
641 
642 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
643  MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
644  const RegisterBankInfo &RBI) {
645 
646  Register DstReg = I.getOperand(0).getReg();
647  Register SrcReg = I.getOperand(1).getReg();
648  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
649  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
650 
651  // Find the correct register classes for the source and destination registers.
652  const TargetRegisterClass *SrcRC;
653  const TargetRegisterClass *DstRC;
654  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
655 
656  if (!DstRC) {
657  LLVM_DEBUG(dbgs() << "Unexpected dest size "
658  << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
659  return false;
660  }
661 
662  // A couple helpers below, for making sure that the copy we produce is valid.
663 
664  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
665  // to verify that the src and dst are the same size, since that's handled by
666  // the SUBREG_TO_REG.
667  bool KnownValid = false;
668 
669  // Returns true, or asserts if something we don't expect happens. Instead of
670  // returning true, we return isValidCopy() to ensure that we verify the
671  // result.
672  auto CheckCopy = [&]() {
673  // If we have a bitcast or something, we can't have physical registers.
674  assert((I.isCopy() ||
675  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
676  !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
677  "No phys reg on generic operator!");
678  assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
679  (void)KnownValid;
680  return true;
681  };
682 
683  // Is this a copy? If so, then we may need to insert a subregister copy, or
684  // a SUBREG_TO_REG.
685  if (I.isCopy()) {
686  // Yes. Check if there's anything to fix up.
687  if (!SrcRC) {
688  LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
689  return false;
690  }
691 
692  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
693  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
694 
695  // If we're doing a cross-bank copy on different-sized registers, we need
696  // to do a bit more work.
697  if (SrcSize > DstSize) {
698  // We're doing a cross-bank copy into a smaller register. We need a
699  // subregister copy. First, get a register class that's on the same bank
700  // as the destination, but the same size as the source.
701  const TargetRegisterClass *SubregRC =
702  getMinClassForRegBank(DstRegBank, SrcSize, true);
703  assert(SubregRC && "Didn't get a register class for subreg?");
704 
705  // Get the appropriate subregister for the destination.
706  unsigned SubReg = 0;
707  if (!getSubRegForClass(DstRC, TRI, SubReg)) {
708  LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
709  return false;
710  }
711 
712  // Now, insert a subregister copy using the new register class.
713  selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
714  return CheckCopy();
715  }
716 
717  // Is this a cross-bank copy?
718  if (DstRegBank.getID() != SrcRegBank.getID()) {
719  if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
720  SrcSize == 16) {
721  // Special case for FPR16 to GPR32.
722  // FIXME: This can probably be generalized like the above case.
723  Register PromoteReg =
724  MRI.createVirtualRegister(&AArch64::FPR32RegClass);
725  BuildMI(*I.getParent(), I, I.getDebugLoc(),
726  TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
727  .addImm(0)
728  .addUse(SrcReg)
729  .addImm(AArch64::hsub);
730  MachineOperand &RegOp = I.getOperand(1);
731  RegOp.setReg(PromoteReg);
732 
733  // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
734  KnownValid = true;
735  }
736  }
737 
738  // If the destination is a physical register, then there's nothing to
739  // change, so we're done.
740  if (Register::isPhysicalRegister(DstReg))
741  return CheckCopy();
742  }
743 
744  // No need to constrain SrcReg. It will get constrained when we hit another
745  // of its use or its defs. Copies do not have constraints.
746  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
747  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
748  << " operand\n");
749  return false;
750  }
751  I.setDesc(TII.get(AArch64::COPY));
752  return CheckCopy();
753 }
754 
755 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
756  if (!DstTy.isScalar() || !SrcTy.isScalar())
757  return GenericOpc;
758 
759  const unsigned DstSize = DstTy.getSizeInBits();
760  const unsigned SrcSize = SrcTy.getSizeInBits();
761 
762  switch (DstSize) {
763  case 32:
764  switch (SrcSize) {
765  case 32:
766  switch (GenericOpc) {
767  case TargetOpcode::G_SITOFP:
768  return AArch64::SCVTFUWSri;
769  case TargetOpcode::G_UITOFP:
770  return AArch64::UCVTFUWSri;
771  case TargetOpcode::G_FPTOSI:
772  return AArch64::FCVTZSUWSr;
773  case TargetOpcode::G_FPTOUI:
774  return AArch64::FCVTZUUWSr;
775  default:
776  return GenericOpc;
777  }
778  case 64:
779  switch (GenericOpc) {
780  case TargetOpcode::G_SITOFP:
781  return AArch64::SCVTFUXSri;
782  case TargetOpcode::G_UITOFP:
783  return AArch64::UCVTFUXSri;
784  case TargetOpcode::G_FPTOSI:
785  return AArch64::FCVTZSUWDr;
786  case TargetOpcode::G_FPTOUI:
787  return AArch64::FCVTZUUWDr;
788  default:
789  return GenericOpc;
790  }
791  default:
792  return GenericOpc;
793  }
794  case 64:
795  switch (SrcSize) {
796  case 32:
797  switch (GenericOpc) {
798  case TargetOpcode::G_SITOFP:
799  return AArch64::SCVTFUWDri;
800  case TargetOpcode::G_UITOFP:
801  return AArch64::UCVTFUWDri;
802  case TargetOpcode::G_FPTOSI:
803  return AArch64::FCVTZSUXSr;
804  case TargetOpcode::G_FPTOUI:
805  return AArch64::FCVTZUUXSr;
806  default:
807  return GenericOpc;
808  }
809  case 64:
810  switch (GenericOpc) {
811  case TargetOpcode::G_SITOFP:
812  return AArch64::SCVTFUXDri;
813  case TargetOpcode::G_UITOFP:
814  return AArch64::UCVTFUXDri;
815  case TargetOpcode::G_FPTOSI:
816  return AArch64::FCVTZSUXDr;
817  case TargetOpcode::G_FPTOUI:
818  return AArch64::FCVTZUUXDr;
819  default:
820  return GenericOpc;
821  }
822  default:
823  return GenericOpc;
824  }
825  default:
826  return GenericOpc;
827  };
828  return GenericOpc;
829 }
830 
831 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
832  const RegisterBankInfo &RBI) {
833  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
834  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
835  AArch64::GPRRegBankID);
836  LLT Ty = MRI.getType(I.getOperand(0).getReg());
837  if (Ty == LLT::scalar(32))
838  return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
839  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
840  return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
841  return 0;
842 }
843 
844 /// Helper function to select the opcode for a G_FCMP.
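/// E.g. a G_FCMP against +0.0 can use the immediate forms FCMPSri/FCMPDri
/// ("fcmp s0, #0.0"), avoiding materializing the constant; otherwise the
/// register forms FCMPSrr/FCMPDrr are used.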
845 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
846  // If this is a compare against +0.0, then we don't have to explicitly
847  // materialize a constant.
848  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
849  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
850  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
851  if (OpSize != 32 && OpSize != 64)
852  return 0;
853  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
854  {AArch64::FCMPSri, AArch64::FCMPDri}};
855  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
856 }
857 
858 /// Returns true if \p P is an unsigned integer comparison predicate.
859 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
860  switch (P) {
861  default:
862  return false;
863  case CmpInst::ICMP_UGT:
864  case CmpInst::ICMP_UGE:
865  case CmpInst::ICMP_ULT:
866  case CmpInst::ICMP_ULE:
867  return true;
868  }
869 }
870 
871 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
872  switch (P) {
873  default:
874  llvm_unreachable("Unknown condition code!");
875  case CmpInst::ICMP_NE:
876  return AArch64CC::NE;
877  case CmpInst::ICMP_EQ:
878  return AArch64CC::EQ;
879  case CmpInst::ICMP_SGT:
880  return AArch64CC::GT;
881  case CmpInst::ICMP_SGE:
882  return AArch64CC::GE;
883  case CmpInst::ICMP_SLT:
884  return AArch64CC::LT;
885  case CmpInst::ICMP_SLE:
886  return AArch64CC::LE;
887  case CmpInst::ICMP_UGT:
888  return AArch64CC::HI;
889  case CmpInst::ICMP_UGE:
890  return AArch64CC::HS;
891  case CmpInst::ICMP_ULT:
892  return AArch64CC::LO;
893  case CmpInst::ICMP_ULE:
894  return AArch64CC::LS;
895  }
896 }
897 
898 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
899  AArch64CC::CondCode &CondCode,
900  AArch64CC::CondCode &CondCode2) {
901  CondCode2 = AArch64CC::AL;
902  switch (P) {
903  default:
904  llvm_unreachable("Unknown FP condition!");
905  case CmpInst::FCMP_OEQ:
906  CondCode = AArch64CC::EQ;
907  break;
908  case CmpInst::FCMP_OGT:
909  CondCode = AArch64CC::GT;
910  break;
911  case CmpInst::FCMP_OGE:
912  CondCode = AArch64CC::GE;
913  break;
914  case CmpInst::FCMP_OLT:
915  CondCode = AArch64CC::MI;
916  break;
917  case CmpInst::FCMP_OLE:
918  CondCode = AArch64CC::LS;
919  break;
920  case CmpInst::FCMP_ONE:
921  CondCode = AArch64CC::MI;
922  CondCode2 = AArch64CC::GT;
923  break;
924  case CmpInst::FCMP_ORD:
925  CondCode = AArch64CC::VC;
926  break;
927  case CmpInst::FCMP_UNO:
928  CondCode = AArch64CC::VS;
929  break;
930  case CmpInst::FCMP_UEQ:
931  CondCode = AArch64CC::EQ;
932  CondCode2 = AArch64CC::VS;
933  break;
934  case CmpInst::FCMP_UGT:
935  CondCode = AArch64CC::HI;
936  break;
937  case CmpInst::FCMP_UGE:
938  CondCode = AArch64CC::PL;
939  break;
940  case CmpInst::FCMP_ULT:
941  CondCode = AArch64CC::LT;
942  break;
943  case CmpInst::FCMP_ULE:
944  CondCode = AArch64CC::LE;
945  break;
946  case CmpInst::FCMP_UNE:
947  CondCode = AArch64CC::NE;
948  break;
949  }
950 }
951 
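// Fold a G_ICMP feeding a G_BRCOND into a single compare-and-branch where
// possible: an eq/ne compare against zero on the GPR bank becomes CBZ/CBNZ,
// and anything else is emitted as an integer compare followed by a Bcc.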
952 bool AArch64InstructionSelector::selectCompareBranch(
953  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
954 
955  const Register CondReg = I.getOperand(0).getReg();
956  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
957  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
958  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
959  CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
960  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
961  return false;
962 
963  Register LHS = CCMI->getOperand(2).getReg();
964  Register RHS = CCMI->getOperand(3).getReg();
965  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
966  if (!VRegAndVal)
967  std::swap(RHS, LHS);
968 
969  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
970  if (!VRegAndVal || VRegAndVal->Value != 0) {
971  MachineIRBuilder MIB(I);
972  // If we can't select a CBZ then emit a cmp + Bcc.
973  if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
974  CCMI->getOperand(1), MIB))
975  return false;
976  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
977  (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
978  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
979  I.eraseFromParent();
980  return true;
981  }
982 
983  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
984  if (RB.getID() != AArch64::GPRRegBankID)
985  return false;
986 
987  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
988  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
989  return false;
990 
991  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
992  unsigned CBOpc = 0;
993  if (CmpWidth <= 32)
994  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
995  else if (CmpWidth == 64)
996  CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
997  else
998  return false;
999 
1000  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1001  .addUse(LHS)
1002  .addMBB(DestMBB)
1003  .constrainAllUses(TII, TRI, RBI);
1004 
1005  I.eraseFromParent();
1006  return true;
1007 }
1008 
1009 bool AArch64InstructionSelector::selectVectorSHL(
1010  MachineInstr &I, MachineRegisterInfo &MRI) const {
1011  assert(I.getOpcode() == TargetOpcode::G_SHL);
1012  Register DstReg = I.getOperand(0).getReg();
1013  const LLT Ty = MRI.getType(DstReg);
1014  Register Src1Reg = I.getOperand(1).getReg();
1015  Register Src2Reg = I.getOperand(2).getReg();
1016 
1017  if (!Ty.isVector())
1018  return false;
1019 
1020  unsigned Opc = 0;
1021  if (Ty == LLT::vector(2, 64)) {
1022  Opc = AArch64::USHLv2i64;
1023  } else if (Ty == LLT::vector(4, 32)) {
1024  Opc = AArch64::USHLv4i32;
1025  } else if (Ty == LLT::vector(2, 32)) {
1026  Opc = AArch64::USHLv2i32;
1027  } else {
1028  LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1029  return false;
1030  }
1031 
1032  MachineIRBuilder MIB(I);
1033  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
1034  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
1035  I.eraseFromParent();
1036  return true;
1037 }
1038 
1039 bool AArch64InstructionSelector::selectVectorASHR(
1040  MachineInstr &I, MachineRegisterInfo &MRI) const {
1041  assert(I.getOpcode() == TargetOpcode::G_ASHR);
1042  Register DstReg = I.getOperand(0).getReg();
1043  const LLT Ty = MRI.getType(DstReg);
1044  Register Src1Reg = I.getOperand(1).getReg();
1045  Register Src2Reg = I.getOperand(2).getReg();
1046 
1047  if (!Ty.isVector())
1048  return false;
1049 
1050  // There is no vector shift-right-by-register instruction, but the
1051  // shift-left-by-register instruction takes a signed shift amount, and a
1052  // negative amount specifies a right shift.
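  // E.g. for <2 x s64> this emits "neg v1.2d, vAmt.2d" followed by
  // "sshl vDst.2d, vSrc.2d, v1.2d".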
1053 
1054  unsigned Opc = 0;
1055  unsigned NegOpc = 0;
1056  const TargetRegisterClass *RC = nullptr;
1057  if (Ty == LLT::vector(2, 64)) {
1058  Opc = AArch64::SSHLv2i64;
1059  NegOpc = AArch64::NEGv2i64;
1060  RC = &AArch64::FPR128RegClass;
1061  } else if (Ty == LLT::vector(4, 32)) {
1062  Opc = AArch64::SSHLv4i32;
1063  NegOpc = AArch64::NEGv4i32;
1064  RC = &AArch64::FPR128RegClass;
1065  } else if (Ty == LLT::vector(2, 32)) {
1066  Opc = AArch64::SSHLv2i32;
1067  NegOpc = AArch64::NEGv2i32;
1068  RC = &AArch64::FPR64RegClass;
1069  } else {
1070  LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1071  return false;
1072  }
1073 
1074  MachineIRBuilder MIB(I);
1075  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1076  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1077  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1078  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1079  I.eraseFromParent();
1080  return true;
1081 }
1082 
1083 bool AArch64InstructionSelector::selectVaStartAAPCS(
1084  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1085  return false;
1086 }
1087 
1088 bool AArch64InstructionSelector::selectVaStartDarwin(
1089  MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1090  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1091  Register ListReg = I.getOperand(0).getReg();
1092 
1093  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1094 
1095  auto MIB =
1096  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1097  .addDef(ArgsAddrReg)
1098  .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1099  .addImm(0)
1100  .addImm(0);
1101 
1101 
1102  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1103 
1104  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1105  .addUse(ArgsAddrReg)
1106  .addUse(ListReg)
1107  .addImm(0)
1108  .addMemOperand(MF.getMachineMemOperand(
1109  MachinePointerInfo(), MachineMemOperand::MOStore, 8, 8));
1110  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1111  I.eraseFromParent();
1112  return true;
1113 }
1114 
1115 void AArch64InstructionSelector::materializeLargeCMVal(
1116  MachineInstr &I, const Value *V, unsigned OpFlags) const {
1117  MachineBasicBlock &MBB = *I.getParent();
1118  MachineFunction &MF = *MBB.getParent();
1119  MachineRegisterInfo &MRI = MF.getRegInfo();
1120  MachineIRBuilder MIB(I);
1121 
1122  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1123  MovZ->addOperand(MF, I.getOperand(1));
1124  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1125  AArch64II::MO_NC);
1126  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1127  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1128 
1129  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1130  Register ForceDstReg) {
1131  Register DstReg = ForceDstReg
1132  ? ForceDstReg
1133  : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1134  auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1135  if (auto *GV = dyn_cast<GlobalValue>(V)) {
1136  MovI->addOperand(MF, MachineOperand::CreateGA(
1137  GV, MovZ->getOperand(1).getOffset(), Flags));
1138  } else {
1139  MovI->addOperand(
1140  MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1141  MovZ->getOperand(1).getOffset(), Flags));
1142  }
1143  MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1144  constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1145  return DstReg;
1146  };
1147  Register DstReg = BuildMovK(MovZ.getReg(0),
1148  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1149  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1150  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1151  return;
1152 }
1153 
1154 void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1155  MachineBasicBlock &MBB = *I.getParent();
1156  MachineFunction &MF = *MBB.getParent();
1157  MachineRegisterInfo &MRI = MF.getRegInfo();
1158 
1159  switch (I.getOpcode()) {
1160  case TargetOpcode::G_SHL:
1161  case TargetOpcode::G_ASHR:
1162  case TargetOpcode::G_LSHR: {
1163  // These shifts are legalized to have 64 bit shift amounts because we want
1164  // to take advantage of the existing imported selection patterns that assume
1165  // the immediates are s64s. However, if the shifted type is 32 bits and for
1166  // some reason we receive input GMIR that has an s64 shift amount that's not
1167  // a G_CONSTANT, insert a truncate so that we can still select the s32
1168  // register-register variant.
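    // E.g. (illustrative MIR) "%amt:gpr(s64) = ...; %d:gpr(s32) = G_SHL %a, %amt"
    // gets "%t:gpr(s32) = COPY %amt.sub_32" inserted and then shifts by %t.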
1169  Register SrcReg = I.getOperand(1).getReg();
1170  Register ShiftReg = I.getOperand(2).getReg();
1171  const LLT ShiftTy = MRI.getType(ShiftReg);
1172  const LLT SrcTy = MRI.getType(SrcReg);
1173  if (SrcTy.isVector())
1174  return;
1175  assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1176  if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1177  return;
1178  auto *AmtMI = MRI.getVRegDef(ShiftReg);
1179  assert(AmtMI && "could not find a vreg definition for shift amount");
1180  if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1181  // Insert a subregister copy to implement a 64->32 trunc
1182  MachineIRBuilder MIB(I);
1183  auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1184  .addReg(ShiftReg, 0, AArch64::sub_32);
1185  MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1186  I.getOperand(2).setReg(Trunc.getReg(0));
1187  }
1188  return;
1189  }
1190  case TargetOpcode::G_STORE:
1191  contractCrossBankCopyIntoStore(I, MRI);
1192  return;
1193  default:
1194  return;
1195  }
1196 }
1197 
1198 bool AArch64InstructionSelector::earlySelectSHL(
1199  MachineInstr &I, MachineRegisterInfo &MRI) const {
1200  // We try to match the immediate variant of LSL, which is actually an alias
1201  // for a special case of UBFM. Otherwise, we fall back to the imported
1202  // selector which will match the register variant.
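  // E.g. a 64-bit "G_SHL %x, 4" can become "UBFMXri %x, 60, 59", since
  // "lsl xD, xN, #s" is an alias for "ubfm xD, xN, #((64 - s) % 64), #(63 - s)".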
1203  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1204  const auto &MO = I.getOperand(2);
1205  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1206  if (!VRegAndVal)
1207  return false;
1208 
1209  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1210  if (DstTy.isVector())
1211  return false;
1212  bool Is64Bit = DstTy.getSizeInBits() == 64;
1213  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1214  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1215  MachineIRBuilder MIB(I);
1216 
1217  if (!Imm1Fn || !Imm2Fn)
1218  return false;
1219 
1220  auto NewI =
1221  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1222  {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1223 
1224  for (auto &RenderFn : *Imm1Fn)
1225  RenderFn(NewI);
1226  for (auto &RenderFn : *Imm2Fn)
1227  RenderFn(NewI);
1228 
1229  I.eraseFromParent();
1230  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1231 }
1232 
1233 void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1234  MachineInstr &I, MachineRegisterInfo &MRI) const {
1235  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1236  // If we're storing a scalar, it doesn't matter what register bank that
1237  // scalar is on. All that matters is the size.
1238  //
1239  // So, if we see something like this (with a 32-bit scalar as an example):
1240  //
1241  // %x:gpr(s32) = ... something ...
1242  // %y:fpr(s32) = COPY %x:gpr(s32)
1243  // G_STORE %y:fpr(s32)
1244  //
1245  // We can fix this up into something like this:
1246  //
1247  // G_STORE %x:gpr(s32)
1248  //
1249  // And then continue the selection process normally.
1250  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1251  if (!Def)
1252  return;
1253  Register DefDstReg = Def->getOperand(0).getReg();
1254  LLT DefDstTy = MRI.getType(DefDstReg);
1255  Register StoreSrcReg = I.getOperand(0).getReg();
1256  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1257 
1258  // If we get something strange like a physical register, then we shouldn't
1259  // go any further.
1260  if (!DefDstTy.isValid())
1261  return;
1262 
1263  // Are the source and dst types the same size?
1264  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1265  return;
1266 
1267  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1268  RBI.getRegBank(DefDstReg, MRI, TRI))
1269  return;
1270 
1271  // We have a cross-bank copy, which is entering a store. Let's fold it.
1272  I.getOperand(0).setReg(DefDstReg);
1273 }
1274 
1275 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1276  assert(I.getParent() && "Instruction should be in a basic block!");
1277  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1278 
1279  MachineBasicBlock &MBB = *I.getParent();
1280  MachineFunction &MF = *MBB.getParent();
1281  MachineRegisterInfo &MRI = MF.getRegInfo();
1282 
1283  switch (I.getOpcode()) {
1284  case TargetOpcode::G_SHL:
1285  return earlySelectSHL(I, MRI);
1286  case TargetOpcode::G_CONSTANT: {
1287  bool IsZero = false;
1288  if (I.getOperand(1).isCImm())
1289  IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1290  else if (I.getOperand(1).isImm())
1291  IsZero = I.getOperand(1).getImm() == 0;
1292 
1293  if (!IsZero)
1294  return false;
1295 
1296  Register DefReg = I.getOperand(0).getReg();
1297  LLT Ty = MRI.getType(DefReg);
1298  if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1299  return false;
1300 
1301  if (Ty == LLT::scalar(64)) {
1302  I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1303  RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1304  } else {
1305  I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1306  RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1307  }
1308  I.setDesc(TII.get(TargetOpcode::COPY));
1309  return true;
1310  }
1311  default:
1312  return false;
1313  }
1314 }
1315 
1316 bool AArch64InstructionSelector::select(MachineInstr &I) {
1317  assert(I.getParent() && "Instruction should be in a basic block!");
1318  assert(I.getParent()->getParent() && "Instruction should be in a function!");
1319 
1320  MachineBasicBlock &MBB = *I.getParent();
1321  MachineFunction &MF = *MBB.getParent();
1322  MachineRegisterInfo &MRI = MF.getRegInfo();
1323 
1324  unsigned Opcode = I.getOpcode();
1325  // G_PHI requires same handling as PHI
1326  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
1327  // Certain non-generic instructions also need some special handling.
1328 
1329  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1330  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1331 
1332  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1333  const Register DefReg = I.getOperand(0).getReg();
1334  const LLT DefTy = MRI.getType(DefReg);
1335 
1336  const RegClassOrRegBank &RegClassOrBank =
1337  MRI.getRegClassOrRegBank(DefReg);
1338 
1339  const TargetRegisterClass *DefRC
1340  = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1341  if (!DefRC) {
1342  if (!DefTy.isValid()) {
1343  LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1344  return false;
1345  }
1346  const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1347  DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1348  if (!DefRC) {
1349  LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1350  return false;
1351  }
1352  }
1353 
1354  I.setDesc(TII.get(TargetOpcode::PHI));
1355 
1356  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1357  }
1358 
1359  if (I.isCopy())
1360  return selectCopy(I, TII, MRI, TRI, RBI);
1361 
1362  return true;
1363  }
1364 
1365 
1366  if (I.getNumOperands() != I.getNumExplicitOperands()) {
1367  LLVM_DEBUG(
1368  dbgs() << "Generic instruction has unexpected implicit operands\n");
1369  return false;
1370  }
1371 
1372  // Try to do some lowering before we start instruction selecting. These
1373  // lowerings are purely transformations on the input G_MIR and so selection
1374  // must continue after any modification of the instruction.
1375  preISelLower(I);
1376 
1377  // There may be patterns that the importer can't handle optimally and
1378  // instead selects into a suboptimal sequence, so our custom C++ selection
1379  // code never gets a chance to improve on it. Therefore, we have an early
1380  // selection attempt here to give priority to certain selection routines
1381  // over the imported ones.
1382  if (earlySelect(I))
1383  return true;
1384 
1385  if (selectImpl(I, *CoverageInfo))
1386  return true;
1387 
1388  LLT Ty =
1389  I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1390 
1391  MachineIRBuilder MIB(I);
1392 
1393  switch (Opcode) {
1394  case TargetOpcode::G_BRCOND: {
1395  if (Ty.getSizeInBits() > 32) {
1396  // We shouldn't need this on AArch64, but it would be implemented as an
1397  // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1398  // bit being tested is < 32.
1399  LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1400  << ", expected at most 32-bits");
1401  return false;
1402  }
1403 
1404  const Register CondReg = I.getOperand(0).getReg();
1405  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1406 
1407  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1408  // instructions will not be produced, as they are conditional branch
1409  // instructions that do not set flags.
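    // E.g. "tbnz w0, #0, <bb>" branches on bit 0 of the condition without
    // touching NZCV; under SLH we instead test the bit with a flag-setting
    // ANDS and branch with a Bcc.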
1410  bool ProduceNonFlagSettingCondBr =
1411  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1412  if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1413  return true;
1414 
1415  if (ProduceNonFlagSettingCondBr) {
1416  auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1417  .addUse(CondReg)
1418  .addImm(/*bit offset=*/0)
1419  .addMBB(DestMBB);
1420 
1421  I.eraseFromParent();
1422  return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1423  } else {
1424  auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1425  .addDef(AArch64::WZR)
1426  .addUse(CondReg)
1427  .addImm(1);
1428  constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1429  auto Bcc =
1430  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1431  .addImm(AArch64CC::EQ)
1432  .addMBB(DestMBB);
1433 
1434  I.eraseFromParent();
1435  return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1436  }
1437  }
1438 
1439  case TargetOpcode::G_BRINDIRECT: {
1440  I.setDesc(TII.get(AArch64::BR));
1441  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1442  }
1443 
1444  case TargetOpcode::G_BRJT:
1445  return selectBrJT(I, MRI);
1446 
1447  case TargetOpcode::G_BSWAP: {
1448  // Handle vector types for G_BSWAP directly.
1449  Register DstReg = I.getOperand(0).getReg();
1450  LLT DstTy = MRI.getType(DstReg);
1451 
1452  // We should only get vector types here; everything else is handled by the
1453  // importer right now.
1454  if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1455  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1456  return false;
1457  }
1458 
1459  // Only handle 4 and 2 element vectors for now.
1460  // TODO: 16-bit elements.
1461  unsigned NumElts = DstTy.getNumElements();
1462  if (NumElts != 4 && NumElts != 2) {
1463  LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1464  return false;
1465  }
1466 
1467  // Choose the correct opcode for the supported types. Right now, that's
1468  // v2s32, v4s32, and v2s64.
1469  unsigned Opc = 0;
1470  unsigned EltSize = DstTy.getElementType().getSizeInBits();
1471  if (EltSize == 32)
1472  Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1473  : AArch64::REV32v16i8;
1474  else if (EltSize == 64)
1475  Opc = AArch64::REV64v16i8;
1476 
1477  // We should always get something by the time we get here...
1478  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1479 
1480  I.setDesc(TII.get(Opc));
1481  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1482  }
1483 
1484  case TargetOpcode::G_FCONSTANT:
1485  case TargetOpcode::G_CONSTANT: {
1486  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1487 
1488  const LLT s8 = LLT::scalar(8);
1489  const LLT s16 = LLT::scalar(16);
1490  const LLT s32 = LLT::scalar(32);
1491  const LLT s64 = LLT::scalar(64);
1492  const LLT p0 = LLT::pointer(0, 64);
1493 
1494  const Register DefReg = I.getOperand(0).getReg();
1495  const LLT DefTy = MRI.getType(DefReg);
1496  const unsigned DefSize = DefTy.getSizeInBits();
1497  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1498 
1499  // FIXME: Redundant check, but even less readable when factored out.
1500  if (isFP) {
1501  if (Ty != s32 && Ty != s64) {
1502  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1503  << " constant, expected: " << s32 << " or " << s64
1504  << '\n');
1505  return false;
1506  }
1507 
1508  if (RB.getID() != AArch64::FPRRegBankID) {
1509  LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1510  << " constant on bank: " << RB
1511  << ", expected: FPR\n");
1512  return false;
1513  }
1514 
1515  // The case when we have 0.0 is covered by tablegen. Reject it here so we
1516  // can be sure tablegen works correctly and isn't rescued by this code.
1517  if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1518  return false;
1519  } else {
1520  // s32 and s64 are covered by tablegen.
1521  if (Ty != p0 && Ty != s8 && Ty != s16) {
1522  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1523  << " constant, expected: " << s32 << ", " << s64
1524  << ", or " << p0 << '\n');
1525  return false;
1526  }
1527 
1528  if (RB.getID() != AArch64::GPRRegBankID) {
1529  LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1530  << " constant on bank: " << RB
1531  << ", expected: GPR\n");
1532  return false;
1533  }
1534  }
1535 
1536  // We allow G_CONSTANT of types < 32b.
1537  const unsigned MovOpc =
1538  DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
1539 
1540  if (isFP) {
1541  // Either emit a FMOV, or emit a copy to emit a normal mov.
1542  const TargetRegisterClass &GPRRC =
1543  DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1544  const TargetRegisterClass &FPRRC =
1545  DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1546 
1547  // Can we use a FMOV instruction to represent the immediate?
1548  if (emitFMovForFConstant(I, MRI))
1549  return true;
1550 
1551  // Nope. Emit a copy and use a normal mov instead.
1552  const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1553  MachineOperand &RegOp = I.getOperand(0);
1554  RegOp.setReg(DefGPRReg);
1555  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1556  MIB.buildCopy({DefReg}, {DefGPRReg});
1557 
1558  if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
1559  LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
1560  return false;
1561  }
1562 
1563  MachineOperand &ImmOp = I.getOperand(1);
1564  // FIXME: Is going through int64_t always correct?
1565  ImmOp.ChangeToImmediate(
1566  ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
1567  } else if (I.getOperand(1).isCImm()) {
1568  uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1569  I.getOperand(1).ChangeToImmediate(Val);
1570  } else if (I.getOperand(1).isImm()) {
1571  uint64_t Val = I.getOperand(1).getImm();
1572  I.getOperand(1).ChangeToImmediate(Val);
1573  }
1574 
1575  I.setDesc(TII.get(MovOpc));
1576  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1577  return true;
1578  }
1579  case TargetOpcode::G_EXTRACT: {
1580  Register DstReg = I.getOperand(0).getReg();
1581  Register SrcReg = I.getOperand(1).getReg();
1582  LLT SrcTy = MRI.getType(SrcReg);
1583  LLT DstTy = MRI.getType(DstReg);
1584  (void)DstTy;
1585  unsigned SrcSize = SrcTy.getSizeInBits();
1586 
1587  if (SrcTy.getSizeInBits() > 64) {
1588  // This should be an extract of an s128, which is like a vector extract.
1589  if (SrcTy.getSizeInBits() != 128)
1590  return false;
1591  // Only support extracting 64 bits from an s128 at the moment.
1592  if (DstTy.getSizeInBits() != 64)
1593  return false;
1594 
1595  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1596  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1597  // Check we have the right regbank always.
1598  assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1599  DstRB.getID() == AArch64::FPRRegBankID &&
1600  "Wrong extract regbank!");
1601  (void)SrcRB;
1602 
1603  // Emit the same code as a vector extract.
1604  // Offset must be a multiple of 64.
1605  unsigned Offset = I.getOperand(2).getImm();
1606  if (Offset % 64 != 0)
1607  return false;
1608  unsigned LaneIdx = Offset / 64;
1609  MachineIRBuilder MIB(I);
1610  MachineInstr *Extract = emitExtractVectorElt(
1611  DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1612  if (!Extract)
1613  return false;
1614  I.eraseFromParent();
1615  return true;
1616  }
1617 
1618  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
1619  MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1620  Ty.getSizeInBits() - 1);
1621 
1622  if (SrcSize < 64) {
1623  assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1624  "unexpected G_EXTRACT types");
1625  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1626  }
1627 
1628  DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1629  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1630  MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1631  .addReg(DstReg, 0, AArch64::sub_32);
1632  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1633  AArch64::GPR32RegClass, MRI);
1634  I.getOperand(0).setReg(DstReg);
1635 
1636  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1637  }
1638 
1639  case TargetOpcode::G_INSERT: {
1640  LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
1641  LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1642  unsigned DstSize = DstTy.getSizeInBits();
1643  // Larger inserts are vectors, same-size ones should be something else by
1644  // now (split up or turned into COPYs).
1645  if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1646  return false;
1647 
1648  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
1649  unsigned LSB = I.getOperand(3).getImm();
1650  unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1651  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
1652  MachineInstrBuilder(MF, I).addImm(Width - 1);
1653 
1654  if (DstSize < 64) {
1655  assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1656  "unexpected G_INSERT types");
1657  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1658  }
1659 
1660  Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1661  BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1662  TII.get(AArch64::SUBREG_TO_REG))
1663  .addDef(SrcReg)
1664  .addImm(0)
1665  .addUse(I.getOperand(2).getReg())
1666  .addImm(AArch64::sub_32);
1667  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1668  AArch64::GPR32RegClass, MRI);
1669  I.getOperand(2).setReg(SrcReg);
1670 
1671  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1672  }
1673  case TargetOpcode::G_FRAME_INDEX: {
1674  // allocas and G_FRAME_INDEX are only supported in addrspace(0).
1675  if (Ty != LLT::pointer(0, 64)) {
1676  LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1677  << ", expected: " << LLT::pointer(0, 64) << '\n');
1678  return false;
1679  }
1680  I.setDesc(TII.get(AArch64::ADDXri));
1681 
1682  // MOs for a #0 shifted immediate.
1683  I.addOperand(MachineOperand::CreateImm(0));
1684  I.addOperand(MachineOperand::CreateImm(0));
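  // Illustrative note (not part of the selector source): after this rewrite the
  // instruction has the shape ADDXri %dst, %stack.N, 0, 0 -- the frame address
  // plus an unshifted #0 immediate, which the uimm12 add form expects.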
1685 
1686  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1687  }
1688 
1689  case TargetOpcode::G_GLOBAL_VALUE: {
1690  auto GV = I.getOperand(1).getGlobal();
1691  if (GV->isThreadLocal())
1692  return selectTLSGlobalValue(I, MRI);
1693 
1694  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
1695  if (OpFlags & AArch64II::MO_GOT) {
1696  I.setDesc(TII.get(AArch64::LOADgot));
1697  I.getOperand(1).setTargetFlags(OpFlags);
1698  } else if (TM.getCodeModel() == CodeModel::Large) {
1699  // Materialize the global using movz/movk instructions.
1700  materializeLargeCMVal(I, GV, OpFlags);
1701  I.eraseFromParent();
1702  return true;
1703  } else if (TM.getCodeModel() == CodeModel::Tiny) {
1704  I.setDesc(TII.get(AArch64::ADR));
1705  I.getOperand(1).setTargetFlags(OpFlags);
1706  } else {
1707  I.setDesc(TII.get(AArch64::MOVaddr));
1708  I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1709  MachineInstrBuilder MIB(MF, I);
1710  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1711  OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1712  }
1713  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1714  }
1715 
1716  case TargetOpcode::G_ZEXTLOAD:
1717  case TargetOpcode::G_LOAD:
1718  case TargetOpcode::G_STORE: {
1719  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1720  MachineIRBuilder MIB(I);
1721 
1722  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
1723 
1724  if (PtrTy != LLT::pointer(0, 64)) {
1725  LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1726  << ", expected: " << LLT::pointer(0, 64) << '\n');
1727  return false;
1728  }
1729 
1730  auto &MemOp = **I.memoperands_begin();
1731  if (MemOp.isAtomic()) {
1732  // For now we just support s8 acquire loads to be able to compile stack
1733  // protector code.
1734  if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1735  MemOp.getSize() == 1) {
1736  I.setDesc(TII.get(AArch64::LDARB));
1737  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1738  }
1739  LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
1740  return false;
1741  }
1742  unsigned MemSizeInBits = MemOp.getSize() * 8;
1743 
1744  const Register PtrReg = I.getOperand(1).getReg();
1745 #ifndef NDEBUG
1746  const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
1747  // Sanity-check the pointer register.
1748  assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1749  "Load/Store pointer operand isn't a GPR");
1750  assert(MRI.getType(PtrReg).isPointer() &&
1751  "Load/Store pointer operand isn't a pointer");
1752 #endif
1753 
1754  const Register ValReg = I.getOperand(0).getReg();
1755  const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1756 
1757  const unsigned NewOpc =
1758  selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
1759  if (NewOpc == I.getOpcode())
1760  return false;
1761 
1762  I.setDesc(TII.get(NewOpc));
1763 
1764  uint64_t Offset = 0;
1765  auto *PtrMI = MRI.getVRegDef(PtrReg);
1766 
1767  // Try to fold a GEP into our unsigned immediate addressing mode.
1768  if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1769  if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1770  int64_t Imm = *COff;
1771  const unsigned Size = MemSizeInBits / 8;
1772  const unsigned Scale = Log2_32(Size);
1773  if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1774  Register Ptr2Reg = PtrMI->getOperand(1).getReg();
1775  I.getOperand(1).setReg(Ptr2Reg);
1776  PtrMI = MRI.getVRegDef(Ptr2Reg);
1777  Offset = Imm / Size;
1778  }
1779  }
1780  }
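  // Worked example (illustrative values): for a 32-bit access, Size = 4 and
  // Scale = 2, so a G_GEP constant offset of 16 satisfies
  // (16 & 3) == 0 && 16 >= 0 && 16 < (0x1000 << 2) and folds as
  // Offset = 16 / 4 = 4, i.e. the scaled uimm12 field of LDRWui/STRWui.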
1781 
1782  // If we haven't folded anything into our addressing mode yet, try to fold
1783  // a frame index into the base+offset.
1784  if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1785  I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1786 
1787  I.addOperand(MachineOperand::CreateImm(Offset));
1788 
1789  // If we're storing a 0, use WZR/XZR.
1790  if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1791  if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1792  if (I.getOpcode() == AArch64::STRWui)
1793  I.getOperand(0).setReg(AArch64::WZR);
1794  else if (I.getOpcode() == AArch64::STRXui)
1795  I.getOperand(0).setReg(AArch64::XZR);
1796  }
1797  }
1798 
1799  if (IsZExtLoad) {
1800  // The zextload from a smaller type to i32 should be handled by the importer.
1801  if (MRI.getType(ValReg).getSizeInBits() != 64)
1802  return false;
1803  // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1804  // and zero_extend with SUBREG_TO_REG.
1805  Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1806  Register DstReg = I.getOperand(0).getReg();
1807  I.getOperand(0).setReg(LdReg);
1808 
1809  MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1810  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1811  .addImm(0)
1812  .addUse(LdReg)
1813  .addImm(AArch64::sub_32);
1814  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1815  return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1816  MRI);
1817  }
1818  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1819  }
1820 
1821  case TargetOpcode::G_SMULH:
1822  case TargetOpcode::G_UMULH: {
1823  // Reject the various things we don't support yet.
1824  if (unsupportedBinOp(I, RBI, MRI, TRI))
1825  return false;
1826 
1827  const Register DefReg = I.getOperand(0).getReg();
1828  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1829 
1830  if (RB.getID() != AArch64::GPRRegBankID) {
1831  LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
1832  return false;
1833  }
1834 
1835  if (Ty != LLT::scalar(64)) {
1836  LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1837  << ", expected: " << LLT::scalar(64) << '\n');
1838  return false;
1839  }
1840 
1841  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1842  : AArch64::UMULHrr;
1843  I.setDesc(TII.get(NewOpc));
1844 
1845  // Now that we selected an opcode, we need to constrain the register
1846  // operands to use appropriate classes.
1847  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1848  }
1849  case TargetOpcode::G_FADD:
1850  case TargetOpcode::G_FSUB:
1851  case TargetOpcode::G_FMUL:
1852  case TargetOpcode::G_FDIV:
1853 
1854  case TargetOpcode::G_ASHR:
1855  if (MRI.getType(I.getOperand(0).getReg()).isVector())
1856  return selectVectorASHR(I, MRI);
1857  LLVM_FALLTHROUGH;
1858  case TargetOpcode::G_SHL:
1859  if (Opcode == TargetOpcode::G_SHL &&
1860  MRI.getType(I.getOperand(0).getReg()).isVector())
1861  return selectVectorSHL(I, MRI);
1862  LLVM_FALLTHROUGH;
1863  case TargetOpcode::G_OR:
1864  case TargetOpcode::G_LSHR: {
1865  // Reject the various things we don't support yet.
1866  if (unsupportedBinOp(I, RBI, MRI, TRI))
1867  return false;
1868 
1869  const unsigned OpSize = Ty.getSizeInBits();
1870 
1871  const Register DefReg = I.getOperand(0).getReg();
1872  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1873 
1874  const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1875  if (NewOpc == I.getOpcode())
1876  return false;
1877 
1878  I.setDesc(TII.get(NewOpc));
1879  // FIXME: Should the type be always reset in setDesc?
1880 
1881  // Now that we selected an opcode, we need to constrain the register
1882  // operands to use appropriate classes.
1883  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1884  }
1885 
1886  case TargetOpcode::G_GEP: {
1887  MachineIRBuilder MIRBuilder(I);
1888  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1889  MIRBuilder);
1890  I.eraseFromParent();
1891  return true;
1892  }
1893  case TargetOpcode::G_UADDO: {
1894  // TODO: Support other types.
1895  unsigned OpSize = Ty.getSizeInBits();
1896  if (OpSize != 32 && OpSize != 64) {
1897  LLVM_DEBUG(
1898  dbgs()
1899  << "G_UADDO currently only supported for 32 and 64 b types.\n");
1900  return false;
1901  }
1902 
1903  // TODO: Support vectors.
1904  if (Ty.isVector()) {
1905  LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1906  return false;
1907  }
1908 
1909  // Add and set the set condition flag.
1910  unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1911  MachineIRBuilder MIRBuilder(I);
1912  auto AddsMI = MIRBuilder.buildInstr(
1913  AddsOpc, {I.getOperand(0).getReg()},
1914  {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1915  constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1916 
1917  // Now, put the overflow result in the register given by the first operand
1918  // to the G_UADDO. CSINC increments the result when the predicate is false,
1919  // so to get the increment when it's true, we need to use the inverse. In
1920  // this case, we want to increment when carry is set.
1921  auto CsetMI = MIRBuilder
1922  .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1923  {Register(AArch64::WZR), Register(AArch64::WZR)})
1924  .addImm(getInvertedCondCode(AArch64CC::HS));
1925  constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
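  // Sketch of the emitted sequence for a 32-bit G_UADDO (illustrative only):
  //   ADDSWrr %res, %lhs, %rhs      ; sets NZCV
  //   CSINCWr %carry, wzr, wzr, lo  ; %carry = (carry set) ? 1 : 0
  // getInvertedCondCode(HS) is LO, so the CSINC increments exactly when C is set.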
1926  I.eraseFromParent();
1927  return true;
1928  }
1929 
1930  case TargetOpcode::G_PTR_MASK: {
1931  uint64_t Align = I.getOperand(2).getImm();
1932  if (Align >= 64 || Align == 0)
1933  return false;
1934 
1935  uint64_t Mask = ~((1ULL << Align) - 1);
1936  I.setDesc(TII.get(AArch64::ANDXri));
1937  I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1938 
1939  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1940  }
1941  case TargetOpcode::G_PTRTOINT:
1942  case TargetOpcode::G_TRUNC: {
1943  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1944  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1945 
1946  const Register DstReg = I.getOperand(0).getReg();
1947  const Register SrcReg = I.getOperand(1).getReg();
1948 
1949  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1950  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1951 
1952  if (DstRB.getID() != SrcRB.getID()) {
1953  LLVM_DEBUG(
1954  dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
1955  return false;
1956  }
1957 
1958  if (DstRB.getID() == AArch64::GPRRegBankID) {
1959  const TargetRegisterClass *DstRC =
1960  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1961  if (!DstRC)
1962  return false;
1963 
1964  const TargetRegisterClass *SrcRC =
1965  getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1966  if (!SrcRC)
1967  return false;
1968 
1969  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1970  !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1971  LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
1972  return false;
1973  }
1974 
1975  if (DstRC == SrcRC) {
1976  // Nothing to be done
1977  } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1978  SrcTy == LLT::scalar(64)) {
1979  llvm_unreachable("TableGen can import this case");
1980  return false;
1981  } else if (DstRC == &AArch64::GPR32RegClass &&
1982  SrcRC == &AArch64::GPR64RegClass) {
1983  I.getOperand(1).setSubReg(AArch64::sub_32);
1984  } else {
1985  LLVM_DEBUG(
1986  dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
1987  return false;
1988  }
1989 
1990  I.setDesc(TII.get(TargetOpcode::COPY));
1991  return true;
1992  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1993  if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1994  I.setDesc(TII.get(AArch64::XTNv4i16));
1995  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1996  return true;
1997  }
1998 
1999  if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2000  MachineIRBuilder MIB(I);
2001  MachineInstr *Extract = emitExtractVectorElt(
2002  DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2003  if (!Extract)
2004  return false;
2005  I.eraseFromParent();
2006  return true;
2007  }
2008  }
2009 
2010  return false;
2011  }
2012 
2013  case TargetOpcode::G_ANYEXT: {
2014  const Register DstReg = I.getOperand(0).getReg();
2015  const Register SrcReg = I.getOperand(1).getReg();
2016 
2017  const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2018  if (RBDst.getID() != AArch64::GPRRegBankID) {
2019  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2020  << ", expected: GPR\n");
2021  return false;
2022  }
2023 
2024  const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2025  if (RBSrc.getID() != AArch64::GPRRegBankID) {
2026  LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2027  << ", expected: GPR\n");
2028  return false;
2029  }
2030 
2031  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2032 
2033  if (DstSize == 0) {
2034  LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2035  return false;
2036  }
2037 
2038  if (DstSize != 64 && DstSize > 32) {
2039  LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2040  << ", expected: 32 or 64\n");
2041  return false;
2042  }
2043  // At this point G_ANYEXT is just like a plain COPY, but we need
2044  // to explicitly form the 64-bit value if any.
2045  if (DstSize > 32) {
2046  Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2047  BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2048  .addDef(ExtSrc)
2049  .addImm(0)
2050  .addUse(SrcReg)
2051  .addImm(AArch64::sub_32);
2052  I.getOperand(1).setReg(ExtSrc);
2053  }
2054  return selectCopy(I, TII, MRI, TRI, RBI);
2055  }
2056 
2057  case TargetOpcode::G_ZEXT:
2058  case TargetOpcode::G_SEXT: {
2059  unsigned Opcode = I.getOpcode();
2060  const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
2061  const Register DefReg = I.getOperand(0).getReg();
2062  const Register SrcReg = I.getOperand(1).getReg();
2063  const LLT DstTy = MRI.getType(DefReg);
2064  const LLT SrcTy = MRI.getType(SrcReg);
2065  unsigned DstSize = DstTy.getSizeInBits();
2066  unsigned SrcSize = SrcTy.getSizeInBits();
2067 
2068  assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2069  AArch64::GPRRegBankID &&
2070  "Unexpected ext regbank");
2071 
2072  MachineIRBuilder MIB(I);
2073  MachineInstr *ExtI;
2074  if (DstTy.isVector())
2075  return false; // Should be handled by imported patterns.
2076 
2077  // If we're extending the result of a load whose destination type is
2078  // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2079  // GPR register on AArch64 and all loads which are smaller automatically
2080  // zero-extend the upper bits. E.g.
2081  // %v(s8) = G_LOAD %p, :: (load 1)
2082  // %v2(s32) = G_ZEXT %v(s8)
2083  if (!IsSigned) {
2084  auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2085  if (LoadMI &&
2086  RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2087  const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2088  unsigned BytesLoaded = MemOp->getSize();
2089  if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2090  return selectCopy(I, TII, MRI, TRI, RBI);
2091  }
2092  }
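  // Illustrative case (assumed MIR): %v:gpr(s8) = G_LOAD %p :: (load 1) followed
  // by %v2:gpr(s32) = G_ZEXT %v selects to LDRBBui plus a plain COPY, since the
  // 32-bit load form already writes zeroes into bits 8-31 of the W register.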
2093 
2094  if (DstSize == 64) {
2095  // FIXME: Can we avoid manually doing this?
2096  if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
2097  LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2098  << " operand\n");
2099  return false;
2100  }
2101 
2102  auto SubregToReg =
2103  MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2104  .addImm(0)
2105  .addUse(SrcReg)
2106  .addImm(AArch64::sub_32);
2107 
2108  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2109  {DefReg}, {SubregToReg})
2110  .addImm(0)
2111  .addImm(SrcSize - 1);
2112  } else if (DstSize <= 32) {
2113  ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2114  {DefReg}, {SrcReg})
2115  .addImm(0)
2116  .addImm(SrcSize - 1);
2117  } else {
2118  return false;
2119  }
2120 
2121  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2122  I.eraseFromParent();
2123  return true;
2124  }
2125 
2126  case TargetOpcode::G_SITOFP:
2127  case TargetOpcode::G_UITOFP:
2128  case TargetOpcode::G_FPTOSI:
2129  case TargetOpcode::G_FPTOUI: {
2130  const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2131  SrcTy = MRI.getType(I.getOperand(1).getReg());
2132  const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2133  if (NewOpc == Opcode)
2134  return false;
2135 
2136  I.setDesc(TII.get(NewOpc));
2137  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2138 
2139  return true;
2140  }
2141 
2142 
2143  case TargetOpcode::G_INTTOPTR:
2144  // The importer is currently unable to import pointer types since they
2145  // didn't exist in SelectionDAG.
2146  return selectCopy(I, TII, MRI, TRI, RBI);
2147 
2148  case TargetOpcode::G_BITCAST:
2149  // Imported SelectionDAG rules can handle every bitcast except those that
2150  // bitcast from a type to the same type. Ideally, these shouldn't occur
2151  // but we might not run an optimizer that deletes them. The other exception
2152  // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2153  // of them.
2154  return selectCopy(I, TII, MRI, TRI, RBI);
2155 
2156  case TargetOpcode::G_SELECT: {
2157  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2158  LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2159  << ", expected: " << LLT::scalar(1) << '\n');
2160  return false;
2161  }
2162 
2163  const Register CondReg = I.getOperand(1).getReg();
2164  const Register TReg = I.getOperand(2).getReg();
2165  const Register FReg = I.getOperand(3).getReg();
2166 
2167  if (tryOptSelect(I))
2168  return true;
2169 
2170  Register CSelOpc = selectSelectOpc(I, MRI, RBI);
2171  MachineInstr &TstMI =
2172  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2173  .addDef(AArch64::WZR)
2174  .addUse(CondReg)
2175  .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2176 
2177  MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2178  .addDef(I.getOperand(0).getReg())
2179  .addUse(TReg)
2180  .addUse(FReg)
2181  .addImm(AArch64CC::NE);
2182 
2183  constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2184  constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2185 
2186  I.eraseFromParent();
2187  return true;
2188  }
2189  case TargetOpcode::G_ICMP: {
2190  if (Ty.isVector())
2191  return selectVectorICmp(I, MRI);
2192 
2193  if (Ty != LLT::scalar(32)) {
2194  LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2195  << ", expected: " << LLT::scalar(32) << '\n');
2196  return false;
2197  }
2198 
2199  MachineIRBuilder MIRBuilder(I);
2200  if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2201  MIRBuilder))
2202  return false;
2203  emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
2204  MIRBuilder);
2205  I.eraseFromParent();
2206  return true;
2207  }
2208 
2209  case TargetOpcode::G_FCMP: {
2210  if (Ty != LLT::scalar(32)) {
2211  LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2212  << ", expected: " << LLT::scalar(32) << '\n');
2213  return false;
2214  }
2215 
2216  unsigned CmpOpc = selectFCMPOpc(I, MRI);
2217  if (!CmpOpc)
2218  return false;
2219 
2220  // FIXME: regbank
2221 
2222  AArch64CC::CondCode CC1, CC2;
2223  changeFCMPPredToAArch64CC(
2224  (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2225 
2226  // Partially build the compare. Decide if we need to add a use for the
2227  // third operand based off whether or not we're comparing against 0.0.
2228  auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2229  .addUse(I.getOperand(2).getReg());
2230 
2231  // If we don't have an immediate compare, then we need to add a use of the
2232  // register which wasn't used for the immediate.
2233  // Note that the immediate will always be the last operand.
2234  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2235  CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
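  // E.g. a compare against +0.0 selects the immediate forms FCMPSri/FCMPDri,
  // which take a single source register; any other RHS keeps both register
  // operands (FCMPSrr/FCMPDrr). (Illustrative summary of selectFCMPOpc.)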
2236 
2237  const Register DefReg = I.getOperand(0).getReg();
2238  Register Def1Reg = DefReg;
2239  if (CC2 != AArch64CC::AL)
2240  Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2241 
2242  MachineInstr &CSetMI =
2243  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2244  .addDef(Def1Reg)
2245  .addUse(AArch64::WZR)
2246  .addUse(AArch64::WZR)
2247  .addImm(getInvertedCondCode(CC1));
2248 
2249  if (CC2 != AArch64CC::AL) {
2250  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2251  MachineInstr &CSet2MI =
2252  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2253  .addDef(Def2Reg)
2254  .addUse(AArch64::WZR)
2255  .addUse(AArch64::WZR)
2256  .addImm(getInvertedCondCode(CC2));
2257  MachineInstr &OrMI =
2258  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2259  .addDef(DefReg)
2260  .addUse(Def1Reg)
2261  .addUse(Def2Reg);
2262  constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2263  constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2264  }
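  // Illustrative example: a predicate that maps to two AArch64 condition codes
  // (e.g. ordered-not-equal) materializes each code with its own CSINC and then
  // ORRs the two 0/1 results together to form the final boolean.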
2265  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2266  constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2267 
2268  I.eraseFromParent();
2269  return true;
2270  }
2271  case TargetOpcode::G_VASTART:
2272  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2273  : selectVaStartAAPCS(I, MF, MRI);
2274  case TargetOpcode::G_INTRINSIC:
2275  return selectIntrinsic(I, MRI);
2276  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2277  return selectIntrinsicWithSideEffects(I, MRI);
2278  case TargetOpcode::G_IMPLICIT_DEF: {
2279  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2280  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2281  const Register DstReg = I.getOperand(0).getReg();
2282  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2283  const TargetRegisterClass *DstRC =
2284  getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2285  RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2286  return true;
2287  }
2288  case TargetOpcode::G_BLOCK_ADDR: {
2289  if (TM.getCodeModel() == CodeModel::Large) {
2290  materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2291  I.eraseFromParent();
2292  return true;
2293  } else {
2294  I.setDesc(TII.get(AArch64::MOVaddrBA));
2295  auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2296  I.getOperand(0).getReg())
2297  .addBlockAddress(I.getOperand(1).getBlockAddress(),
2298  /* Offset */ 0, AArch64II::MO_PAGE)
2299  .addBlockAddress(
2300  I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2301  AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2302  I.eraseFromParent();
2303  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2304  }
2305  }
2306  case TargetOpcode::G_INTRINSIC_TRUNC:
2307  return selectIntrinsicTrunc(I, MRI);
2308  case TargetOpcode::G_INTRINSIC_ROUND:
2309  return selectIntrinsicRound(I, MRI);
2310  case TargetOpcode::G_BUILD_VECTOR:
2311  return selectBuildVector(I, MRI);
2312  case TargetOpcode::G_MERGE_VALUES:
2313  return selectMergeValues(I, MRI);
2314  case TargetOpcode::G_UNMERGE_VALUES:
2315  return selectUnmergeValues(I, MRI);
2316  case TargetOpcode::G_SHUFFLE_VECTOR:
2317  return selectShuffleVector(I, MRI);
2318  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2319  return selectExtractElt(I, MRI);
2320  case TargetOpcode::G_INSERT_VECTOR_ELT:
2321  return selectInsertElt(I, MRI);
2322  case TargetOpcode::G_CONCAT_VECTORS:
2323  return selectConcatVectors(I, MRI);
2324  case TargetOpcode::G_JUMP_TABLE:
2325  return selectJumpTable(I, MRI);
2326  }
2327 
2328  return false;
2329 }
2330 
2331 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2332  MachineRegisterInfo &MRI) const {
2333  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2334  Register JTAddr = I.getOperand(0).getReg();
2335  unsigned JTI = I.getOperand(1).getIndex();
2336  Register Index = I.getOperand(2).getReg();
2337  MachineIRBuilder MIB(I);
2338 
2339  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2340  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2341  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2342  {JTAddr, Index})
2343  .addJumpTableIndex(JTI);
2344 
2345  // Build the indirect branch.
2346  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2347  I.eraseFromParent();
2348  return true;
2349 }
2350 
2351 bool AArch64InstructionSelector::selectJumpTable(
2352  MachineInstr &I, MachineRegisterInfo &MRI) const {
2353  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2354  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2355 
2356  Register DstReg = I.getOperand(0).getReg();
2357  unsigned JTI = I.getOperand(1).getIndex();
2358  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2359  MachineIRBuilder MIB(I);
2360  auto MovMI =
2361  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2362  .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2363  .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2364  I.eraseFromParent();
2365  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2366 }
2367 
2368 bool AArch64InstructionSelector::selectTLSGlobalValue(
2369  MachineInstr &I, MachineRegisterInfo &MRI) const {
2370  if (!STI.isTargetMachO())
2371  return false;
2372  MachineFunction &MF = *I.getParent()->getParent();
2373  MF.getFrameInfo().setAdjustsStack(true);
2374 
2375  const GlobalValue &GV = *I.getOperand(1).getGlobal();
2376  MachineIRBuilder MIB(I);
2377 
2378  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2379  .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2380 
2381  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2382  {Register(AArch64::X0)})
2383  .addImm(0);
2384 
2385  // TLS calls preserve all registers except those that absolutely must be
2386  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2387  // silly).
2388  MIB.buildInstr(AArch64::BLR, {}, {Load})
2389  .addDef(AArch64::X0, RegState::Implicit)
2390  .addRegMask(TRI.getTLSCallPreservedMask());
2391 
2392  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2393  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2394  MRI);
2395  I.eraseFromParent();
2396  return true;
2397 }
2398 
2399 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2400  MachineInstr &I, MachineRegisterInfo &MRI) const {
2401  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2402 
2403  // Select the correct opcode.
2404  unsigned Opc = 0;
2405  if (!SrcTy.isVector()) {
2406  switch (SrcTy.getSizeInBits()) {
2407  default:
2408  case 16:
2409  Opc = AArch64::FRINTZHr;
2410  break;
2411  case 32:
2412  Opc = AArch64::FRINTZSr;
2413  break;
2414  case 64:
2415  Opc = AArch64::FRINTZDr;
2416  break;
2417  }
2418  } else {
2419  unsigned NumElts = SrcTy.getNumElements();
2420  switch (SrcTy.getElementType().getSizeInBits()) {
2421  default:
2422  break;
2423  case 16:
2424  if (NumElts == 4)
2425  Opc = AArch64::FRINTZv4f16;
2426  else if (NumElts == 8)
2427  Opc = AArch64::FRINTZv8f16;
2428  break;
2429  case 32:
2430  if (NumElts == 2)
2431  Opc = AArch64::FRINTZv2f32;
2432  else if (NumElts == 4)
2433  Opc = AArch64::FRINTZv4f32;
2434  break;
2435  case 64:
2436  if (NumElts == 2)
2437  Opc = AArch64::FRINTZv2f64;
2438  break;
2439  }
2440  }
2441 
2442  if (!Opc) {
2443  // Didn't get an opcode above, bail.
2444  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2445  return false;
2446  }
2447 
2448  // Legalization would have set us up perfectly for this; we just need to
2449  // set the opcode and move on.
2450  I.setDesc(TII.get(Opc));
2451  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2452 }
2453 
2454 bool AArch64InstructionSelector::selectIntrinsicRound(
2455  MachineInstr &I, MachineRegisterInfo &MRI) const {
2456  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2457 
2458  // Select the correct opcode.
2459  unsigned Opc = 0;
2460  if (!SrcTy.isVector()) {
2461  switch (SrcTy.getSizeInBits()) {
2462  default:
2463  case 16:
2464  Opc = AArch64::FRINTAHr;
2465  break;
2466  case 32:
2467  Opc = AArch64::FRINTASr;
2468  break;
2469  case 64:
2470  Opc = AArch64::FRINTADr;
2471  break;
2472  }
2473  } else {
2474  unsigned NumElts = SrcTy.getNumElements();
2475  switch (SrcTy.getElementType().getSizeInBits()) {
2476  default:
2477  break;
2478  case 16:
2479  if (NumElts == 4)
2480  Opc = AArch64::FRINTAv4f16;
2481  else if (NumElts == 8)
2482  Opc = AArch64::FRINTAv8f16;
2483  break;
2484  case 32:
2485  if (NumElts == 2)
2486  Opc = AArch64::FRINTAv2f32;
2487  else if (NumElts == 4)
2488  Opc = AArch64::FRINTAv4f32;
2489  break;
2490  case 64:
2491  if (NumElts == 2)
2492  Opc = AArch64::FRINTAv2f64;
2493  break;
2494  }
2495  }
2496 
2497  if (!Opc) {
2498  // Didn't get an opcode above, bail.
2499  LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2500  return false;
2501  }
2502 
2503  // Legalization would have set us up perfectly for this; we just need to
2504  // set the opcode and move on.
2505  I.setDesc(TII.get(Opc));
2506  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2507 }
2508 
2509 bool AArch64InstructionSelector::selectVectorICmp(
2510  MachineInstr &I, MachineRegisterInfo &MRI) const {
2511  Register DstReg = I.getOperand(0).getReg();
2512  LLT DstTy = MRI.getType(DstReg);
2513  Register SrcReg = I.getOperand(2).getReg();
2514  Register Src2Reg = I.getOperand(3).getReg();
2515  LLT SrcTy = MRI.getType(SrcReg);
2516 
2517  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2518  unsigned NumElts = DstTy.getNumElements();
2519 
2520  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2521  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2522  // Third index is cc opcode:
2523  // 0 == eq
2524  // 1 == ugt
2525  // 2 == uge
2526  // 3 == ult
2527  // 4 == ule
2528  // 5 == sgt
2529  // 6 == sge
2530  // 7 == slt
2531  // 8 == sle
2532  // ne is done by negating 'eq' result.
2533 
2534  // This table below assumes that for some comparisons the operands will be
2535  // commuted.
2536  // ult op == commute + ugt op
2537  // ule op == commute + uge op
2538  // slt op == commute + sgt op
2539  // sle op == commute + sge op
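  // Worked example (hypothetical input): an ult compare of two <4 x s32> vectors
  // uses PredIdx = 3 with SwapOperands = true, EltIdx = Log2_32(32 / 8) = 2 and
  // NumEltsIdx = Log2_32(4 / 2) = 1, which picks AArch64::CMHIv4i32 from the
  // table below and emits it with the operands commuted.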
2540  unsigned PredIdx = 0;
2541  bool SwapOperands = false;
2542  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2543  switch (Pred) {
2544  case CmpInst::ICMP_NE:
2545  case CmpInst::ICMP_EQ:
2546  PredIdx = 0;
2547  break;
2548  case CmpInst::ICMP_UGT:
2549  PredIdx = 1;
2550  break;
2551  case CmpInst::ICMP_UGE:
2552  PredIdx = 2;
2553  break;
2554  case CmpInst::ICMP_ULT:
2555  PredIdx = 3;
2556  SwapOperands = true;
2557  break;
2558  case CmpInst::ICMP_ULE:
2559  PredIdx = 4;
2560  SwapOperands = true;
2561  break;
2562  case CmpInst::ICMP_SGT:
2563  PredIdx = 5;
2564  break;
2565  case CmpInst::ICMP_SGE:
2566  PredIdx = 6;
2567  break;
2568  case CmpInst::ICMP_SLT:
2569  PredIdx = 7;
2570  SwapOperands = true;
2571  break;
2572  case CmpInst::ICMP_SLE:
2573  PredIdx = 8;
2574  SwapOperands = true;
2575  break;
2576  default:
2577  llvm_unreachable("Unhandled icmp predicate");
2578  return false;
2579  }
2580 
2581  // This table obviously should be tablegen'd when we have our GISel native
2582  // tablegen selector.
2583 
2584  static const unsigned OpcTable[4][4][9] = {
2585  {
2586  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2587  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2588  0 /* invalid */},
2589  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2590  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2591  0 /* invalid */},
2592  {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2593  AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2594  AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2595  {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2596  AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2597  AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2598  },
2599  {
2600  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2601  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2602  0 /* invalid */},
2603  {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2604  AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2605  AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2606  {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2607  AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2608  AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2609  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2610  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2611  0 /* invalid */}
2612  },
2613  {
2614  {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2615  AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2616  AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2617  {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2618  AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2619  AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2620  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2621  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2622  0 /* invalid */},
2623  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2624  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2625  0 /* invalid */}
2626  },
2627  {
2628  {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2629  AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2630  AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2631  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2632  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2633  0 /* invalid */},
2634  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2635  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2636  0 /* invalid */},
2637  {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2638  0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2639  0 /* invalid */}
2640  },
2641  };
2642  unsigned EltIdx = Log2_32(SrcEltSize / 8);
2643  unsigned NumEltsIdx = Log2_32(NumElts / 2);
2644  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2645  if (!Opc) {
2646  LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2647  return false;
2648  }
2649 
2650  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2651  const TargetRegisterClass *SrcRC =
2652  getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2653  if (!SrcRC) {
2654  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2655  return false;
2656  }
2657 
2658  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2659  if (SrcTy.getSizeInBits() == 128)
2660  NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2661 
2662  if (SwapOperands)
2663  std::swap(SrcReg, Src2Reg);
2664 
2665  MachineIRBuilder MIB(I);
2666  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2667  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2668 
2669  // Invert if we had a 'ne' cc.
2670  if (NotOpc) {
2671  Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2672  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2673  } else {
2674  MIB.buildCopy(DstReg, Cmp.getReg(0));
2675  }
2676  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2677  I.eraseFromParent();
2678  return true;
2679 }
2680 
2681 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
2682  unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
2683  MachineIRBuilder &MIRBuilder) const {
2684  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
2685 
2686  auto BuildFn = [&](unsigned SubregIndex) {
2687  auto Ins =
2688  MIRBuilder
2689  .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2690  .addImm(SubregIndex);
2691  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2692  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2693  return &*Ins;
2694  };
2695 
2696  switch (EltSize) {
2697  case 16:
2698  return BuildFn(AArch64::hsub);
2699  case 32:
2700  return BuildFn(AArch64::ssub);
2701  case 64:
2702  return BuildFn(AArch64::dsub);
2703  default:
2704  return nullptr;
2705  }
2706 }
2707 
2708 bool AArch64InstructionSelector::selectMergeValues(
2709  MachineInstr &I, MachineRegisterInfo &MRI) const {
2710  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2711  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2712  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2713  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2714  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2715 
2716  if (I.getNumOperands() != 3)
2717  return false;
2718 
2719  // Merging 2 s64s into an s128.
2720  if (DstTy == LLT::scalar(128)) {
2721  if (SrcTy.getSizeInBits() != 64)
2722  return false;
2723  MachineIRBuilder MIB(I);
2724  Register DstReg = I.getOperand(0).getReg();
2725  Register Src1Reg = I.getOperand(1).getReg();
2726  Register Src2Reg = I.getOperand(2).getReg();
2727  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2728  MachineInstr *InsMI =
2729  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2730  if (!InsMI)
2731  return false;
2732  MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2733  Src2Reg, /* LaneIdx */ 1, RB, MIB);
2734  if (!Ins2MI)
2735  return false;
2736  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2737  constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2738  I.eraseFromParent();
2739  return true;
2740  }
2741 
2742  if (RB.getID() != AArch64::GPRRegBankID)
2743  return false;
2744 
2745  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2746  return false;
2747 
2748  auto *DstRC = &AArch64::GPR64RegClass;
2749  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
2750  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2751  TII.get(TargetOpcode::SUBREG_TO_REG))
2752  .addDef(SubToRegDef)
2753  .addImm(0)
2754  .addUse(I.getOperand(1).getReg())
2755  .addImm(AArch64::sub_32);
2756  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2757  // Need to anyext the second scalar before we can use bfm
2758  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2759  TII.get(TargetOpcode::SUBREG_TO_REG))
2760  .addDef(SubToRegDef2)
2761  .addImm(0)
2762  .addUse(I.getOperand(2).getReg())
2763  .addImm(AArch64::sub_32);
2764  MachineInstr &BFM =
2765  *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
2766  .addDef(I.getOperand(0).getReg())
2767  .addUse(SubToRegDef)
2768  .addUse(SubToRegDef2)
2769  .addImm(32)
2770  .addImm(31);
2771  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2772  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2773  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2774  I.eraseFromParent();
2775  return true;
2776 }
2777 
2778 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2779  const unsigned EltSize) {
2780  // Choose a lane copy opcode and subregister based off of the size of the
2781  // vector's elements.
2782  switch (EltSize) {
2783  case 16:
2784  CopyOpc = AArch64::CPYi16;
2785  ExtractSubReg = AArch64::hsub;
2786  break;
2787  case 32:
2788  CopyOpc = AArch64::CPYi32;
2789  ExtractSubReg = AArch64::ssub;
2790  break;
2791  case 64:
2792  CopyOpc = AArch64::CPYi64;
2793  ExtractSubReg = AArch64::dsub;
2794  break;
2795  default:
2796  // Unknown size, bail out.
2797  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2798  return false;
2799  }
2800  return true;
2801 }
2802 
2803 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2804  Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2805  Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2806  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2807  unsigned CopyOpc = 0;
2808  unsigned ExtractSubReg = 0;
2809  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2810  LLVM_DEBUG(
2811  dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2812  return nullptr;
2813  }
2814 
2815  const TargetRegisterClass *DstRC =
2816  getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2817  if (!DstRC) {
2818  LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2819  return nullptr;
2820  }
2821 
2822  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2823  const LLT &VecTy = MRI.getType(VecReg);
2824  const TargetRegisterClass *VecRC =
2825  getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2826  if (!VecRC) {
2827  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2828  return nullptr;
2829  }
2830 
2831  // The register that we're going to copy into.
2832  Register InsertReg = VecReg;
2833  if (!DstReg)
2834  DstReg = MRI.createVirtualRegister(DstRC);
2835  // If the lane index is 0, we just use a subregister COPY.
2836  if (LaneIdx == 0) {
2837  auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2838  .addReg(VecReg, 0, ExtractSubReg);
2839  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2840  return &*Copy;
2841  }
2842 
2843  // Lane copies require 128-bit wide registers. If we're dealing with an
2844  // unpacked vector, then we need to move up to that width. Insert an implicit
2845  // def and a subregister insert to get us there.
2846  if (VecTy.getSizeInBits() != 128) {
2847  MachineInstr *ScalarToVector = emitScalarToVector(
2848  VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2849  if (!ScalarToVector)
2850  return nullptr;
2851  InsertReg = ScalarToVector->getOperand(0).getReg();
2852  }
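  // For example (illustrative): extracting lane 1 of a <2 x s32> value living in
  // a 64-bit D register first widens it to a Q register via IMPLICIT_DEF +
  // INSERT_SUBREG (dsub), then CPYi32 copies lane 1 out of the 128-bit register.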
2853 
2854  MachineInstr *LaneCopyMI =
2855  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2856  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2857 
2858  // Make sure that we actually constrain the initial copy.
2859  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2860  return LaneCopyMI;
2861 }
2862 
2863 bool AArch64InstructionSelector::selectExtractElt(
2864  MachineInstr &I, MachineRegisterInfo &MRI) const {
2865  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2866  "unexpected opcode!");
2867  Register DstReg = I.getOperand(0).getReg();
2868  const LLT NarrowTy = MRI.getType(DstReg);
2869  const Register SrcReg = I.getOperand(1).getReg();
2870  const LLT WideTy = MRI.getType(SrcReg);
2871  (void)WideTy;
2872  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2873  "source register size too small!");
2874  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2875 
2876  // Need the lane index to determine the correct copy opcode.
2877  MachineOperand &LaneIdxOp = I.getOperand(2);
2878  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2879 
2880  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2881  LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2882  return false;
2883  }
2884 
2885  // Find the index to extract from.
2886  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2887  if (!VRegAndVal)
2888  return false;
2889  unsigned LaneIdx = VRegAndVal->Value;
2890 
2891  MachineIRBuilder MIRBuilder(I);
2892 
2893  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2894  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2895  LaneIdx, MIRBuilder);
2896  if (!Extract)
2897  return false;
2898 
2899  I.eraseFromParent();
2900  return true;
2901 }
2902 
2903 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2904  MachineInstr &I, MachineRegisterInfo &MRI) const {
2905  unsigned NumElts = I.getNumOperands() - 1;
2906  Register SrcReg = I.getOperand(NumElts).getReg();
2907  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2908  const LLT SrcTy = MRI.getType(SrcReg);
2909 
2910  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2911  if (SrcTy.getSizeInBits() > 128) {
2912  LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2913  return false;
2914  }
2915 
2916  MachineIRBuilder MIB(I);
2917 
2918  // We implement a split vector operation by treating the sub-vectors as
2919  // scalars and extracting them.
2920  const RegisterBank &DstRB =
2921  *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2922  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2923  Register Dst = I.getOperand(OpIdx).getReg();
2924  MachineInstr *Extract =
2925  emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2926  if (!Extract)
2927  return false;
2928  }
2929  I.eraseFromParent();
2930  return true;
2931 }
2932 
2933 bool AArch64InstructionSelector::selectUnmergeValues(
2934  MachineInstr &I, MachineRegisterInfo &MRI) const {
2935  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2936  "unexpected opcode");
2937 
2938  // TODO: Handle unmerging into GPRs and from scalars to scalars.
2939  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2940  AArch64::FPRRegBankID ||
2941  RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2942  AArch64::FPRRegBankID) {
2943  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2944  "currently unsupported.\n");
2945  return false;
2946  }
2947 
2948  // The last operand is the vector source register, and every other operand is
2949  // a register to unpack into.
2950  unsigned NumElts = I.getNumOperands() - 1;
2951  Register SrcReg = I.getOperand(NumElts).getReg();
2952  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2953  const LLT WideTy = MRI.getType(SrcReg);
2954  (void)WideTy;
2955  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2956  "can only unmerge from vector or s128 types!");
2957  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2958  "source register size too small!");
2959 
2960  if (!NarrowTy.isScalar())
2961  return selectSplitVectorUnmerge(I, MRI);
2962 
2963  MachineIRBuilder MIB(I);
2964 
2965  // Choose a lane copy opcode and subregister based off of the size of the
2966  // vector's elements.
2967  unsigned CopyOpc = 0;
2968  unsigned ExtractSubReg = 0;
2969  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
2970  return false;
2971 
2972  // Set up for the lane copies.
2973  MachineBasicBlock &MBB = *I.getParent();
2974 
2975  // Stores the registers we'll be copying from.
2976  SmallVector<Register, 4> InsertRegs;
2977 
2978  // We'll use the first register twice, so we only need NumElts-1 registers.
2979  unsigned NumInsertRegs = NumElts - 1;
2980 
2981  // If our elements fit into exactly 128 bits, then we can copy from the source
2982  // directly. Otherwise, we need to do a bit of setup with some subregister
2983  // inserts.
2984  if (NarrowTy.getSizeInBits() * NumElts == 128) {
2985  InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
2986  } else {
2987  // No. We have to perform subregister inserts. For each insert, create an
2988  // implicit def and a subregister insert, and save the register we create.
2989  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2990  Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2991  MachineInstr &ImpDefMI =
2992  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2993  ImpDefReg);
2994 
2995  // Now, create the subregister insert from SrcReg.
2996  Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2997  MachineInstr &InsMI =
2998  *BuildMI(MBB, I, I.getDebugLoc(),
2999  TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3000  .addUse(ImpDefReg)
3001  .addUse(SrcReg)
3002  .addImm(AArch64::dsub);
3003 
3004  constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3005  constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3006 
3007  // Save the register so that we can copy from it after.
3008  InsertRegs.push_back(InsertReg);
3009  }
3010  }
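  // Illustrative contrast: unmerging a <4 x s32> (exactly 128 bits) copies lanes
  // directly out of SrcReg, whereas a 64-bit source such as <4 x s16> goes
  // through the IMPLICIT_DEF + INSERT_SUBREG setup above so the lane copies have
  // a full 128-bit register to index into.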
3011 
3012  // Now that we've created any necessary subregister inserts, we can
3013  // create the copies.
3014  //
3015  // Perform the first copy separately as a subregister copy.
3016  Register CopyTo = I.getOperand(0).getReg();
3017  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3018  .addReg(InsertRegs[0], 0, ExtractSubReg);
3019  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3020 
3021  // Now, perform the remaining copies as vector lane copies.
3022  unsigned LaneIdx = 1;
3023  for (Register InsReg : InsertRegs) {
3024  Register CopyTo = I.getOperand(LaneIdx).getReg();
3025  MachineInstr &CopyInst =
3026  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3027  .addUse(InsReg)
3028  .addImm(LaneIdx);
3029  constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3030  ++LaneIdx;
3031  }
3032 
3033  // Separately constrain the first copy's destination. Because of the
3034  // limitation in constrainOperandRegClass, we can't guarantee that this will
3035  // actually be constrained. So, do it ourselves using the second operand.
3036  const TargetRegisterClass *RC =
3037  MRI.getRegClassOrNull(I.getOperand(1).getReg());
3038  if (!RC) {
3039  LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3040  return false;
3041  }
3042 
3043  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3044  I.eraseFromParent();
3045  return true;
3046 }
3047 
3048 bool AArch64InstructionSelector::selectConcatVectors(
3049  MachineInstr &I, MachineRegisterInfo &MRI) const {
3050  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3051  "Unexpected opcode");
3052  Register Dst = I.getOperand(0).getReg();
3053  Register Op1 = I.getOperand(1).getReg();
3054  Register Op2 = I.getOperand(2).getReg();
3055  MachineIRBuilder MIRBuilder(I);
3056  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3057  if (!ConcatMI)
3058  return false;
3059  I.eraseFromParent();
3060  return true;
3061 }
3062 
3063 unsigned
3064 AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3065  MachineFunction &MF) const {
3066  Type *CPTy = CPVal->getType();
3067  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3068  if (Align == 0)
3069  Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3070 
3072  return MCP->getConstantPoolIndex(CPVal, Align);
3073 }
3074 
3075 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3076  Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3077  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3078 
3079  auto Adrp =
3080  MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3081  .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3082 
3083  MachineInstr *LoadMI = nullptr;
3084  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3085  case 16:
3086  LoadMI =
3087  &*MIRBuilder
3088  .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3089  .addConstantPoolIndex(CPIdx, 0,
3090  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3091  break;
3092  case 8:
3093  LoadMI = &*MIRBuilder
3094  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3095  .addConstantPoolIndex(
3096  CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3097  break;
3098  default:
3099  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3100  << *CPVal->getType());
3101  return nullptr;
3102  }
3103  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3104  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3105  return LoadMI;
3106 }
3107 
3108 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
3109 /// size and RB.
3110 static std::pair<unsigned, unsigned>
3111 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3112  unsigned Opc, SubregIdx;
3113  if (RB.getID() == AArch64::GPRRegBankID) {
3114  if (EltSize == 32) {
3115  Opc = AArch64::INSvi32gpr;
3116  SubregIdx = AArch64::ssub;
3117  } else if (EltSize == 64) {
3118  Opc = AArch64::INSvi64gpr;
3119  SubregIdx = AArch64::dsub;
3120  } else {
3121  llvm_unreachable("invalid elt size!");
3122  }
3123  } else {
3124  if (EltSize == 8) {
3125  Opc = AArch64::INSvi8lane;
3126  SubregIdx = AArch64::bsub;
3127  } else if (EltSize == 16) {
3128  Opc = AArch64::INSvi16lane;
3129  SubregIdx = AArch64::hsub;
3130  } else if (EltSize == 32) {
3131  Opc = AArch64::INSvi32lane;
3132  SubregIdx = AArch64::ssub;
3133  } else if (EltSize == 64) {
3134  Opc = AArch64::INSvi64lane;
3135  SubregIdx = AArch64::dsub;
3136  } else {
3137  llvm_unreachable("invalid elt size!");
3138  }
3139  }
3140  return std::make_pair(Opc, SubregIdx);
3141 }
3142 
3143 MachineInstr *
3144 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3145  MachineOperand &RHS,
3146  MachineIRBuilder &MIRBuilder) const {
3147  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3148  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3149  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3150  {AArch64::ADDWrr, AArch64::ADDWri}};
3151  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3152  auto ImmFns = selectArithImmed(RHS);
3153  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3154  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3155 
3156  // If we matched a valid constant immediate, add those operands.
3157  if (ImmFns) {
3158  for (auto &RenderFn : *ImmFns)
3159  RenderFn(AddMI);
3160  } else {
3161  AddMI.addUse(RHS.getReg());
3162  }
3163 
3164  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3165  return &*AddMI;
3166 }
3167 
3168 MachineInstr *
3169 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3170  MachineIRBuilder &MIRBuilder) const {
3171  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3172  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3173  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3174  {AArch64::ADDSWrr, AArch64::ADDSWri}};
3175  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3176  auto ImmFns = selectArithImmed(RHS);
3177  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3178  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3179 
3180  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3181 
3182  // If we matched a valid constant immediate, add those operands.
3183  if (ImmFns) {
3184  for (auto &RenderFn : *ImmFns)
3185  RenderFn(CmpMI);
3186  } else {
3187  CmpMI.addUse(RHS.getReg());
3188  }
3189 
3190  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3191  return &*CmpMI;
3192 }
3193 
3194 MachineInstr *
3195 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3196  MachineIRBuilder &MIRBuilder) const {
3197  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3198  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3199  bool Is32Bit = (RegSize == 32);
3200  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3201  {AArch64::ANDSWrr, AArch64::ANDSWri}};
3202  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3203 
3204  // We might be able to fold an immediate into the TST. We need to make sure
3205  // it's a logical immediate though, since ANDS requires that.
3206  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3207  bool IsImmForm = ValAndVReg.hasValue() &&
3208  AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3209  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3210  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3211 
3212  if (IsImmForm)
3213  TstMI.addImm(
3214  AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3215  else
3216  TstMI.addUse(RHS);
3217 
3218  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3219  return &*TstMI;
3220 }
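// Illustrative example: a TST only folds a constant RHS when it is a valid
// logical immediate for the register size. With a 64-bit LHS and RHS = 0xff
// (encodable), this produces $xzr = ANDSXri %lhs, <encoded 0xff>; a value
// such as 0x1234567 is not encodable, so the register form ANDSXrr is used.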
3221 
3222 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3223  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3224  MachineIRBuilder &MIRBuilder) const {
3225  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3226  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3227 
3228  // Fold the compare if possible.
3229  MachineInstr *FoldCmp =
3230  tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3231  if (FoldCmp)
3232  return FoldCmp;
3233 
3234  // Can't fold into a CMN. Just emit a normal compare.
3235  unsigned CmpOpc = 0;
3236  Register ZReg;
3237 
3238  LLT CmpTy = MRI.getType(LHS.getReg());
3239  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3240  "Expected scalar or pointer");
3241  if (CmpTy == LLT::scalar(32)) {
3242  CmpOpc = AArch64::SUBSWrr;
3243  ZReg = AArch64::WZR;
3244  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3245  CmpOpc = AArch64::SUBSXrr;
3246  ZReg = AArch64::XZR;
3247  } else {
3248  return nullptr;
3249  }
3250 
3251  // Try to match immediate forms.
3252  auto ImmFns = selectArithImmed(RHS);
3253  if (ImmFns)
3254  CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3255 
3256  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3257  // If we matched a valid constant immediate, add those operands.
3258  if (ImmFns) {
3259  for (auto &RenderFn : *ImmFns)
3260  RenderFn(CmpMI);
3261  } else {
3262  CmpMI.addUse(RHS.getReg());
3263  }
3264 
3265  // Make sure that we can constrain the compare that we emitted.
3266  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3267  return &*CmpMI;
3268 }
3269 
3270 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3271  Optional<Register> Dst, Register Op1, Register Op2,
3272  MachineIRBuilder &MIRBuilder) const {
3273  // We implement a vector concat by:
3274  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3275  // 2. Insert the upper vector into the destination's upper element
3276  // TODO: some of this code is common with G_BUILD_VECTOR handling.
3277  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3278 
3279  const LLT Op1Ty = MRI.getType(Op1);
3280  const LLT Op2Ty = MRI.getType(Op2);
3281 
3282  if (Op1Ty != Op2Ty) {
3283  LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3284  return nullptr;
3285  }
3286  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3287 
3288  if (Op1Ty.getSizeInBits() >= 128) {
3289  LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3290  return nullptr;
3291  }
3292 
3293  // At the moment we just support 64 bit vector concats.
3294  if (Op1Ty.getSizeInBits() != 64) {
3295  LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3296  return nullptr;
3297  }
3298 
3299  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3300  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3301  const TargetRegisterClass *DstRC =
3302  getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3303 
3304  MachineInstr *WidenedOp1 =
3305  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3306  MachineInstr *WidenedOp2 =
3307  emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3308  if (!WidenedOp1 || !WidenedOp2) {
3309  LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3310  return nullptr;
3311  }
3312 
3313  // Now do the insert of the upper element.
3314  unsigned InsertOpc, InsSubRegIdx;
3315  std::tie(InsertOpc, InsSubRegIdx) =
3316  getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3317 
3318  if (!Dst)
3319  Dst = MRI.createVirtualRegister(DstRC);
3320  auto InsElt =
3321  MIRBuilder
3322  .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3323  .addImm(1) /* Lane index */
3324  .addUse(WidenedOp2->getOperand(0).getReg())
3325  .addImm(0);
3326  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3327  return &*InsElt;
3328 }
3329 
3330 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3331  MachineInstr &I, MachineRegisterInfo &MRI) const {
3332  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3333  "Expected a G_FCONSTANT!");
3334  MachineOperand &ImmOp = I.getOperand(1);
3335  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3336 
3337  // Only handle 32 and 64 bit defs for now.
3338  if (DefSize != 32 && DefSize != 64)
3339  return nullptr;
3340 
3341  // Don't handle null values using FMOV.
3342  if (ImmOp.getFPImm()->isNullValue())
3343  return nullptr;
3344 
3345  // Get the immediate representation for the FMOV.
3346  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3347  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3348  : AArch64_AM::getFP64Imm(ImmValAPF);
3349 
3350  // If this is -1, it means the immediate can't be represented as the requested
3351  // floating point value. Bail.
3352  if (Imm == -1)
3353  return nullptr;
3354 
3355  // Update MI to represent the new FMOV instruction, constrain it, and return.
3356  ImmOp.ChangeToImmediate(Imm);
3357  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3358  I.setDesc(TII.get(MovOpc));
3359  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3360  return &I;
3361 }
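// Illustrative example: a G_FCONSTANT of double 2.0 has a valid 8-bit FMOV
// encoding, so the instruction is rewritten in place to FMOVDi. A value such
// as 0.1 has no FMOV encoding (getFP64Imm returns -1), so nullptr is returned
// and the caller has to materialize the constant some other way.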
3362 
3363 MachineInstr *
3364 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3365  MachineIRBuilder &MIRBuilder) const {
3366  // CSINC increments the result when the predicate is false. Invert it.
3367  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3368  CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3369  auto I =
3370  MIRBuilder
3371  .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
3372  .addImm(InvCC);
3373  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3374  return &*I;
3375 }
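// Illustrative example: for an "eq" integer compare, the inverted predicate is
// "ne", so this emits %def = CSINCWr $wzr, $wzr, ne -- equivalent to
// "cset w<def>, eq", i.e. the result is 1 when the compare was equal and 0
// otherwise.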
3376 
3377 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3378  MachineIRBuilder MIB(I);
3379  MachineRegisterInfo &MRI = *MIB.getMRI();
3381 
3382  // We want to recognize this pattern:
3383  //
3384  // $z = G_FCMP pred, $x, $y
3385  // ...
3386  // $w = G_SELECT $z, $a, $b
3387  //
3388  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3389  // some copies/truncs in between.)
3390  //
3391  // If we see this, then we can emit something like this:
3392  //
3393  // fcmp $x, $y
3394  // fcsel $w, $a, $b, pred
3395  //
3396  // Rather than emitting both of the rather long sequences in the standard
3397  // G_FCMP/G_SELECT select methods.
3398 
3399  // First, check if the condition is defined by a compare.
3400  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3401  while (CondDef) {
3402  // We can only fold if all of the defs have one use.
3403  if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3404  return false;
3405 
3406  // We can skip over G_TRUNC since the condition is 1-bit.
3407  // Truncating/extending can have no impact on the value.
3408  unsigned Opc = CondDef->getOpcode();
3409  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3410  break;
3411 
3412  // Can't see past copies from physregs.
3413  if (Opc == TargetOpcode::COPY &&
3414  Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3415  return false;
3416 
3417  CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3418  }
3419 
3420  // Is the condition defined by a compare?
3421  if (!CondDef)
3422  return false;
3423 
3424  unsigned CondOpc = CondDef->getOpcode();
3425  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3426  return false;
3427 
3428  AArch64CC::CondCode CondCode;
3429  if (CondOpc == TargetOpcode::G_ICMP) {
3430  CondCode = changeICMPPredToAArch64CC(
3431  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3432  if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3433  CondDef->getOperand(1), MIB)) {
3434  LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3435  return false;
3436  }
3437  } else {
3438  // Get the condition code for the select.
3439  AArch64CC::CondCode CondCode2;
3440  changeFCMPPredToAArch64CC(
3441  (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3442  CondCode2);
3443 
3444  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3445  // instructions to emit the comparison.
3446  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3447  // unnecessary.
3448  if (CondCode2 != AArch64CC::AL)
3449  return false;
3450 
3451  // Make sure we'll be able to select the compare.
3452  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3453  if (!CmpOpc)
3454  return false;
3455 
3456  // Emit a new compare.
3457  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3458  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3459  Cmp.addUse(CondDef->getOperand(3).getReg());
3460  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3461  }
3462 
3463  // Emit the select.
3464  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3465  auto CSel =
3466  MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3467  {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3468  .addImm(CondCode);
3469  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3470  I.eraseFromParent();
3471  return true;
3472 }
3473 
3474 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3475  MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3476  MachineIRBuilder &MIRBuilder) const {
3477  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3478  "Unexpected MachineOperand");
3479  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3480  // We want to find this sort of thing:
3481  // x = G_SUB 0, y
3482  // G_ICMP z, x
3483  //
3484  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3485  // e.g:
3486  //
3487  // cmn z, y
3488 
3489  // Helper lambda to detect the subtract followed by the compare.
3490  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3491  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3492  if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3493  return false;
3494 
3495  // Need to make sure NZCV is the same at the end of the transformation.
3496  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3497  return false;
3498 
3499  // We want to match against SUBs.
3500  if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3501  return false;
3502 
3503  // Make sure that we're getting
3504  // x = G_SUB 0, y
3505  auto ValAndVReg =
3506  getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3507  if (!ValAndVReg || ValAndVReg->Value != 0)
3508  return false;
3509 
3510  // This can safely be represented as a CMN.
3511  return true;
3512  };
3513 
3514  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3515  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3516  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
3517  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3518  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
3519 
3520  // Given this:
3521  //
3522  // x = G_SUB 0, y
3523  // G_ICMP x, z
3524  //
3525  // Produce this:
3526  //
3527  // cmn y, z
3528  if (IsCMN(LHSDef, CC))
3529  return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3530 
3531  // Same idea here, but with the RHS of the compare instead:
3532  //
3533  // Given this:
3534  //
3535  // x = G_SUB 0, y
3536  // G_ICMP z, x
3537  //
3538  // Produce this:
3539  //
3540  // cmn z, y
3541  if (IsCMN(RHSDef, CC))
3542  return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3543 
3544  // Given this:
3545  //
3546  // z = G_AND x, y
3547  // G_ICMP z, 0
3548  //
3549  // Produce this if the compare is signed:
3550  //
3551  // tst x, y
3552  if (!isUnsignedICMPPred(P) && LHSDef &&
3553  LHSDef->getOpcode() == TargetOpcode::G_AND) {
3554  // Make sure that the RHS is 0.
3555  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3556  if (!ValAndVReg || ValAndVReg->Value != 0)
3557  return nullptr;
3558 
3559  return emitTST(LHSDef->getOperand(1).getReg(),
3560  LHSDef->getOperand(2).getReg(), MIRBuilder);
3561  }
3562 
3563  return nullptr;
3564 }
3565 
3566 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3567  // Try to match a vector splat operation into a dup instruction.
3568  // We're looking for this pattern:
3569  // %scalar:gpr(s64) = COPY $x0
3570  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3571  // %cst0:gpr(s32) = G_CONSTANT i32 0
3572  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3573  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3574  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3575  // %zerovec(<2 x s32>)
3576  //
3577  // ...into:
3578  // %splat = DUP %scalar
3579  // We use the regbank of the scalar to determine which kind of dup to use.
3580  MachineIRBuilder MIB(I);
3581  MachineRegisterInfo &MRI = *MIB.getMRI();
3583  using namespace TargetOpcode;
3584  using namespace MIPatternMatch;
3585 
3586  // Begin matching the insert.
3587  auto *InsMI =
3588  getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
3589  if (!InsMI)
3590  return false;
3591  // Match the undef vector operand.
3592  auto *UndefMI =
3593  getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
3594  if (!UndefMI)
3595  return false;
3596  // Match the scalar being splatted.
3597  Register ScalarReg = InsMI->getOperand(2).getReg();
3598  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3599  // Match the index constant 0.
3600  int64_t Index = 0;
3601  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3602  return false;
3603 
3604  // The shuffle's second operand doesn't matter if the mask is all zero.
3605  const Constant *Mask = I.getOperand(3).getShuffleMask();
3606  if (!isa<ConstantAggregateZero>(Mask))
3607  return false;
3608 
3609  // We're done, now find out what kind of splat we need.
3610  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3611  LLT EltTy = VecTy.getElementType();
3612  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3613  LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3614  return false;
3615  }
3616  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3617  static const unsigned OpcTable[2][2] = {
3618  {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3619  {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3620  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3621 
3622  // For FP splats, we need to widen the scalar reg via undef too.
3623  if (IsFP) {
3624  MachineInstr *Widen = emitScalarToVector(
3625  EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3626  if (!Widen)
3627  return false;
3628  ScalarReg = Widen->getOperand(0).getReg();
3629  }
3630  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3631  if (IsFP)
3632  Dup.addImm(0);
3633  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3634  I.eraseFromParent();
3635  return true;
3636 }
3637 
3638 bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3639  if (TM.getOptLevel() == CodeGenOpt::None)
3640  return false;
3641  if (tryOptVectorDup(I))
3642  return true;
3643  return false;
3644 }
3645 
3646 bool AArch64InstructionSelector::selectShuffleVector(
3647  MachineInstr &I, MachineRegisterInfo &MRI) const {
3648  if (tryOptVectorShuffle(I))
3649  return true;
3650  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3651  Register Src1Reg = I.getOperand(1).getReg();
3652  const LLT Src1Ty = MRI.getType(Src1Reg);
3653  Register Src2Reg = I.getOperand(2).getReg();
3654  const LLT Src2Ty = MRI.getType(Src2Reg);
3655  const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
3656 
3657  MachineBasicBlock &MBB = *I.getParent();
3658  MachineFunction &MF = *MBB.getParent();
3659  LLVMContext &Ctx = MF.getFunction().getContext();
3660 
3661  SmallVector<int, 8> Mask;
3662  ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
3663 
3664  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3665  // it's originated from a <1 x T> type. Those should have been lowered into
3666  // G_BUILD_VECTOR earlier.
3667  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3668  LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3669  return false;
3670  }
3671 
3672  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3673 
3674  SmallVector<Constant *, 64> CstIdxs;
3675  for (int Val : Mask) {
3676  // For now, we'll just assume any undef indexes are 0. This should be
3677  // optimized in the future, e.g. to select DUP etc.
3678  Val = Val < 0 ? 0 : Val;
3679  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3680  unsigned Offset = Byte + Val * BytesPerElt;
3681  CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3682  }
3683  }
3684 
3685  MachineIRBuilder MIRBuilder(I);
3686 
3687  // Use a constant pool to load the index vector for TBL.
3688  Constant *CPVal = ConstantVector::get(CstIdxs);
3689  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3690  if (!IndexLoad) {
3691  LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3692  return false;
3693  }
3694 
3695  if (DstTy.getSizeInBits() != 128) {
3696  assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3697  // This case can be done with TBL1.
3698  MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
3699  if (!Concat) {
3700  LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3701  return false;
3702  }
3703 
3704  // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3705  IndexLoad =
3706  emitScalarToVector(64, &AArch64::FPR128RegClass,
3707  IndexLoad->getOperand(0).getReg(), MIRBuilder);
3708 
3709  auto TBL1 = MIRBuilder.buildInstr(
3710  AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3711  {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3712  constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3713 
3714  auto Copy =
3715  MIRBuilder
3716  .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3717  .addReg(TBL1.getReg(0), 0, AArch64::dsub);
3718  RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3719  I.eraseFromParent();
3720  return true;
3721  }
3722 
3723  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3724  // Q registers for regalloc.
3725  auto RegSeq = MIRBuilder
3726  .buildInstr(TargetOpcode::REG_SEQUENCE,
3727  {&AArch64::QQRegClass}, {Src1Reg})
3728  .addImm(AArch64::qsub0)
3729  .addUse(Src2Reg)
3730  .addImm(AArch64::qsub1);
3731 
3732  auto TBL2 =
3733  MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3734  {RegSeq, IndexLoad->getOperand(0).getReg()});
3735  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3736  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3737  I.eraseFromParent();
3738  return true;
3739 }
3740 
3741 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3742  Optional<Register> DstReg, Register SrcReg, Register EltReg,
3743  unsigned LaneIdx, const RegisterBank &RB,
3744  MachineIRBuilder &MIRBuilder) const {
3745  MachineInstr *InsElt = nullptr;
3746  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3747  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3748 
3749  // Create a register to define with the insert if one wasn't passed in.
3750  if (!DstReg)
3751  DstReg = MRI.createVirtualRegister(DstRC);
3752 
3753  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3754  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3755 
3756  if (RB.getID() == AArch64::FPRRegBankID) {
3757  auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3758  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3759  .addImm(LaneIdx)
3760  .addUse(InsSub->getOperand(0).getReg())
3761  .addImm(0);
3762  } else {
3763  InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3764  .addImm(LaneIdx)
3765  .addUse(EltReg);
3766  }
3767 
3768  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3769  return InsElt;
3770 }
3771 
3772 bool AArch64InstructionSelector::selectInsertElt(
3773  MachineInstr &I, MachineRegisterInfo &MRI) const {
3774  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3775 
3776  // Get information on the destination.
3777  Register DstReg = I.getOperand(0).getReg();
3778  const LLT DstTy = MRI.getType(DstReg);
3779  unsigned VecSize = DstTy.getSizeInBits();
3780 
3781  // Get information on the element we want to insert into the destination.
3782  Register EltReg = I.getOperand(2).getReg();
3783  const LLT EltTy = MRI.getType(EltReg);
3784  unsigned EltSize = EltTy.getSizeInBits();
3785  if (EltSize < 16 || EltSize > 64)
3786  return false; // Don't support all element types yet.
3787 
3788  // Find the definition of the index. Bail out if it's not defined by a
3789  // G_CONSTANT.
3790  Register IdxReg = I.getOperand(3).getReg();
3791  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3792  if (!VRegAndVal)
3793  return false;
3794  unsigned LaneIdx = VRegAndVal->Value;
3795 
3796  // Perform the lane insert.
3797  Register SrcReg = I.getOperand(1).getReg();
3798  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3799  MachineIRBuilder MIRBuilder(I);
3800 
3801  if (VecSize < 128) {
3802  // If the vector we're inserting into is smaller than 128 bits, widen it
3803  // to 128 to do the insert.
3804  MachineInstr *ScalarToVec = emitScalarToVector(
3805  VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3806  if (!ScalarToVec)
3807  return false;
3808  SrcReg = ScalarToVec->getOperand(0).getReg();
3809  }
3810 
3811  // Create an insert into a new FPR128 register.
3812  // Note that if our vector is already 128 bits, we end up emitting an extra
3813  // register.
3814  MachineInstr *InsMI =
3815  emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3816 
3817  if (VecSize < 128) {
3818  // If we had to widen to perform the insert, then we have to demote back to
3819  // the original size to get the result we want.
3820  Register DemoteVec = InsMI->getOperand(0).getReg();
3821  const TargetRegisterClass *RC =
3822  getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3823  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3824  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3825  return false;
3826  }
3827  unsigned SubReg = 0;
3828  if (!getSubRegForClass(RC, TRI, SubReg))
3829  return false;
3830  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3831  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3832  << "\n");
3833  return false;
3834  }
3835  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3836  .addReg(DemoteVec, 0, SubReg);
3837  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3838  } else {
3839  // No widening needed.
3840  InsMI->getOperand(0).setReg(DstReg);
3841  constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3842  }
3843 
3844  I.eraseFromParent();
3845  return true;
3846 }
3847 
3848 bool AArch64InstructionSelector::selectBuildVector(
3849  MachineInstr &I, MachineRegisterInfo &MRI) const {
3850  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3851  // Until we port more of the optimized selections, for now just use a vector
3852  // insert sequence.
3853  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3854  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3855  unsigned EltSize = EltTy.getSizeInBits();
3856  if (EltSize < 16 || EltSize > 64)
3857  return false; // Don't support all element types yet.
3858  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3859  MachineIRBuilder MIRBuilder(I);
3860 
3861  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3862  MachineInstr *ScalarToVec =
3863  emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3864  I.getOperand(1).getReg(), MIRBuilder);
3865  if (!ScalarToVec)
3866  return false;
3867 
3868  Register DstVec = ScalarToVec->getOperand(0).getReg();
3869  unsigned DstSize = DstTy.getSizeInBits();
3870 
3871  // Keep track of the last MI we inserted. Later on, we might be able to save
3872  // a copy using it.
3873  MachineInstr *PrevMI = nullptr;
3874  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
3875  // Note that if we don't do a subregister copy, we can end up making an
3876  // extra register.
3877  PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3878  MIRBuilder);
3879  DstVec = PrevMI->getOperand(0).getReg();
3880  }
3881 
3882  // If DstTy's size in bits is less than 128, then emit a subregister copy
3883  // from DstVec to the last register we've defined.
3884  if (DstSize < 128) {
3885  // Force this to be FPR using the destination vector.
3886  const TargetRegisterClass *RC =
3887  getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
3888  if (!RC)
3889  return false;
3890  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3891  LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3892  return false;
3893  }
3894 
3895  unsigned SubReg = 0;
3896  if (!getSubRegForClass(RC, TRI, SubReg))
3897  return false;
3898  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3899  LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3900  << "\n");
3901  return false;
3902  }
3903 
3904  Register Reg = MRI.createVirtualRegister(RC);
3905  Register DstReg = I.getOperand(0).getReg();
3906 
3907  MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3908  .addReg(DstVec, 0, SubReg);
3909  MachineOperand &RegOp = I.getOperand(1);
3910  RegOp.setReg(Reg);
3911  RBI.constrainGenericRegister(DstReg, *RC, MRI);
3912  } else {
3913  // We don't need a subregister copy. Save a copy by re-using the
3914  // destination register on the final insert.
3915  assert(PrevMI && "PrevMI was null?");
3916  PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3917  constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3918  }
3919 
3920  I.eraseFromParent();
3921  return true;
3922 }
3923 
3924 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3925 /// ID if it exists, and 0 otherwise.
3926 static unsigned findIntrinsicID(MachineInstr &I) {
3927  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3928  return Op.isIntrinsicID();
3929  });
3930  if (IntrinOp == I.operands_end())
3931  return 0;
3932  return IntrinOp->getIntrinsicID();
3933 }
3934 
3935 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3936  MachineInstr &I, MachineRegisterInfo &MRI) const {
3937  // Find the intrinsic ID.
3938  unsigned IntrinID = findIntrinsicID(I);
3939  if (!IntrinID)
3940  return false;
3941  MachineIRBuilder MIRBuilder(I);
3942 
3943  // Select the instruction.
3944  switch (IntrinID) {
3945  default:
3946  return false;
3947  case Intrinsic::trap:
3948  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3949  break;
3950  case Intrinsic::debugtrap:
3951  if (!STI.isTargetWindows())
3952  return false;
3953  MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3954  break;
3955  }
3956 
3957  I.eraseFromParent();
3958  return true;
3959 }
3960 
3961 bool AArch64InstructionSelector::selectIntrinsic(
3962  MachineInstr &I, MachineRegisterInfo &MRI) const {
3963  unsigned IntrinID = findIntrinsicID(I);
3964  if (!IntrinID)
3965  return false;
3966  MachineIRBuilder MIRBuilder(I);
3967 
3968  switch (IntrinID) {
3969  default:
3970  break;
3971  case Intrinsic::aarch64_crypto_sha1h:
3972  Register DstReg = I.getOperand(0).getReg();
3973  Register SrcReg = I.getOperand(2).getReg();
3974 
3975  // FIXME: Should this be an assert?
3976  if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3977  MRI.getType(SrcReg).getSizeInBits() != 32)
3978  return false;
3979 
3980  // The operation has to happen on FPRs. Set up some new FPR registers for
3981  // the source and destination if they are on GPRs.
3982  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3983  SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3984  MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3985 
3986  // Make sure the copy ends up getting constrained properly.
3987  RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3988  AArch64::GPR32RegClass, MRI);
3989  }
3990 
3991  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3992  DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3993 
3994  // Actually insert the instruction.
3995  auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3996  constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3997 
3998  // Did we create a new register for the destination?
3999  if (DstReg != I.getOperand(0).getReg()) {
4000  // Yep. Copy the result of the instruction back into the original
4001  // destination.
4002  MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4003  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4004  AArch64::GPR32RegClass, MRI);
4005  }
4006 
4007  I.eraseFromParent();
4008  return true;
4009  }
4010  return false;
4011 }
4012 
4013 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4014  auto &MI = *Root.getParent();
4015  auto &MBB = *MI.getParent();
4016  auto &MF = *MBB.getParent();
4017  auto &MRI = MF.getRegInfo();
4018  uint64_t Immed;
4019  if (Root.isImm())
4020  Immed = Root.getImm();
4021  else if (Root.isCImm())
4022  Immed = Root.getCImm()->getZExtValue();
4023  else if (Root.isReg()) {
4024  auto ValAndVReg =
4025  getConstantVRegValWithLookThrough(Root.getReg(), MRI);
4026  if (!ValAndVReg)
4027  return None;
4028  Immed = ValAndVReg->Value;
4029  } else
4030  return None;
4031  return Immed;
4032 }
4033 
4034 InstructionSelector::ComplexRendererFns
4035 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4036  auto MaybeImmed = getImmedFromMO(Root);
4037  if (MaybeImmed == None || *MaybeImmed > 31)
4038  return None;
4039  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4040  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4041 }
4042 
4043 InstructionSelector::ComplexRendererFns
4044 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4045  auto MaybeImmed = getImmedFromMO(Root);
4046  if (MaybeImmed == None || *MaybeImmed > 31)
4047  return None;
4048  uint64_t Enc = 31 - *MaybeImmed;
4049  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4050 }
4051 
4052 InstructionSelector::ComplexRendererFns
4053 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4054  auto MaybeImmed = getImmedFromMO(Root);
4055  if (MaybeImmed == None || *MaybeImmed > 63)
4056  return None;
4057  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4058  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4059 }
4060 
4061 InstructionSelector::ComplexRendererFns
4062 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4063  auto MaybeImmed = getImmedFromMO(Root);
4064  if (MaybeImmed == None || *MaybeImmed > 63)
4065  return None;
4066  uint64_t Enc = 63 - *MaybeImmed;
4067  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4068 }
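// Illustrative note: these four renderers correspond to the immr/imms operands
// of the UBFM/SBFM encodings of immediate shifts. For example, a 32-bit left
// shift by 3 renders A = (32 - 3) & 0x1f = 29 and B = 31 - 3 = 28, matching
// the "lsl w0, w1, #3" == "ubfm w0, w1, #29, #28" alias.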
4069 
4070 /// Helper to select an immediate value that can be represented as a 12-bit
4071 /// value shifted left by either 0 or 12. If it is possible to do so, return
4072 /// the immediate and shift value. If not, return None.
4073 ///
4074 /// Used by selectArithImmed and selectNegArithImmed.
4075 InstructionSelector::ComplexRendererFns
4076 AArch64InstructionSelector::select12BitValueWithLeftShift(
4077  uint64_t Immed) const {
4078  unsigned ShiftAmt;
4079  if (Immed >> 12 == 0) {
4080  ShiftAmt = 0;
4081  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4082  ShiftAmt = 12;
4083  Immed = Immed >> 12;
4084  } else
4085  return None;
4086 
4087  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4088  return {{
4089  [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4090  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4091  }};
4092 }
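// Worked example: 0x123 already fits in 12 bits, so it renders as
// (imm = 0x123, lsl #0). 0x123000 has its low 12 bits clear and fits in 24
// bits, so it renders as (imm = 0x123, lsl #12). Something like 0x123456
// satisfies neither condition and returns None.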
4093 
4094 /// SelectArithImmed - Select an immediate value that can be represented as
4095 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
4096 /// Val set to the 12-bit value and Shift set to the shifter operand.
4097 InstructionSelector::ComplexRendererFns
4098 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4099  // This function is called from the addsub_shifted_imm ComplexPattern,
4100  // which lists [imm] as the list of opcode it's interested in, however
4101  // we still need to check whether the operand is actually an immediate
4102  // here because the ComplexPattern opcode list is only used in
4103  // root-level opcode matching.
4104  auto MaybeImmed = getImmedFromMO(Root);
4105  if (MaybeImmed == None)
4106  return None;
4107  return select12BitValueWithLeftShift(*MaybeImmed);
4108 }
4109 
4110 /// SelectNegArithImmed - As above, but negates the value before trying to
4111 /// select it.
4112 InstructionSelector::ComplexRendererFns
4113 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4114  // We need a register here, because we need to know if we have a 64 or 32
4115  // bit immediate.
4116  if (!Root.isReg())
4117  return None;
4118  auto MaybeImmed = getImmedFromMO(Root);
4119  if (MaybeImmed == None)
4120  return None;
4121  uint64_t Immed = *MaybeImmed;
4122 
4123  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4124  // have the opposite effect on the C flag, so this pattern mustn't match under
4125  // those circumstances.
4126  if (Immed == 0)
4127  return None;
4128 
4129  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4130  // the root.
4131  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4132  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4133  Immed = ~((uint32_t)Immed) + 1;
4134  else
4135  Immed = ~Immed + 1ULL;
4136 
4137  if (Immed & 0xFFFFFFFFFF000000ULL)
4138  return None;
4139 
4140  Immed &= 0xFFFFFFULL;
4141  return select12BitValueWithLeftShift(Immed);
4142 }
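// Illustrative example: for a 64-bit operand defined as G_CONSTANT -10, the
// negated value is 10, which renders as (imm = 10, lsl #0); patterns using
// this can then select the ADDS/CMN form instead of SUBS/CMP. A constant 0 is
// rejected above because negating it would flip the meaning of the carry flag.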
4143 
4144 /// Return true if it is worth folding MI into an extended register. That is,
4145 /// if it's safe to pull it into the addressing mode of a load or store as a
4146 /// shift.
4147 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4148  MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4149  // Always fold if there is one use, or if we're optimizing for size.
4150  Register DefReg = MI.getOperand(0).getReg();
4151  if (MRI.hasOneUse(DefReg) ||
4152  MI.getParent()->getParent()->getFunction().hasMinSize())
4153  return true;
4154 
4155  // It's better to avoid folding and recomputing shifts when we don't have a
4156  // fastpath.
4157  if (!STI.hasLSLFast())
4158  return false;
4159 
4160  // We have a fastpath, so folding a shift in and potentially computing it
4161  // many times may be beneficial. Check if this is only used in memory ops.
4162  // If it is, then we should fold.
4163  return all_of(MRI.use_instructions(DefReg),
4164  [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4165 }
4166 
4167 /// This is used for computing addresses like this:
4168 ///
4169 /// ldr x1, [x2, x3, lsl #3]
4170 ///
4171 /// Where x2 is the base register, and x3 is an offset register. The shift-left
4172 /// is a constant value specific to this load instruction. That is, we'll never
4173 /// see anything other than a 3 here (which corresponds to the size of the
4174 /// element being loaded.)
4175 InstructionSelector::ComplexRendererFns
4176 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4177  MachineOperand &Root, unsigned SizeInBytes) const {
4178  if (!Root.isReg())
4179  return None;
4180  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4181 
4182  // Make sure that the memory op is a valid size.
4183  int64_t LegalShiftVal = Log2_32(SizeInBytes);
4184  if (LegalShiftVal == 0)
4185  return None;
4186 
4187  // We want to find something like this:
4188  //
4189  // val = G_CONSTANT LegalShiftVal
4190  // shift = G_SHL off_reg val
4191  // ptr = G_GEP base_reg shift
4192  // x = G_LOAD ptr
4193  //
4194  // And fold it into this addressing mode:
4195  //
4196  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4197 
4198  // Check if we can find the G_GEP.
4199  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4200  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4201  return None;
4202 
4203  // Now, try to match an opcode which will match our specific offset.
4204  // We want a G_SHL or a G_MUL.
4205  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4206  if (!OffsetInst)
4207  return None;
4208 
4209  unsigned OffsetOpc = OffsetInst->getOpcode();
4210  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4211  return None;
4212 
4213  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4214  return None;
4215 
4216  // Now, try to find the specific G_CONSTANT. Start by assuming that the
4217  // register we will offset is the LHS, and the register containing the
4218  // constant is the RHS.
4219  Register OffsetReg = OffsetInst->getOperand(1).getReg();
4220  Register ConstantReg = OffsetInst->getOperand(2).getReg();
4221  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4222  if (!ValAndVReg) {
4223  // We didn't get a constant on the RHS. If the opcode is a shift, then
4224  // we're done.
4225  if (OffsetOpc == TargetOpcode::G_SHL)
4226  return None;
4227 
4228  // If we have a G_MUL, we can use either register. Try looking at the RHS.
4229  std::swap(OffsetReg, ConstantReg);
4230  ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4231  if (!ValAndVReg)
4232  return None;
4233  }
4234 
4235  // The value must fit into 3 bits, and must be positive. Make sure that is
4236  // true.
4237  int64_t ImmVal = ValAndVReg->Value;
4238 
4239  // Since we're going to pull this into a shift, the constant value must be
4240  // a power of 2. If we got a multiply, then we need to check this.
4241  if (OffsetOpc == TargetOpcode::G_MUL) {
4242  if (!isPowerOf2_32(ImmVal))
4243  return None;
4244 
4245  // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4246  ImmVal = Log2_32(ImmVal);
4247  }
4248 
4249  if ((ImmVal & 0x7) != ImmVal)
4250  return None;
4251 
4252  // We are only allowed to shift by LegalShiftVal. This shift value is built
4253  // into the instruction, so we can't just use whatever we want.
4254  if (ImmVal != LegalShiftVal)
4255  return None;
4256 
4257  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4258  // offset. Signify that we are shifting by setting the shift flag to 1.
4259  return {{[=](MachineInstrBuilder &MIB) {
4260  MIB.addUse(Gep->getOperand(1).getReg());
4261  },
4262  [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4263  [=](MachineInstrBuilder &MIB) {
4264  // Need to add both immediates here to make sure that they are both
4265  // added to the instruction.
4266  MIB.addImm(0);
4267  MIB.addImm(1);
4268  }}};
4269 }
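// Illustrative example: for an 8-byte load whose address is
//   %off = G_MUL %idx, 8      (or equivalently %off = G_SHL %idx, 3)
//   %addr = G_GEP %base, %off
// the constant 8 is a power of two, Log2_32(8) == 3 matches the legal shift
// for this access size, and the operands render as [%base, %idx, lsl #3].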
4270 
4271 /// This is used for computing addresses like this:
4272 ///
4273 /// ldr x1, [x2, x3]
4274 ///
4275 /// Where x2 is the base register, and x3 is an offset register.
4276 ///
4277 /// When possible (or profitable) to fold a G_GEP into the address calculation,
4278 /// this will do so. Otherwise, it will return None.
4279 InstructionSelector::ComplexRendererFns
4280 AArch64InstructionSelector::selectAddrModeRegisterOffset(
4281  MachineOperand &Root) const {
4282  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4283 
4284  // We need a GEP.
4285  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4286  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4287  return None;
4288 
4289  // If this is used more than once, let's not bother folding.
4290  // TODO: Check if they are memory ops. If they are, then we can still fold
4291  // without having to recompute anything.
4292  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4293  return None;
4294 
4295  // Base is the GEP's LHS, offset is its RHS.
4296  return {{[=](MachineInstrBuilder &MIB) {
4297  MIB.addUse(Gep->getOperand(1).getReg());
4298  },
4299  [=](MachineInstrBuilder &MIB) {
4300  MIB.addUse(Gep->getOperand(2).getReg());
4301  },
4302  [=](MachineInstrBuilder &MIB) {
4303  // Need to add both immediates here to make sure that they are both
4304  // added to the instruction.
4305  MIB.addImm(0);
4306  MIB.addImm(0);
4307  }}};
4308 }
4309 
4310 /// This is intended to be equivalent to selectAddrModeXRO in
4311 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4312 InstructionSelector::ComplexRendererFns
4313 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4314  unsigned SizeInBytes) const {
4315  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4316 
4317  // If we have a constant offset, then we probably don't want to match a
4318  // register offset.
4319  if (isBaseWithConstantOffset(Root, MRI))
4320  return None;
4321 
4322  // Try to fold shifts into the addressing mode.
4323  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4324  if (AddrModeFns)
4325  return AddrModeFns;
4326 
4327  // If that doesn't work, see if it's possible to fold in registers from
4328  // a GEP.
4329  return selectAddrModeRegisterOffset(Root);
4330 }
4331 
4332 /// Select a "register plus unscaled signed 9-bit immediate" address. This
4333 /// should only match when there is an offset that is not valid for a scaled
4334 /// immediate addressing mode. The "Size" argument is the size in bytes of the
4335 /// memory reference, which is needed here to know what is valid for a scaled
4336 /// immediate.
4337 InstructionSelector::ComplexRendererFns
4338 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4339  unsigned Size) const {
4340  MachineRegisterInfo &MRI =
4341  Root.getParent()->getParent()->getParent()->getRegInfo();
4342 
4343  if (!Root.isReg())
4344  return None;
4345 
4346  if (!isBaseWithConstantOffset(Root, MRI))
4347  return None;
4348 
4349  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4350  if (!RootDef)
4351  return None;
4352 
4353  MachineOperand &OffImm = RootDef->getOperand(2);
4354  if (!OffImm.isReg())
4355  return None;
4356  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4357  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4358  return None;
4359  int64_t RHSC;
4360  MachineOperand &RHSOp1 = RHS->getOperand(1);
4361  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4362  return None;
4363  RHSC = RHSOp1.getCImm()->getSExtValue();
4364 
4365  // If the offset is valid as a scaled immediate, don't match here.
4366  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4367  return None;
4368  if (RHSC >= -256 && RHSC < 256) {
4369  MachineOperand &Base = RootDef->getOperand(1);
4370  return {{
4371  [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4372  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4373  }};
4374  }
4375  return None;
4376 }
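// Illustrative example: a 4-byte access at offset 3 cannot use the scaled
// form (3 is not a multiple of 4), but 3 lies in [-256, 256), so this renders
// (base, 3) for the unscaled LDUR/STUR encoding. An offset of 8 is rejected
// here because the scaled form already handles it.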
4377 
4378 /// Select a "register plus scaled unsigned 12-bit immediate" address. The
4379 /// "Size" argument is the size in bytes of the memory reference, which
4380 /// determines the scale.
4381 InstructionSelector::ComplexRendererFns
4382 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4383  unsigned Size) const {
4384  MachineRegisterInfo &MRI =
4385  Root.getParent()->getParent()->getParent()->getRegInfo();
4386 
4387  if (!Root.isReg())
4388  return None;
4389 
4390  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4391  if (!RootDef)
4392  return None;
4393 
4394  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4395  return {{
4396  [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4397  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4398  }};
4399  }
4400 
4401  if (isBaseWithConstantOffset(Root, MRI)) {
4402  MachineOperand &LHS = RootDef->getOperand(1);
4403  MachineOperand &RHS = RootDef->getOperand(2);
4404  MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4405  MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4406  if (LHSDef && RHSDef) {
4407  int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4408  unsigned Scale = Log2_32(Size);
4409  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4410  if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
4411  return {{
4412  [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4413  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4414  }};
4415 
4416  return {{
4417  [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4418  [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4419  }};
4420  }
4421  }
4422  }
4423 
4424  // Before falling back to our general case, check if the unscaled
4425  // instructions can handle this. If so, that's preferable.
4426  if (selectAddrModeUnscaled(Root, Size).hasValue())
4427  return None;
4428 
4429  return {{
4430  [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4431  [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4432  }};
4433 }
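// Illustrative example: for an 8-byte load of %base + 24, Scale is 3, 24 is a
// multiple of 8 and below 0x1000 << 3, so this renders (%base, 24 >> 3 = 3),
// which selects to "ldr x0, [base, #24]" in the scaled unsigned-offset form.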
4434 
4435 /// Given a shift instruction, return the correct shift type for that
4436 /// instruction.
4437 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4438  // TODO: Handle AArch64_AM::ROR
4439  switch (MI.getOpcode()) {
4440  default:
4441  return AArch64_AM::InvalidShiftExtend;
4442  case TargetOpcode::G_SHL:
4443  return AArch64_AM::LSL;
4444  case TargetOpcode::G_LSHR:
4445  return AArch64_AM::LSR;
4446  case TargetOpcode::G_ASHR:
4447  return AArch64_AM::ASR;
4448  }
4449 }
4450 
4451 /// Select a "shifted register" operand. If the value is not shifted, set the
4452 /// shift operand to a default value of "lsl 0".
4453 ///
4454 /// TODO: Allow shifted register to be rotated in logical instructions.
4455 InstructionSelector::ComplexRendererFns
4456 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4457  if (!Root.isReg())
4458  return None;
4459  MachineRegisterInfo &MRI =
4460  Root.getParent()->getParent()->getParent()->getRegInfo();
4461 
4462  // Check if the operand is defined by an instruction which corresponds to
4463  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4464  //
4465  // TODO: Handle AArch64_AM::ROR for logical instructions.
4466  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4467  if (!ShiftInst)
4468  return None;
4469  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4470  if (ShType == AArch64_AM::InvalidShiftExtend)
4471  return None;
4472  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4473  return None;
4474 
4475  // Need an immediate on the RHS.
4476  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4477  auto Immed = getImmedFromMO(ShiftRHS);
4478  if (!Immed)
4479  return None;
4480 
4481  // We have something that we can fold. Fold in the shift's LHS and RHS into
4482  // the instruction.
4483  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4484  Register ShiftReg = ShiftLHS.getReg();
4485 
4486  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4487  unsigned Val = *Immed & (NumBits - 1);
4488  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4489 
4490  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4491  [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4492 }
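// Illustrative example: if the operand is defined by %v = G_SHL %x, 4 and the
// shift is worth folding, this renders (%x, lsl #4), so a consumer such as an
// add pattern can select "add x0, x1, x2, lsl #4" rather than emitting a
// separate shift instruction.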
4493 
4494 /// Get the correct ShiftExtendType for an extend instruction.
4495 static AArch64_AM::ShiftExtendType getExtendTypeForInst(MachineInstr &MI,
4496  MachineRegisterInfo &MRI) {
4497  unsigned Opc = MI.getOpcode();
4498 
4499  // Handle explicit extend instructions first.
4500  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
4501  unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4502  assert(Size != 64 && "Extend from 64 bits?");
4503  switch (Size) {
4504  case 8:
4505  return AArch64_AM::SXTB;
4506  case 16:
4507  return AArch64_AM::SXTH;
4508  case 32:
4509  return AArch64_AM::SXTW;
4510  default:
4511  return AArch64_AM::InvalidShiftExtend;
4512  }
4513  }
4514 
4515  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
4516  unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
4517  assert(Size != 64 && "Extend from 64 bits?");
4518  switch (Size) {
4519  case 8:
4520  return AArch64_AM::UXTB;
4521  case 16:
4522  return AArch64_AM::UXTH;
4523  case 32:
4524  return AArch64_AM::UXTW;
4525  default:
4526  return AArch64_AM::InvalidShiftExtend;
4527  }
4528  }
4529 
4530  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
4531  // on the RHS.
4532  if (Opc != TargetOpcode::G_AND)
4533  return AArch64_AM::InvalidShiftExtend;
4534 
4535  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
4536  if (!MaybeAndMask)
4537  return AArch64_AM::InvalidShiftExtend;
4538  uint64_t AndMask = *MaybeAndMask;
4539  switch (AndMask) {
4540  default:
4541  return AArch64_AM::InvalidShiftExtend;
4542  case 0xFF:
4543  return AArch64_AM::UXTB;
4544  case 0xFFFF:
4545  return AArch64_AM::UXTH;
4546  case 0xFFFFFFFF:
4547  return AArch64_AM::UXTW;
4548  }
4549 }
4550 
4551 Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
4552  Register ExtReg, MachineIRBuilder &MIB) const {
4553  MachineRegisterInfo &MRI = *MIB.getMRI();
4554  if (MRI.getType(ExtReg).getSizeInBits() == 32)
4555  return ExtReg;
4556 
4557  // Insert a copy to move ExtReg to GPR32.
4558  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4559  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
4560 
4561  // Select the copy into a subregister copy.
4562  selectCopy(*Copy, TII, MRI, TRI, RBI);
4563  return Copy.getReg(0);
4564 }
4565 
4566 /// Select an "extended register" operand. This operand folds in an extend
4567 /// followed by an optional left shift.
4568 InstructionSelector::ComplexRendererFns
4569 AArch64InstructionSelector::selectArithExtendedRegister(
4570  MachineOperand &Root) const {
4571  if (!Root.isReg())
4572  return None;
4573  MachineRegisterInfo &MRI =
4574  Root.getParent()->getParent()->getParent()->getRegInfo();
4575 
4576  uint64_t ShiftVal = 0;
4577  Register ExtReg;
4578  AArch64_AM::ShiftExtendType Ext;
4579  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
4580  if (!RootDef)
4581  return None;
4582 
4583  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
4584  return None;
4585 
4586  // Check if we can fold a shift and an extend.
4587  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
4588  // Look for a constant on the RHS of the shift.
4589  MachineOperand &RHS = RootDef->getOperand(2);
4590  Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
4591  if (!MaybeShiftVal)
4592  return None;
4593  ShiftVal = *MaybeShiftVal;
4594  if (ShiftVal > 4)
4595  return None;
4596  // Look for a valid extend instruction on the LHS of the shift.
4597  MachineOperand &LHS = RootDef->getOperand(1);
4598  MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4599  if (!ExtDef)
4600  return None;
4601  Ext = getExtendTypeForInst(*ExtDef, MRI);
4602  if (Ext == AArch64_AM::InvalidShiftExtend)
4603  return None;
4604  ExtReg = ExtDef->getOperand(1).getReg();
4605  } else {
4606  // Didn't get a shift. Try just folding an extend.
4607  Ext = getExtendTypeForInst(*RootDef, MRI);
4608  if (Ext == AArch64_AM::InvalidShiftExtend)
4609  return None;
4610  ExtReg = RootDef->getOperand(1).getReg();
4611 
4612  // If we have a 32 bit instruction which zeroes out the high half of a
4613  // register, we get an implicit zero extend for free. Check if we have one.
4614  // FIXME: We actually emit the extend right now even though we don't have
4615  // to.
4616  if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
4617  MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
4618  if (ExtInst && isDef32(*ExtInst))
4619  return None;
4620  }
4621  }
4622 
4623  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
4624  // copy.
4625  MachineIRBuilder MIB(*RootDef);
4626  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
4627 
4628  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
4629  [=](MachineInstrBuilder &MIB) {
4630  MIB.addImm(getArithExtendImm(Ext, ShiftVal));
4631  }}};
4632 }
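// Illustrative example: an operand defined as
//   %w = G_AND %x, 0xff
//   %v = G_SHL %w, 2
// folds to (%x, uxtb #2), letting an add select "add x0, x1, w2, uxtb #2"
// instead of materializing the zero-extend and the shift separately.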
4633 
4634 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4635  const MachineInstr &MI) const {
4636  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4637  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4638  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4639  assert(CstVal && "Expected constant value");
4640  MIB.addImm(CstVal.getValue());
4641 }
4642 
4643 void AArch64InstructionSelector::renderLogicalImm32(
4644  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4645  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4646  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4647  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4648  MIB.addImm(Enc);
4649 }
4650 
4651 void AArch64InstructionSelector::renderLogicalImm64(
4652  MachineInstrBuilder &MIB, const MachineInstr &I) const {
4653  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4654  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4655  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4656  MIB.addImm(Enc);
4657 }
4658 
4659 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
4660  const MachineInstr &MI, unsigned NumBytes) const {
4661  if (!MI.mayLoadOrStore())
4662  return false;
4663  assert(MI.hasOneMemOperand() &&
4664  "Expected load/store to have only one mem op!");
4665  return (*MI.memoperands_begin())->getSize() == NumBytes;
4666 }
4667 
4668 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
4669  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4670  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
4671  return false;
4672 
4673  // Only return true if we know the operation will zero-out the high half of
4674  // the 64-bit register. Truncates can be subregister copies, which don't
4675  // zero out the high bits. Copies and other copy-like instructions can be
4676  // fed by truncates, or could be lowered as subregister copies.
4677  switch (MI.getOpcode()) {
4678  default:
4679  return true;
4680  case TargetOpcode::COPY:
4681  case TargetOpcode::G_BITCAST:
4682  case TargetOpcode::G_TRUNC:
4683  case TargetOpcode::G_PHI:
4684  return false;
4685  }
4686 }
4687 
4688 namespace llvm {
4689 InstructionSelector *
4690 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4691  AArch64Subtarget &Subtarget,
4692  AArch64RegisterBankInfo &RBI) {
4693  return new AArch64InstructionSelector(TM, Subtarget, RBI);
4694 }
4695 }
unsigned SubReg
static StringRef getName(Value *V)
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
static int getID(struct InternalInstruction *insn, const void *miiArg)
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:454
const RegClassOrRegBank & getRegClassOrRegBank(unsigned Reg) const
Return the register bank or register class of Reg.
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:308
static bool isStore(int Opcode)
MachineFunction & getMF()
Getter for the function we currently build.
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
static bool isUnsignedICMPPred(const CmpInst::Predicate P)
Returns true if P is an unsigned integer comparison predicate.
bool isPredicate() const
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:1020
void setReg(Register Reg)
Change the register this operand corresponds to.
#define EQ(a, b)
Definition: regexec.c:112
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo - Interface to description of machine instruction set.
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV)...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineRegisterInfo * getMRI()
Getter for MRI.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:665
const TargetRegisterInfo * getTargetRegisterInfo() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
unsigned const MachineRegisterInfo * MRI
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:465
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:41
const GlobalValue * getGlobal() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:486
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:765
bool isExactlyValue(double V) const
We don&#39;t rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1145
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:566
bool isValid() const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
constexpr double e
Definition: MathExtras.h:57
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
self_iterator getIterator()
Definition: ilist_node.h:81
Optional< ValueAndVReg > getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool HandleFConstants=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_F/CONSTANT (LookThro...
Definition: Utils.cpp:218
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineInstrBuilder & addFrameIndex(int Idx) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
bool isCopy() const
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:319
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
const Constant * getShuffleMask() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
#define GET_GLOBALISEL_TEMPORARIES_INIT
const APFloat & getValueAPF() const
Definition: Constants.h:302
static Optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
BlockVerifier::State From
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:551
RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
MachineInstrBuilder MachineInstrBuilder & DefMI
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Promote Memory to Register
Definition: Mem2Reg.cpp:109
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:111
This class implements the register bank concept.
Definition: RegisterBank.h:28
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:585
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function that verifies that we have a valid copy at the end of selectCopy. ...
Optional< int64_t > getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition: Utils.cpp:207
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides the logic to select generic machine instructions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class provides the information for the target register banks.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
ConstantMatch m_ICst(int64_t &Cst)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:305
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
int64_t getOffset() const
Return the offset from the symbol in this operand.
const BlockAddress * getBlockAddress() const
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned findIntrinsicID(MachineInstr &I)
Helper function to find an intrinsic ID on an a MachineInstr.
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...