// Doxygen snapshot of LLVM 14.0.0git — AArch64LegalizerInfo.cpp.
// (Scrape banner "Go to the documentation of this file." converted to a
// comment; the listing below keeps the scraped source line numbers verbatim.)
1 //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the Machinelegalizer class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64LegalizerInfo.h"
16 #include "AArch64Subtarget.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/IntrinsicsAArch64.h"
29 #include "llvm/IR/Type.h"
31 #include <initializer_list>
32 
33 #define DEBUG_TYPE "aarch64-legalinfo"
34 
35 using namespace llvm;
36 using namespace LegalizeActions;
37 using namespace LegalizeMutations;
38 using namespace LegalityPredicates;
39 using namespace MIPatternMatch;
40 
// Constructs the AArch64 GlobalISel legalizer rule set: for each generic
// opcode it registers which LLT combinations are legal and how illegal ones
// are mutated (widen/narrow/clamp/scalarize/lower/libcall/custom).
//
// NOTE(review): this listing is a doxygen scrape that preserves original line
// numbers but has DROPPED many lines — e.g. 41 (the constructor signature,
// presumably "AArch64LegalizerInfo::AArch64LegalizerInfo(const
// AArch64Subtarget &ST)") plus numerous mid-chain lines. Wherever a rule
// chain below appears to begin mid-expression, its
// getActionDefinitionsBuilder(...) head line was lost in the scrape; the
// probable opcode is noted inline and must be verified against upstream
// LLVM 14 before any edit.
42  : ST(&ST) {
43  using namespace TargetOpcode;
// Scalar / pointer / fixed-vector LLT shorthands used throughout the rules.
44  const LLT p0 = LLT::pointer(0, 64);
45  const LLT s1 = LLT::scalar(1);
46  const LLT s8 = LLT::scalar(8);
47  const LLT s16 = LLT::scalar(16);
48  const LLT s32 = LLT::scalar(32);
49  const LLT s64 = LLT::scalar(64);
50  const LLT s128 = LLT::scalar(128);
51  const LLT v16s8 = LLT::fixed_vector(16, 8);
52  const LLT v8s8 = LLT::fixed_vector(8, 8);
53  const LLT v4s8 = LLT::fixed_vector(4, 8);
54  const LLT v8s16 = LLT::fixed_vector(8, 16);
55  const LLT v4s16 = LLT::fixed_vector(4, 16);
56  const LLT v2s16 = LLT::fixed_vector(2, 16);
57  const LLT v2s32 = LLT::fixed_vector(2, 32);
58  const LLT v4s32 = LLT::fixed_vector(4, 32);
59  const LLT v2s64 = LLT::fixed_vector(2, 64);
60  const LLT v2p0 = LLT::fixed_vector(2, p0);
61 
// All NEON-register-sized vector types (128-bit first, then 64-bit).
62  std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
63  v16s8, v8s16, v4s32,
64  v2s64, v2p0,
65  /* End 128bit types */
66  /* Begin 64bit types */
67  v8s8, v4s16, v2s32};
68 
69  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
70 
71  // FIXME: support subtargets which have neon/fp-armv8 disabled.
72  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
74  return;
75  }
76 
77  // Some instructions only support s16 if the subtarget has full 16-bit FP
78  // support.
79  const bool HasFP16 = ST.hasFullFP16();
80  const LLT &MinFPScalar = HasFP16 ? s16 : s32;
81 
82  getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
83  .legalFor({p0, s1, s8, s16, s32, s64})
84  .legalFor(PackedVectorAllTypeList)
// NOTE(review): scraped lines 85 and 87 are missing here; the predicate /
// mutation lambda pair below belongs to a rule whose call is not visible.
86  .clampScalar(0, s8, s64)
88  [=](const LegalityQuery &Query) {
89  return Query.Types[0].isVector() &&
90  (Query.Types[0].getElementType() != s64 ||
91  Query.Types[0].getNumElements() != 2);
92  },
93  [=](const LegalityQuery &Query) {
94  LLT EltTy = Query.Types[0].getElementType();
95  if (EltTy == s64)
96  return std::make_pair(0, LLT::fixed_vector(2, 64));
97  return std::make_pair(0, EltTy);
98  });
99 
// NOTE(review): scraped line 100 is missing — presumably
// getActionDefinitionsBuilder(G_PHI); verify against upstream.
101  .legalFor({p0, s16, s32, s64})
102  .legalFor(PackedVectorAllTypeList)
104  .clampScalar(0, s16, s64)
105  // Maximum: sN * k = 128
106  .clampMaxNumElements(0, s8, 16)
107  .clampMaxNumElements(0, s16, 8)
108  .clampMaxNumElements(0, s32, 4)
109  .clampMaxNumElements(0, s64, 2)
110  .clampMaxNumElements(0, p0, 2);
111 
// NOTE(review): scraped line 112 is missing — presumably
// getActionDefinitionsBuilder(G_BSWAP); verify against upstream.
113  .legalFor({s32, s64, v4s32, v2s32, v2s64})
114  .widenScalarToNextPow2(0)
115  .clampScalar(0, s32, s64);
116 
117  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
118  .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
// v2s64 multiply has no single NEON instruction, so scalarize G_MUL only.
119  .scalarizeIf(
120  [=](const LegalityQuery &Query) {
121  return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
122  },
123  0)
124  .legalFor({v2s64})
125  .widenScalarToNextPow2(0)
126  .clampScalar(0, s32, s64)
127  .clampNumElements(0, v2s32, v4s32)
128  .clampNumElements(0, v2s64, v2s64)
130 
131  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
// 32-bit scalar shift with a 32-bit amount goes through legalizeShlAshrLshr.
132  .customIf([=](const LegalityQuery &Query) {
133  const auto &SrcTy = Query.Types[0];
134  const auto &AmtTy = Query.Types[1];
135  return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
136  AmtTy.getSizeInBits() == 32;
137  })
138  .legalFor({
139  {s32, s32},
140  {s32, s64},
141  {s64, s64},
142  {v8s8, v8s8},
143  {v16s8, v16s8},
144  {v4s16, v4s16},
145  {v8s16, v8s16},
146  {v2s32, v2s32},
147  {v4s32, v4s32},
148  {v2s64, v2s64},
149  })
150  .widenScalarToNextPow2(0)
151  .clampScalar(1, s32, s64)
152  .clampScalar(0, s32, s64)
153  .clampNumElements(0, v2s32, v4s32)
154  .clampNumElements(0, v2s64, v2s64)
156  .minScalarSameAs(1, 0);
157 
158  getActionDefinitionsBuilder(G_PTR_ADD)
159  .legalFor({{p0, s64}, {v2p0, v2s64}})
160  .clampScalar(1, s64, s64);
161 
162  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
163 
164  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
165  .legalFor({s32, s64})
166  .libcallFor({s128})
167  .clampScalar(0, s32, s64)
169  .scalarize(0);
170 
171  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
172  .lowerFor({s1, s8, s16, s32, s64, v2s64, v4s32, v2s32})
174  .clampScalarOrElt(0, s32, s64)
175  .clampNumElements(0, v2s32, v4s32)
176  .clampNumElements(0, v2s64, v2s64)
177  .moreElementsToNextPow2(0);
178 
179 
180  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
181  .widenScalarToNextPow2(0, /*Min = */ 32)
182  .clampScalar(0, s32, s64)
183  .lowerIf(typeIs(1, s1));
184 
185  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
186  .legalFor({s64, v8s16, v16s8, v4s32})
187  .lower();
188 
189  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
190  .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
191  .clampNumElements(0, v8s8, v16s8)
192  .clampNumElements(0, v4s16, v8s16)
193  .clampNumElements(0, v2s32, v4s32)
194  // FIXME: This sholdn't be needed as v2s64 types are going to
195  // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
196  .clampNumElements(0, v2s64, v2s64)
197  .lower();
198 
// NOTE(review): scraped line 199 — the getActionDefinitionsBuilder( call
// heading this overflow/carry op list — is missing.
200  {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
201  .legalFor({{s32, s1}, {s64, s1}})
202  .clampScalar(0, s32, s64)
// NOTE(review): scraped line 203 (the terminator of this chain) is missing.
204 
205  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
206  .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
207  .clampScalar(0, MinFPScalar, s64)
208  .clampNumElements(0, v2s32, v4s32)
209  .clampNumElements(0, v2s64, v2s64);
210 
211  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
212 
213  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
214  G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
215  G_FNEARBYINT, G_INTRINSIC_LRINT})
216  // If we don't have full FP16 support, then scalarize the elements of
217  // vectors containing fp16 types.
218  .fewerElementsIf(
219  [=, &ST](const LegalityQuery &Query) {
220  const auto &Ty = Query.Types[0];
221  return Ty.isVector() && Ty.getElementType() == s16 &&
222  !ST.hasFullFP16();
223  },
224  [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
225  // If we don't have full FP16 support, then widen s16 to s32 if we
226  // encounter it.
227  .widenScalarIf(
228  [=, &ST](const LegalityQuery &Query) {
229  return Query.Types[0] == s16 && !ST.hasFullFP16();
230  },
231  [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
232  .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
233 
// NOTE(review): scraped line 234 — the getActionDefinitionsBuilder( call
// heading this libcall-only FP op list — is missing.
235  {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
236  // We need a call for these, so we always need to scalarize.
237  .scalarize(0)
238  // Regardless of FP16 support, widen 16-bit elements to 32-bits.
239  .minScalar(0, s32)
240  .libcallFor({s32, s64, v2s32, v4s32, v2s64});
241 
// NOTE(review): scraped line 242 is missing — presumably
// getActionDefinitionsBuilder(G_INSERT); verify against upstream.
243  .legalIf(all(typeInSet(0, {s32, s64, p0}),
244  typeInSet(1, {s1, s8, s16, s32}), smallerThan(1, 0)))
246  .clampScalar(0, s32, s64)
248  .minScalar(1, s8)
249  .maxScalarIf(typeInSet(0, {s32}), 1, s16)
250  .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
251 
252  getActionDefinitionsBuilder(G_EXTRACT)
253  .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
254  typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
256  .clampScalar(1, s32, s128)
258  .minScalar(0, s16)
259  .maxScalarIf(typeInSet(1, {s32}), 0, s16)
260  .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
261  .maxScalarIf(typeInSet(1, {s128}), 0, s64);
262 
// Extending loads: MemDesc tuples look like {result, pointer, memory type,
// alignment} — the meaning of the 4th field (presumably min align) should be
// confirmed against the LegalityPredicates API docs.
263  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
265  .legalForTypesWithMemDesc({{s32, p0, s8, 8},
266  {s32, p0, s16, 8},
267  {s32, p0, s32, 8},
268  {s64, p0, s8, 2},
269  {s64, p0, s16, 2},
270  {s64, p0, s32, 4},
271  {s64, p0, s64, 8},
272  {p0, p0, s64, 8},
273  {v2s32, p0, s64, 8}})
274  .widenScalarToNextPow2(0)
275  .clampScalar(0, s32, s64)
276  // TODO: We could support sum-of-pow2's but the lowering code doesn't know
277  // how to do that yet.
279  // Lower anything left over into G_*EXT and G_LOAD
280  .lower();
281 
// True for vectors of address-space-0 pointers; such loads/stores are
// custom-legalized (see legalizeLoadStore).
282  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
283  const LLT &ValTy = Query.Types[0];
284  if (!ValTy.isVector())
285  return false;
286  const LLT EltTy = ValTy.getElementType();
287  return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
288  };
289 
// NOTE(review): scraped line 290 is missing — presumably
// getActionDefinitionsBuilder(G_LOAD); verify against upstream.
291  .customIf([=](const LegalityQuery &Query) {
292  return Query.Types[0] == s128 &&
293  Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
294  })
295  .legalForTypesWithMemDesc({{s8, p0, s8, 8},
296  {s16, p0, s16, 8},
297  {s32, p0, s32, 8},
298  {s64, p0, s64, 8},
299  {p0, p0, s64, 8},
300  {s128, p0, s128, 8},
301  {v8s8, p0, s64, 8},
302  {v16s8, p0, s128, 8},
303  {v4s16, p0, s64, 8},
304  {v8s16, p0, s128, 8},
305  {v2s32, p0, s64, 8},
306  {v4s32, p0, s128, 8},
307  {v2s64, p0, s128, 8}})
308  // These extends are also legal
309  .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
310  .widenScalarToNextPow2(0, /* MinSize = */8)
312  .clampScalar(0, s8, s64)
313  .narrowScalarIf([=](const LegalityQuery &Query) {
314  // Clamp extending load results to 32-bits.
315  return Query.Types[0].isScalar() &&
316  Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
317  Query.Types[0].getSizeInBits() > 32;
318  },
319  changeTo(0, s32))
320  // Lower any any-extending loads left into G_ANYEXT and G_LOAD
321  .lowerIf([=](const LegalityQuery &Query) {
322  return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
323  })
324  .clampMaxNumElements(0, s8, 16)
325  .clampMaxNumElements(0, s16, 8)
326  .clampMaxNumElements(0, s32, 4)
327  .clampMaxNumElements(0, s64, 2)
328  .clampMaxNumElements(0, p0, 2)
329  .customIf(IsPtrVecPred)
330  .scalarizeIf(typeIs(0, v2s16), 0);
331 
// NOTE(review): scraped line 332 is missing — presumably
// getActionDefinitionsBuilder(G_STORE) given the truncstore comments below;
// verify against upstream.
333  .customIf([=](const LegalityQuery &Query) {
334  return Query.Types[0] == s128 &&
335  Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
336  })
337  .legalForTypesWithMemDesc({{s8, p0, s8, 8},
338  {s16, p0, s8, 8}, // truncstorei8 from s16
339  {s32, p0, s8, 8}, // truncstorei8 from s32
340  {s64, p0, s8, 8}, // truncstorei8 from s64
341  {s16, p0, s16, 8},
342  {s32, p0, s16, 8}, // truncstorei16 from s32
343  {s64, p0, s16, 8}, // truncstorei16 from s64
344  {s32, p0, s8, 8},
345  {s32, p0, s16, 8},
346  {s32, p0, s32, 8},
347  {s64, p0, s64, 8},
348  {s64, p0, s32, 8}, // truncstorei32 from s64
349  {p0, p0, s64, 8},
350  {s128, p0, s128, 8},
351  {v16s8, p0, s128, 8},
352  {v8s8, p0, s64, 8},
353  {v4s16, p0, s64, 8},
354  {v8s16, p0, s128, 8},
355  {v2s32, p0, s64, 8},
356  {v4s32, p0, s128, 8},
357  {v2s64, p0, s128, 8}})
358  .clampScalar(0, s8, s64)
359  .lowerIf([=](const LegalityQuery &Query) {
360  return Query.Types[0].isScalar() &&
361  Query.Types[0] != Query.MMODescrs[0].MemoryTy;
362  })
363  // Maximum: sN * k = 128
364  .clampMaxNumElements(0, s8, 16)
365  .clampMaxNumElements(0, s16, 8)
366  .clampMaxNumElements(0, s32, 4)
367  .clampMaxNumElements(0, s64, 2)
368  .clampMaxNumElements(0, p0, 2)
370  .customIf(IsPtrVecPred)
371  .scalarizeIf(typeIs(0, v2s16), 0);
372 
373  // Constants
374  getActionDefinitionsBuilder(G_CONSTANT)
375  .legalFor({p0, s8, s16, s32, s64})
376  .widenScalarToNextPow2(0)
377  .clampScalar(0, s8, s64);
378  getActionDefinitionsBuilder(G_FCONSTANT)
379  .legalIf([=](const LegalityQuery &Query) {
380  const auto &Ty = Query.Types[0];
// fp16 constants are only directly representable with full FP16 support.
381  if (HasFP16 && Ty == s16)
382  return true;
383  return Ty == s32 || Ty == s64 || Ty == s128;
384  })
385  .clampScalar(0, MinFPScalar, s128);
386 
387  getActionDefinitionsBuilder({G_ICMP, G_FCMP})
388  .legalFor({{s32, s32},
389  {s32, s64},
390  {s32, p0},
391  {v4s32, v4s32},
392  {v2s32, v2s32},
393  {v2s64, v2s64},
394  {v2s64, v2p0},
395  {v4s16, v4s16},
396  {v8s16, v8s16},
397  {v8s8, v8s8},
398  {v16s8, v16s8}})
400  .clampScalar(1, s32, s64)
401  .clampScalar(0, s32, s32)
// Vector compares produce an element-for-element mask: make the result
// element width match the (non-pointer) source element width.
402  .minScalarEltSameAsIf(
403  [=](const LegalityQuery &Query) {
404  const LLT &Ty = Query.Types[0];
405  const LLT &SrcTy = Query.Types[1];
406  return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
407  Ty.getElementType() != SrcTy.getElementType();
408  },
409  0, 1)
410  .minScalarOrEltIf(
411  [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
412  1, s32)
413  .minScalarOrEltIf(
414  [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
415  s64)
416  .clampNumElements(0, v2s32, v4s32);
417 
418  // Extensions
// Shared legality predicate for G_ZEXT/G_SEXT/G_ANYEXT: power-of-2 sizes
// that fit a register; s1 sources are always accepted.
419  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
420  unsigned DstSize = Query.Types[0].getSizeInBits();
421 
422  if (DstSize == 128 && !Query.Types[0].isVector())
423  return false; // Extending to a scalar s128 needs narrowing.
424 
425  // Make sure that we have something that will fit in a register, and
426  // make sure it's a power of 2.
427  if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
428  return false;
429 
430  const LLT &SrcTy = Query.Types[1];
431 
432  // Special case for s1.
433  if (SrcTy == s1)
434  return true;
435 
436  // Make sure we fit in a register otherwise. Don't bother checking that
437  // the source type is below 128 bits. We shouldn't be allowing anything
438  // through which is wider than the destination in the first place.
439  unsigned SrcSize = SrcTy.getSizeInBits();
440  if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
441  return false;
442 
443  return true;
444  };
445  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
446  .legalIf(ExtLegalFunc)
447  .clampScalar(0, s64, s64); // Just for s128, others are handled above.
448 
// NOTE(review): scraped lines 449-450 are missing — presumably
// getActionDefinitionsBuilder(G_TRUNC) and the head of a rule taking the
// lambda below (the v8s8 customIf routes to legalizeVectorTrunc); verify.
451  [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
452  0, s8)
453  .customIf([=](const LegalityQuery &Query) {
454  LLT DstTy = Query.Types[0];
455  LLT SrcTy = Query.Types[1];
456  return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
457  })
458  .alwaysLegal();
459 
460  getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
461 
462  // FP conversions
463  getActionDefinitionsBuilder(G_FPTRUNC)
464  .legalFor(
465  {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
466  .clampMaxNumElements(0, s32, 2);
// NOTE(review): scraped line 467 is missing — presumably
// getActionDefinitionsBuilder(G_FPEXT) given the widening pairs below.
468  .legalFor(
469  {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
470  .clampMaxNumElements(0, s64, 2);
471 
472  // Conversions
473  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
474  .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
475  .widenScalarToNextPow2(0)
476  .clampScalar(0, s32, s64)
478  .clampScalar(1, s32, s64);
479 
480  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
481  .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
482  .clampScalar(1, s32, s64)
483  .minScalarSameAs(1, 0)
484  .clampScalar(0, s32, s64)
486 
487  // Control-flow
488  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
489  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
490 
// NOTE(review): scraped line 491 is missing — presumably
// getActionDefinitionsBuilder(G_SELECT) given the {value, s1} pairs below.
492  .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
493  .widenScalarToNextPow2(0)
494  .clampScalar(0, s32, s64)
495  .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
496  .lowerIf(isVector(0));
497 
498  // Pointer-handling
499  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
500 
// Small code model: split G_GLOBAL_VALUE into ADRP + G_ADD_LOW (see
// legalizeSmallCMGlobalValue); otherwise a plain p0 global is legal.
501  if (TM.getCodeModel() == CodeModel::Small)
502  getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
503  else
504  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
505 
506  getActionDefinitionsBuilder(G_PTRTOINT)
507  .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
508  .legalFor({{v2s64, v2p0}})
509  .maxScalar(0, s64)
510  .widenScalarToNextPow2(0, /*Min*/ 8);
511 
512  getActionDefinitionsBuilder(G_INTTOPTR)
513  .unsupportedIf([&](const LegalityQuery &Query) {
514  return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
515  })
516  .legalFor({{p0, s64}, {v2p0, v2s64}});
517 
518  // Casts for 32 and 64-bit width type are just copies.
519  // Same for 128-bit width type, except they are on the FPR bank.
520  getActionDefinitionsBuilder(G_BITCAST)
521  // FIXME: This is wrong since G_BITCAST is not allowed to change the
522  // number of bits but it's what the previous code described and fixing
523  // it breaks tests.
524  .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
525  v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
526  v2p0});
527 
528  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
529 
530  // va_list must be a pointer, but most sized types are pretty easy to handle
531  // as the destination.
// NOTE(review): scraped line 532 is missing — presumably
// getActionDefinitionsBuilder(G_VAARG), which legalizeCustom dispatches to
// legalizeVaArg; verify against upstream.
533  .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
534  .clampScalar(0, s8, s64)
535  .widenScalarToNextPow2(0, /*Min*/ 8);
536 
537  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
538  .lowerIf(
539  all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0)));
540 
541  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
// 128-bit cmpxchg goes through legalizeAtomicCmpxchg128.
542  .customIf([](const LegalityQuery &Query) {
543  return Query.Types[0].getSizeInBits() == 128;
544  })
545  .clampScalar(0, s32, s64)
546  .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
547 
// NOTE(review): scraped line 548 — the getActionDefinitionsBuilder( call
// heading this atomicrmw op list — is missing.
549  {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
550  G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
551  G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
552  .clampScalar(0, s32, s64)
553  .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
554 
555  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
556 
557  // Merge/Unmerge
558  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
// For merge the "big" type is the result (idx 0); for unmerge it is the
// source (idx 1).
559  unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
560  unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
// NOTE(review): scraped line 561 — presumably
// getActionDefinitionsBuilder(Op) — is missing.
562  .widenScalarToNextPow2(LitTyIdx, 8)
563  .widenScalarToNextPow2(BigTyIdx, 32)
564  .clampScalar(LitTyIdx, s8, s64)
565  .clampScalar(BigTyIdx, s32, s128)
566  .legalIf([=](const LegalityQuery &Q) {
567  switch (Q.Types[BigTyIdx].getSizeInBits()) {
568  case 32:
569  case 64:
570  case 128:
571  break;
572  default:
573  return false;
574  }
575  switch (Q.Types[LitTyIdx].getSizeInBits()) {
576  case 8:
577  case 16:
578  case 32:
579  case 64:
580  return true;
581  default:
582  return false;
583  }
584  });
585  }
586 
587  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
// The extracted scalar must match the vector's element type exactly.
588  .unsupportedIf([=](const LegalityQuery &Query) {
589  const LLT &EltTy = Query.Types[1].getElementType();
590  return Query.Types[0] != EltTy;
591  })
592  .minScalar(2, s64)
593  .legalIf([=](const LegalityQuery &Query) {
594  const LLT &VecTy = Query.Types[1];
595  return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
596  VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
597  VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
598  VecTy == v2p0;
599  })
600  .minScalarOrEltIf(
601  [=](const LegalityQuery &Query) {
602  // We want to promote to <M x s1> to <M x s64> if that wouldn't
603  // cause the total vec size to be > 128b.
604  return Query.Types[1].getNumElements() <= 2;
605  },
606  0, s64)
607  .minScalarOrEltIf(
608  [=](const LegalityQuery &Query) {
609  return Query.Types[1].getNumElements() <= 4;
610  },
611  0, s32)
612  .minScalarOrEltIf(
613  [=](const LegalityQuery &Query) {
614  return Query.Types[1].getNumElements() <= 8;
615  },
616  0, s16)
617  .minScalarOrEltIf(
618  [=](const LegalityQuery &Query) {
619  return Query.Types[1].getNumElements() <= 16;
620  },
621  0, s8)
622  .minScalarOrElt(0, s8) // Worst case, we need at least s8.
623  .clampMaxNumElements(1, s64, 2)
624  .clampMaxNumElements(1, s32, 4)
625  .clampMaxNumElements(1, s16, 8)
626  .clampMaxNumElements(1, p0, 2);
627 
628  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
629  .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
630 
631  getActionDefinitionsBuilder(G_BUILD_VECTOR)
632  .legalFor({{v8s8, s8},
633  {v16s8, s8},
634  {v2s16, s16},
635  {v4s16, s16},
636  {v8s16, s16},
637  {v2s32, s32},
638  {v4s32, s32},
639  {v2p0, p0},
640  {v2s64, s64}})
641  .clampNumElements(0, v4s32, v4s32)
642  .clampNumElements(0, v2s64, v2s64)
643  .minScalarOrElt(0, s8)
644  .minScalarSameAs(1, 0);
645 
646  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
647 
// NOTE(review): scraped lines 648-649 are missing — presumably
// getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct( given the
// type list below and the G_CTLZ_ZERO_UNDEF rule that follows; verify.
650  {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
651  .scalarize(1);
652  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
653 
654  // TODO: Custom lowering for v2s32, v4s32, v2s64.
655  getActionDefinitionsBuilder(G_BITREVERSE)
656  .legalFor({s32, s64, v8s8, v16s8})
657  .widenScalarToNextPow2(0, /*Min = */ 32)
658  .clampScalar(0, s32, s64);
659 
660  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
661 
662  // TODO: Handle vector types.
// NOTE(review): scraped line 663 is missing — presumably
// getActionDefinitionsBuilder(G_CTTZ), whose custom path is legalizeCTTZ.
664  .clampScalar(0, s32, s64)
665  .scalarSameSizeAs(1, 0)
666  .customFor({s32, s64});
667 
668  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
669  .legalIf([=](const LegalityQuery &Query) {
670  const LLT &DstTy = Query.Types[0];
671  const LLT &SrcTy = Query.Types[1];
672  // For now just support the TBL2 variant which needs the source vectors
673  // to be the same size as the dest.
674  if (DstTy != SrcTy)
675  return false;
676  for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
677  if (DstTy == Ty)
678  return true;
679  }
680  return false;
681  })
682  // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
683  // just want those lowered into G_BUILD_VECTOR
684  .lowerIf([=](const LegalityQuery &Query) {
685  return !Query.Types[1].isVector();
686  })
688  .clampNumElements(0, v4s32, v4s32)
689  .clampNumElements(0, v2s64, v2s64);
690 
691  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
692  .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
693 
694  getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
695 
696  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
697  return Query.Types[0] == p0 && Query.Types[1] == s64;
698  });
699 
700  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
701 
702  getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
703  .libcall();
704 
705  // FIXME: Legal types are only legal with NEON.
// NOTE(review): scraped line 706 is missing — presumably
// getActionDefinitionsBuilder(G_ABS); verify against upstream.
707  .lowerIf(isScalar(0))
708  .legalFor(PackedVectorAllTypeList);
709 
710  getActionDefinitionsBuilder(G_VECREDUCE_FADD)
711  // We only have FADDP to do reduction-like operations. Lower the rest.
712  .legalFor({{s32, v2s32}, {s64, v2s64}})
713  .clampMaxNumElements(1, s64, 2)
714  .clampMaxNumElements(1, s32, 2)
715  .lower();
716 
717  getActionDefinitionsBuilder(G_VECREDUCE_ADD)
718  .legalFor(
719  {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
720  .clampMaxNumElements(1, s64, 2)
721  .clampMaxNumElements(1, s32, 4)
722  .lower();
723 
// NOTE(review): scraped line 724 — the getActionDefinitionsBuilder( call
// heading this vector-reduction op list — is missing.
725  {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
726  // Try to break down into smaller vectors as long as they're at least 64
727  // bits. This lets us use vector operations for some parts of the
728  // reduction.
729  .fewerElementsIf(
730  [=](const LegalityQuery &Q) {
731  LLT SrcTy = Q.Types[1];
732  if (SrcTy.isScalar())
733  return false;
734  if (!isPowerOf2_32(SrcTy.getNumElements()))
735  return false;
736  // We can usually perform 64b vector operations.
737  return SrcTy.getSizeInBits() > 64;
738  },
739  [=](const LegalityQuery &Q) {
740  LLT SrcTy = Q.Types[1];
741  return std::make_pair(1, SrcTy.divide(2));
742  })
743  .scalarize(1)
744  .lower();
745 
746  getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
747  .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
748 
749  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
750 
// NOTE(review): scraped line 751 is missing — presumably
// getActionDefinitionsBuilder(G_ROTR), whose custom path is legalizeRotate.
752  .legalFor({{s32, s64}, {s64, s64}})
753  .customIf([=](const LegalityQuery &Q) {
754  return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
755  })
756  .lower();
758 
759  getActionDefinitionsBuilder({G_SBFX, G_UBFX})
760  .customFor({{s32, s32}, {s64, s64}});
761 
762  // TODO: Use generic lowering when custom lowering is not possible.
763  auto always = [=](const LegalityQuery &Q) { return true; };
// NOTE(review): scraped line 764 is missing — presumably
// getActionDefinitionsBuilder(G_CTPOP), whose custom path is legalizeCTPOP;
// mid-chain lines 767-769 are also dropped. Verify against upstream.
765  .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
766  .clampScalar(0, s32, s128)
770  .customFor({{s32, s32},
771  {s64, s64},
772  {s128, s128},
773  {v2s64, v2s64},
774  {v2s32, v2s32},
775  {v4s32, v4s32},
776  {v4s16, v4s16},
777  {v8s16, v8s16}});
778 
779  // TODO: Vector types.
780  getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
781 
782  // TODO: Vector types.
783  getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
784  .legalFor({MinFPScalar, s32, s64})
785  .libcallFor({s128})
786  .minScalar(0, MinFPScalar);
787 
788  // TODO: Libcall support for s128.
789  // TODO: s16 should be legal with full FP16 support.
790  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
791  .legalFor({{s64, s32}, {s64, s64}});
792 
// NOTE(review): scraped line 793 is missing — presumably
// getLegacyLegalizerInfo().computeTables(); verify against upstream.
794  verify(*ST.getInstrInfo());
795 }
796 
// Entry point for every rule marked .custom*() in the constructor:
// dispatches on the instruction's opcode to the dedicated legalize*
// helper. Returns false for opcodes with no custom handling.
// NOTE(review): original line 797 — the head of the signature, presumably
// "bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper," —
// is missing from this scrape.
798  MachineInstr &MI) const {
799  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
800  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
801  GISelChangeObserver &Observer = Helper.Observer;
802  switch (MI.getOpcode()) {
803  default:
804  // No idea what to do.
805  return false;
806  case TargetOpcode::G_VAARG:
807  return legalizeVaArg(MI, MRI, MIRBuilder);
808  case TargetOpcode::G_LOAD:
809  case TargetOpcode::G_STORE:
810  return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
811  case TargetOpcode::G_SHL:
812  case TargetOpcode::G_ASHR:
813  case TargetOpcode::G_LSHR:
814  return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
815  case TargetOpcode::G_GLOBAL_VALUE:
816  return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
817  case TargetOpcode::G_TRUNC:
818  return legalizeVectorTrunc(MI, Helper);
819  case TargetOpcode::G_SBFX:
820  case TargetOpcode::G_UBFX:
821  return legalizeBitfieldExtract(MI, MRI, Helper);
822  case TargetOpcode::G_ROTR:
823  return legalizeRotate(MI, MRI, Helper);
824  case TargetOpcode::G_CTPOP:
825  return legalizeCTPOP(MI, MRI, Helper);
826  case TargetOpcode::G_ATOMIC_CMPXCHG:
827  return legalizeAtomicCmpxchg128(MI, MRI, Helper);
828  case TargetOpcode::G_CTTZ:
829  return legalizeCTTZ(MI, Helper);
830  }
831 
// Every case in the switch returns, so falling out of it is a bug.
832  llvm_unreachable("expected switch to return");
833 }
834 
// Custom-legalizes a G_ROTR whose rotate amount is a narrow scalar:
// sign-extends the amount operand to s64 in place so the imported
// selection patterns (which expect a 64-bit amount) can match.
835 bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
// NOTE(review): original line 836 — the second parameter line, presumably
// "MachineRegisterInfo &MRI," — is missing from this scrape.
837  LegalizerHelper &Helper) const {
838  // To allow for imported patterns to match, we ensure that the rotate amount
839  // is 64b with an extension.
840  Register AmtReg = MI.getOperand(2).getReg();
841  LLT AmtTy = MRI.getType(AmtReg);
// The (void) cast keeps AmtTy "used" in release builds where the asserts
// below compile away.
842  (void)AmtTy;
843  assert(AmtTy.isScalar() && "Expected a scalar rotate");
844  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
845  auto NewAmt = Helper.MIRBuilder.buildSExt(LLT::scalar(64), AmtReg);
846  Helper.Observer.changingInstr(MI);
847  MI.getOperand(2).setReg(NewAmt.getReg(0));
848  Helper.Observer.changedInstr(MI);
849  return true;
850 }
851 
// Splits a register into NumParts pieces of type Ty via a single
// G_UNMERGE_VALUES, appending the freshly created part registers to VRegs.
// NOTE(review): original line 852 — the head of the signature, presumably
// "static void extractParts(Register Reg, MachineRegisterInfo &MRI," — is
// missing from this scrape.
853  MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
854  SmallVectorImpl<Register> &VRegs) {
855  for (int I = 0; I < NumParts; ++I)
856  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
857  MIRBuilder.buildUnmerge(VRegs, Reg);
858 }
859 
// Custom-legalizes a wide vector G_TRUNC (e.g. v8s32 -> v8s8) by splitting
// the source in half, truncating each half one element-width step, and
// concatenating — then re-points the original G_TRUNC at the intermediate
// vector so it performs only the final narrowing step.
860 bool AArch64LegalizerInfo::legalizeVectorTrunc(
861  MachineInstr &MI, LegalizerHelper &Helper) const {
862  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
863  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
864  // Similar to how operand splitting is done in SelectiondDAG, we can handle
865  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
866  // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
867  // %lo16(<4 x s16>) = G_TRUNC %inlo
868  // %hi16(<4 x s16>) = G_TRUNC %inhi
869  // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
870  // %res(<8 x s8>) = G_TRUNC %in16
871 
872  Register DstReg = MI.getOperand(0).getReg();
873  Register SrcReg = MI.getOperand(1).getReg();
874  LLT DstTy = MRI.getType(DstReg);
875  LLT SrcTy = MRI.getType(SrcReg);
// NOTE(review): original line 876 is missing from this scrape — presumably
// the head of an assert(...) whose closing clause is the next line.
877  isPowerOf2_32(SrcTy.getSizeInBits()));
878 
879  // Split input type.
880  LLT SplitSrcTy =
// NOTE(review): original line 881 — the initializer expression of
// SplitSrcTy, presumably the source type with half as many elements — is
// missing from this scrape.
882  // First, split the source into two smaller vectors.
883  SmallVector<Register, 2> SplitSrcs;
884  extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
885 
886  // Truncate the splits into intermediate narrower elements.
887  LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
888  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
889  SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
890 
891  auto Concat = MIRBuilder.buildConcatVectors(
892  DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
893 
// Swap the original G_TRUNC's source for the concatenated intermediate.
894  Helper.Observer.changingInstr(MI);
895  MI.getOperand(1).setReg(Concat.getReg(0));
896  Helper.Observer.changedInstr(MI);
897  return true;
898 }
899 
// Small-code-model custom legalization of G_GLOBAL_VALUE: splits the
// address computation into ADRP (page) + G_ADD_LOW (page offset), with an
// extra MOVK for MO_TAGGED globals. TLS and GOT-accessed globals are left
// untouched (returns true without modifying MI in those cases).
900 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// NOTE(review): original line 901 — the remaining parameters, presumably
// "MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder
// &MIRBuilder," — is missing from this scrape.
902  GISelChangeObserver &Observer) const {
903  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
904  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
905  // G_ADD_LOW instructions.
906  // By splitting this here, we can optimize accesses in the small code model by
907  // folding in the G_ADD_LOW into the load/store offset.
908  auto &GlobalOp = MI.getOperand(1);
909  const auto* GV = GlobalOp.getGlobal();
910  if (GV->isThreadLocal())
911  return true; // Don't want to modify TLS vars.
912 
913  auto &TM = ST->getTargetLowering()->getTargetMachine();
914  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
915 
// GOT-accessed globals keep the single-instruction form.
916  if (OpFlags & AArch64II::MO_GOT)
917  return true;
918 
919  auto Offset = GlobalOp.getOffset();
920  Register DstReg = MI.getOperand(0).getReg();
921  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
922  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
923  // Set the regclass on the dest reg too.
924  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
925 
926  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
927  // by creating a MOVK that sets bits 48-63 of the register to (global address
928  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
929  // prevent an incorrect tag being generated during relocation when the the
930  // global appears before the code section. Without the offset, a global at
931  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
932  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
933  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
934  // instead of `0xf`.
935  // This assumes that we're in the small code model so we can assume a binary
936  // size of <= 4GB, which makes the untagged PC relative offset positive. The
937  // binary must also be loaded into address range [0, 2^48). Both of these
938  // properties need to be ensured at runtime when using tagged addresses.
939  if (OpFlags & AArch64II::MO_TAGGED) {
940  assert(!Offset &&
941  "Should not have folded in an offset for a tagged global!");
942  ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
943  .addGlobalAddress(GV, 0x100000000,
// NOTE(review): original line 944 — the target-flags argument of this
// addGlobalAddress call — is missing from this scrape.
945  .addImm(48);
946  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
947  }
948 
949  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
950  .addGlobalAddress(GV, Offset,
// NOTE(review): original line 951 — the target-flags argument (presumably
// the MO_PAGEOFF form of OpFlags) — is missing from this scrape.
952  MI.eraseFromParent();
953  return true;
954 }
955 
957  MachineInstr &MI) const {
  // Custom legalization for target intrinsics the generic legalizer leaves to
  // us. NOTE(review): the opening of this signature (original line 956,
  // presumably "bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper
  // &Helper,") was lost during extraction — confirm against upstream.
 958  switch (MI.getIntrinsicID()) {
 959  case Intrinsic::vacopy: {
  // va_copy lowers to a plain block copy of the va_list object. Darwin and
  // Windows use a single pointer as va_list; otherwise the AAPCS va_list
  // struct is used (32 bytes, or 20 bytes under ILP32).
 960  unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
 961  unsigned VaListSize =
 962  (ST->isTargetDarwin() || ST->isTargetWindows())
 963  ? PtrSize
 964  : ST->isTargetILP32() ? 20 : 32;
 965 
 966  MachineFunction &MF = *MI.getMF();
  // NOTE(review): original line 967 (the definition of Val, presumably
  // "auto Val = MF.getRegInfo().createGenericVirtualRegister(") was lost
  // during extraction; the whole va_list is loaded as one wide scalar.
 968  LLT::scalar(VaListSize * 8));
 969  MachineIRBuilder MIB(MI);
  // Copy src va_list (operand 2) into dest va_list (operand 1).
  // NOTE(review): lines 971-972 and 975-976 (the MachinePointerInfo /
  // getMachineMemOperand arguments to these calls) were lost in extraction.
 970  MIB.buildLoad(Val, MI.getOperand(2),
 973  VaListSize, Align(PtrSize)));
 974  MIB.buildStore(Val, MI.getOperand(1),
 977  VaListSize, Align(PtrSize)));
 978  MI.eraseFromParent();
 979  return true;
 980  }
 981  case Intrinsic::get_dynamic_area_offset: {
  // The dynamic area offset on AArch64 is always zero.
 982  MachineIRBuilder &MIB = Helper.MIRBuilder;
 983  MIB.buildConstant(MI.getOperand(0).getReg(), 0);
 984  MI.eraseFromParent();
 985  return true;
 986  }
 987  }
 988 
  // Any other intrinsic is treated as already legal.
 989  return true;
 990 }
991 
992 bool AArch64LegalizerInfo::legalizeShlAshrLshr(
  // NOTE(review): original line 993 (the MachineInstr/MachineRegisterInfo/
  // MachineIRBuilder parameters of this signature) was lost during
  // extraction — confirm against upstream.
 994  GISelChangeObserver &Observer) const {
 995  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
 996  MI.getOpcode() == TargetOpcode::G_LSHR ||
 997  MI.getOpcode() == TargetOpcode::G_SHL);
 998  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
 999  // imported patterns can select it later. Either way, it will be legal.
 1000  Register AmtReg = MI.getOperand(2).getReg();
 1001  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
 1002  if (!VRegAndVal)
  // Non-constant shift amounts stay as-is: the register form is legal.
 1003  return true;
 1004  // Check the shift amount is in range for an immediate form.
 1005  int64_t Amount = VRegAndVal->Value.getSExtValue();
 1006  if (Amount > 31)
 1007  return true; // This will have to remain a register variant.
  // Rebuild the amount as an s64 G_CONSTANT so the imported SelectionDAG
  // patterns (which expect an i64 immediate operand) can match it.
 1008  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
 1009  Observer.changingInstr(MI);
 1010  MI.getOperand(2).setReg(ExtCst.getReg(0));
 1011  Observer.changedInstr(MI);
 1012  return true;
 1013 }
1014 
  // Body of matchLDPSTPAddrMode: fold a pointer-add into an LDP/STP
  // base+immediate addressing mode when the offset fits the encoding.
  // NOTE(review): the function header (original lines 1015-1016, per the
  // doxygen index: "static void matchLDPSTPAddrMode(Register Root,
  // Register &Base, int &Offset, MachineRegisterInfo &MRI)") was lost
  // during extraction.
 1017  Base = Root;
 1018  Offset = 0;
 1019 
 1020  Register NewBase;
 1021  int64_t NewOffset;
  // LDP/STP of X registers encode a signed 7-bit immediate scaled by 8,
  // i.e. a multiple of 8 in [-512, 504] — exactly what isShiftedInt<7, 3>
  // checks. Only then can the G_PTR_ADD be folded into the access.
 1022  if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
 1023  isShiftedInt<7, 3>(NewOffset)) {
 1024  Base = NewBase;
 1025  Offset = NewOffset;
 1026  }
 1027 }
1028 
1029 // FIXME: This should be removed and replaced with the generic bitcast legalize
1030 // action.
1031 bool AArch64LegalizerInfo::legalizeLoadStore(
  // NOTE(review): original line 1032 (the MachineInstr/MachineRegisterInfo/
  // MachineIRBuilder parameters of this signature) was lost during
  // extraction — confirm against upstream.
 1033  GISelChangeObserver &Observer) const {
 1034  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
 1035  MI.getOpcode() == TargetOpcode::G_LOAD);
 1036  // Here we just try to handle vector loads/stores where our value type might
 1037  // have pointer elements, which the SelectionDAG importer can't handle. To
 1038  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
 1039  // the value to use s64 types.
 1040 
 1041  // Custom legalization requires the instruction, if not deleted, must be fully
 1042  // legalized. In order to allow further legalization of the inst, we create
 1043  // a new instruction and erase the existing one.
 1044 
 1045  Register ValReg = MI.getOperand(0).getReg();
 1046  const LLT ValTy = MRI.getType(ValReg);
 1047 
  // s128 loads/stores are expanded to a 64-bit register pair LDP/STP.
 1048  if (ValTy == LLT::scalar(128)) {
  // NOTE(review): lines 1050 and 1052 (the AtomicOrdering values compared
  // against — per the surrounding index, Unordered / Monotonic) were lost
  // during extraction; this assert rejects stronger atomic orderings.
 1049  assert((*MI.memoperands_begin())->getSuccessOrdering() ==
 1051  (*MI.memoperands_begin())->getSuccessOrdering() ==
 1053  assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
 1054  LLT s64 = LLT::scalar(64);
 1055  MachineInstrBuilder NewI;
 1056  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
  // Load both halves with LDPXi and re-merge them into the s128 value.
 1057  NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
 1058  MIRBuilder.buildMerge(ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
 1059  } else {
  // Split the s128 value and store both halves with STPXi.
 1060  auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
 1061  NewI = MIRBuilder.buildInstr(
 1062  AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
 1063  }
  // Fold an add-of-constant into the LDP/STP immediate where possible;
  // the immediate operand is the byte offset scaled down by 8.
 1064  Register Base;
 1065  int Offset;
 1066  matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
 1067  NewI.addUse(Base);
 1068  NewI.addImm(Offset / 8);
 1069 
 1070  NewI.cloneMemRefs(MI);
  // NOTE(review): lines 1071-1072 (the start of the
  // constrainSelectedInstRegOperands(*NewI, ...) call whose trailing
  // argument appears below) were lost during extraction.
 1073  *ST->getRegBankInfo());
 1074  MI.eraseFromParent();
 1075  return true;
 1076  }
 1077 
  // Otherwise only vectors of address-space-0 pointers are handled here.
 1078  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
 1079  ValTy.getElementType().getAddressSpace() != 0) {
 1080  LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
 1081  return false;
 1082  }
 1083 
  // Rewrite <N x p0> as <N x sPtrSize> so the integer patterns apply.
 1084  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
 1085  const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
 1086  auto &MMO = **MI.memoperands_begin();
 1087  MMO.setType(NewTy);
 1088 
 1089  if (MI.getOpcode() == TargetOpcode::G_STORE) {
 1090  auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
 1091  MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
 1092  } else {
 1093  auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
 1094  MIRBuilder.buildBitcast(ValReg, NewLoad);
 1095  }
 1096  MI.eraseFromParent();
 1097  return true;
 1098 }
1099 
1100 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
  // Lower G_VAARG for the AAPCS va_list: load the current list pointer,
  // realign it if the requested alignment exceeds pointer alignment, load
  // the value, then advance and store back the list pointer.
  // NOTE(review): original line 1101 (presumably the MachineRegisterInfo
  // &MRI parameter) was lost during extraction.
 1102  MachineIRBuilder &MIRBuilder) const {
 1103  MachineFunction &MF = MIRBuilder.getMF();
 1104  Align Alignment(MI.getOperand(2).getImm());
 1105  Register Dst = MI.getOperand(0).getReg();
 1106  Register ListPtr = MI.getOperand(1).getReg();
 1107 
 1108  LLT PtrTy = MRI.getType(ListPtr);
 1109  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
 1110 
 1111  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
 1112  const Align PtrAlign = Align(PtrSize);
  // Load the current stack slot pointer out of the va_list.
  // NOTE(review): line 1115 (the MachinePointerInfo/getMachineMemOperand
  // argument of this buildLoad) was lost during extraction; likewise
  // lines 1132 and 1140-1141 for the loads/stores below.
 1113  auto List = MIRBuilder.buildLoad(
 1114  PtrTy, ListPtr,
 1116  PtrTy, PtrAlign));
 1117 
 1118  MachineInstrBuilder DstPtr;
 1119  if (Alignment > PtrAlign) {
 1120  // Realign the list to the actual required alignment.
 1121  auto AlignMinus1 =
 1122  MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
 1123  auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
 1124  DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
 1125  } else
 1126  DstPtr = List;
 1127 
  // Load the argument value itself from the (possibly realigned) slot.
 1128  LLT ValTy = MRI.getType(Dst);
 1129  uint64_t ValSize = ValTy.getSizeInBits() / 8;
 1130  MIRBuilder.buildLoad(
 1131  Dst, DstPtr,
 1133  ValTy, std::max(Alignment, PtrAlign)));
 1134 
  // Advance the list pointer past this argument, rounded up to pointer
  // alignment, and write it back into the va_list.
 1135  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
 1136 
 1137  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
 1138 
 1139  MIRBuilder.buildStore(NewList, ListPtr,
 1142  PtrTy, PtrAlign));
 1143 
 1144  MI.eraseFromParent();
 1145  return true;
 1146 }
1147 
1148 bool AArch64LegalizerInfo::legalizeBitfieldExtract(
  // NOTE(review): original line 1149 (the parameter list of this signature)
  // was lost during extraction — confirm against upstream.
 1150  // Only legal if we can select immediate forms.
 1151  // TODO: Lower this otherwise.
  // Keep the bitfield extract only when both the lsb (operand 2) and width
  // (operand 3) operands are compile-time constants, since only the
  // immediate instruction forms can be selected for it.
 1152  return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
 1153  getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
 1154 }
1155 
1156 bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
  // NOTE(review): original line 1157 (presumably the MachineRegisterInfo
  // &MRI parameter) was lost during extraction.
 1158  LegalizerHelper &Helper) const {
 1159  // While there is no integer popcount instruction, it can
 1160  // be more efficiently lowered to the following sequence that uses
 1161  // AdvSIMD registers/instructions as long as the copies to/from
 1162  // the AdvSIMD registers are cheap.
 1163  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
 1164  // CNT V0.8B, V0.8B // 8xbyte pop-counts
 1165  // ADDV B0, V0.8B // sum 8xbyte pop-counts
 1166  // UMOV X0, V0.B[0] // copy byte result back to integer reg
 1167  //
 1168  // For 128 bit vector popcounts, we lower to the following sequence:
 1169  // cnt.16b v0, v0 // v8s16, v4s32, v2s64
 1170  // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
 1171  // uaddlp.4s v0, v0 // v4s32, v2s64
 1172  // uaddlp.2d v0, v0 // v2s64
 1173  //
 1174  // For 64 bit vector popcounts, we lower to the following sequence:
 1175  // cnt.8b v0, v0 // v4s16, v2s32
 1176  // uaddlp.4h v0, v0 // v4s16, v2s32
 1177  // uaddlp.2s v0, v0 // v2s32
 1178 
  // Bail out to the generic lowering when NEON is unavailable or the
  // function forbids implicit FP/SIMD register use.
 1179  if (!ST->hasNEON() ||
 1180  MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
 1181  return false;
 1182  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
 1183  Register Dst = MI.getOperand(0).getReg();
 1184  Register Val = MI.getOperand(1).getReg();
 1185  LLT Ty = MRI.getType(Val);
 1186 
 1187  assert(Ty == MRI.getType(Dst) &&
 1188  "Expected src and dst to have the same type!");
 1189  unsigned Size = Ty.getSizeInBits();
 1190 
 1191  // Pre-conditioning: widen Val up to the nearest vector type.
 1192  // s32,s64,v4s16,v2s32 -> v8i8
 1193  // v8s16,v4s32,v2s64 -> v16i8
 1194  LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
 1195  if (Ty.isScalar()) {
 1196  assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
 1197  if (Size == 32) {
  // s32 is zero-extended to s64 first so it can be bitcast to v8s8.
 1198  Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
 1199  }
 1200  }
 1201  Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
 1202 
 1203  // Count bits in each byte-sized lane.
 1204  auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
 1205 
 1206  // Sum across lanes.
  // Scalars use a single uaddlv (across-vector sum, s32 result); vectors
  // chain uaddlp pairwise widening adds up to the original element width.
 1207  Register HSum = CTPOP.getReg(0);
 1208  unsigned Opc;
 1209  SmallVector<LLT> HAddTys;
 1210  if (Ty.isScalar()) {
 1211  Opc = Intrinsic::aarch64_neon_uaddlv;
 1212  HAddTys.push_back(LLT::scalar(32));
 1213  } else if (Ty == LLT::fixed_vector(8, 16)) {
 1214  Opc = Intrinsic::aarch64_neon_uaddlp;
 1215  HAddTys.push_back(LLT::fixed_vector(8, 16));
 1216  } else if (Ty == LLT::fixed_vector(4, 32)) {
 1217  Opc = Intrinsic::aarch64_neon_uaddlp;
 1218  HAddTys.push_back(LLT::fixed_vector(8, 16));
 1219  HAddTys.push_back(LLT::fixed_vector(4, 32));
 1220  } else if (Ty == LLT::fixed_vector(2, 64)) {
 1221  Opc = Intrinsic::aarch64_neon_uaddlp;
 1222  HAddTys.push_back(LLT::fixed_vector(8, 16));
 1223  HAddTys.push_back(LLT::fixed_vector(4, 32));
 1224  HAddTys.push_back(LLT::fixed_vector(2, 64));
 1225  } else if (Ty == LLT::fixed_vector(4, 16)) {
 1226  Opc = Intrinsic::aarch64_neon_uaddlp;
 1227  HAddTys.push_back(LLT::fixed_vector(4, 16));
 1228  } else if (Ty == LLT::fixed_vector(2, 32)) {
 1229  Opc = Intrinsic::aarch64_neon_uaddlp;
 1230  HAddTys.push_back(LLT::fixed_vector(4, 16));
 1231  HAddTys.push_back(LLT::fixed_vector(2, 32));
 1232  } else
 1233  llvm_unreachable("unexpected vector shape");
 1234  MachineInstrBuilder UADD;
 1235  for (LLT HTy : HAddTys) {
 1236  UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
 1237  .addUse(HSum);
 1238  HSum = UADD.getReg(0);
 1239  }
 1240 
 1241  // Post-conditioning.
  // uaddlv produced an s32; widen it back up for s64/s128 destinations,
  // otherwise retarget the last instruction's def straight at Dst.
 1242  if (Ty.isScalar() && (Size == 64 || Size == 128))
 1243  MIRBuilder.buildZExt(Dst, UADD);
 1244  else
 1245  UADD->getOperand(0).setReg(Dst);
 1246  MI.eraseFromParent();
 1247  return true;
 1248 }
1249 
1250 bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
  // Lower a 128-bit G_ATOMIC_CMPXCHG either to CASP (with LSE) or to a
  // CMP_SWAP_128 pseudo (LL/SC expansion later).
  // NOTE(review): original line 1251 (the MachineInstr/MachineRegisterInfo/
  // LegalizerHelper parameters of this signature) was lost during
  // extraction — confirm against upstream.
 1252  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
 1253  LLT s64 = LLT::scalar(64);
 1254  auto Addr = MI.getOperand(1).getReg();
 1255  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
 1256  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
 1257  auto DstLo = MRI.createGenericVirtualRegister(s64);
 1258  auto DstHi = MRI.createGenericVirtualRegister(s64);
 1259 
 1260  MachineInstrBuilder CAS;
 1261  if (ST->hasLSE()) {
 1262  // We have 128-bit CASP instructions taking XSeqPair registers, which are
 1263  // s128. We need the merge/unmerge to bracket the expansion and pair up with
 1264  // the rest of the MIR so we must reassemble the extracted registers into a
 1265  // 128-bit known-regclass one with code like this:
 1266  //
 1267  // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
 1268  // %out = CASP %in1, ...
 1269  // %OldLo = G_EXTRACT %out, 0
 1270  // %OldHi = G_EXTRACT %out, 64
 1271  auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
 1272  unsigned Opcode;
  // NOTE(review): the "case AtomicOrdering::..." labels of this switch
  // (original lines 1274, 1277, 1280-1281) were lost during extraction;
  // each Opcode assignment below belongs to one of those missing labels.
 1273  switch (Ordering) {
 1275  Opcode = AArch64::CASPAX;
 1276  break;
 1278  Opcode = AArch64::CASPLX;
 1279  break;
 1282  Opcode = AArch64::CASPALX;
 1283  break;
 1284  default:
 1285  Opcode = AArch64::CASPX;
 1286  break;
 1287  }
 1288 
 1289  LLT s128 = LLT::scalar(128);
 1290  auto CASDst = MRI.createGenericVirtualRegister(s128);
 1291  auto CASDesired = MRI.createGenericVirtualRegister(s128);
 1292  auto CASNew = MRI.createGenericVirtualRegister(s128);
 1293  MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
 1294  .addUse(DesiredI->getOperand(0).getReg())
 1295  .addImm(AArch64::sube64)
 1296  .addUse(DesiredI->getOperand(1).getReg())
 1297  .addImm(AArch64::subo64)
 1298  MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
 1299  .addUse(NewI->getOperand(0).getReg())
 1300  .addImm(AArch64::sube64)
 1301  .addUse(NewI->getOperand(1).getReg())
 1302  .addImm(AArch64::subo64);
 1303 
 1304  CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
 1305 
 1306  MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
 1307  MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
 1308  } else {
 1309  // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
 1310  // can take arbitrary registers so it just has the normal GPR64 operands the
 1311  // rest of AArch64 is expecting.
 1312  auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
 1313  unsigned Opcode;
  // NOTE(review): the "case AtomicOrdering::..." labels of this switch
  // (original lines 1315, 1318, 1321-1322) were also lost in extraction.
 1314  switch (Ordering) {
 1316  Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
 1317  break;
 1319  Opcode = AArch64::CMP_SWAP_128_RELEASE;
 1320  break;
 1323  Opcode = AArch64::CMP_SWAP_128;
 1324  break;
 1325  default:
 1326  Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
 1327  break;
 1328  }
 1329 
 1330  auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 1331  CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
 1332  {Addr, DesiredI->getOperand(0),
 1333  DesiredI->getOperand(1), NewI->getOperand(0),
 1334  NewI->getOperand(1)});
 1335  }
 1336 
 1337  CAS.cloneMemRefs(MI);
  // NOTE(review): lines 1338-1339 (the start of the
  // constrainSelectedInstRegOperands(*CAS, ...) call whose trailing
  // argument appears below) were lost during extraction.
 1340  *ST->getRegBankInfo());
 1341 
  // Reassemble the old 128-bit value from the two halves.
 1342  MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi});
 1343  MI.eraseFromParent();
 1344  return true;
 1345 }
1346 
1347 bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1348  LegalizerHelper &Helper) const {
1349  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1350  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1351  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1352  auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1353  MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1354  MI.eraseFromParent();
1355  return true;
1356 }
AArch64LegalizerInfo.h
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
MIPatternMatch.h
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::AArch64II::MO_G3
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
Definition: AArch64BaseInfo.h:686
ValueTypes.h
llvm::AArch64Subtarget::isTargetWindows
bool isTargetWindows() const
Definition: AArch64Subtarget.h:516
llvm::LegalizeRuleSet::widenScalarToNextPow2
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
Definition: LegalizerInfo.h:838
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
matchLDPSTPAddrMode
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
Definition: AArch64LegalizerInfo.cpp:1015
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::LLT::getScalarSizeInBits
unsigned getScalarSizeInBits() const
Definition: LowLevelTypeImpl.h:213
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
AArch64RegisterBankInfo.h
llvm::LegalizeRuleSet::unsupportedIfMemSizeNotPow2
LegalizeRuleSet & unsupportedIfMemSizeNotPow2()
Definition: LegalizerInfo.h:797
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:152
llvm::LegalizerInfo::getActionDefinitionsBuilder
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
Definition: LegalizerInfo.cpp:290
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::LegalizeRuleSet::minScalarEltSameAsIf
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
Definition: LegalizerInfo.h:993
llvm::LegalizeRuleSet::maxScalarIf
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
Definition: LegalizerInfo.h:935
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineIRBuilder::getMRI
MachineRegisterInfo * getMRI()
Getter for MRI.
Definition: MachineIRBuilder.h:280
llvm::LegalizeRuleSet::customFor
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
Definition: LegalizerInfo.h:812
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:430
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::LegalizeRuleSet::clampNumElements
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
Definition: LegalizerInfo.h:1078
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::LegacyLegalizerInfo::computeTables
void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
Definition: LegacyLegalizerInfo.cpp:102
llvm::AArch64Subtarget::isTargetDarwin
bool isTargetDarwin() const
Definition: AArch64Subtarget.h:513
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::LegalizeRuleSet::minScalarOrEltIf
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
Definition: LegalizerInfo.h:895
llvm::AArch64II::MO_PREL
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
Definition: AArch64BaseInfo.h:737
llvm::AArch64Subtarget::getInstrInfo
const AArch64InstrInfo * getInstrInfo() const override
Definition: AArch64Subtarget.h:324
llvm::LLT::changeElementCount
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelTypeImpl.h:190
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::LLT::vector
static LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:57
llvm::LegalizeRuleSet::scalarizeIf
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
Definition: LegalizerInfo.h:878
MachineIRBuilder.h
llvm::LegalizeRuleSet::minScalarOrElt
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
Definition: LegalizerInfo.h:886
llvm::LegalizeRuleSet::scalarize
LegalizeRuleSet & scalarize(unsigned TypeIdx)
Definition: LegalizerInfo.h:872
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::LegalizeRuleSet::lower
LegalizeRuleSet & lower()
The instruction is lowered.
Definition: LegalizerInfo.h:643
llvm::LegalizerHelper
Definition: LegalizerHelper.h:39
llvm::LegalizeMutations::changeTo
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
Definition: LegalizeMutations.cpp:17
LegalizerInfo.h
llvm::LegalityPredicates::atomicOrderingAtLeastOrStrongerThan
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Definition: LegalityPredicates.cpp:198
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::AArch64Subtarget::getTargetLowering
const AArch64TargetLowering * getTargetLowering() const override
Definition: AArch64Subtarget.h:321
llvm::constrainSelectedInstRegOperands
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:136
llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:255
llvm::LegalizeRuleSet::legalIf
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
Definition: LegalizerInfo.h:570
MachineRegisterInfo.h
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::LegalizeRuleSet::minScalar
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
Definition: LegalizerInfo.h:906
always
bar al al movzbl eax ret Missed when stored in a memory are stored as single byte objects the value of which is always(false) or 1(true). We are not using this fact
Definition: README.txt:1412
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LLT::fixed_vector
static LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:75
llvm::MachineIRBuilder::buildBitReverse
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
Definition: MachineIRBuilder.h:1872
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::LegalizeRuleSet::customIf
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
Definition: LegalizerInfo.h:806
llvm::LegalityPredicates::typeIs
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
Definition: LegalityPredicates.cpp:28
llvm::MachineIRBuilder::buildUnmerge
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
Definition: MachineIRBuilder.cpp:603
llvm::MachineIRBuilder::buildZExt
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
Definition: MachineIRBuilder.cpp:424
llvm::MachineIRBuilder::buildLoad
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
Definition: MachineIRBuilder.h:832
llvm::LegalityPredicates::smallerThan
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
Definition: LegalityPredicates.cpp:117
llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, true > m_GPtrAdd(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:328
llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition: AArch64Subtarget.h:524
Intrinsics.h
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:207
llvm::LLT::getSizeInBits
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelTypeImpl.h:153
llvm::AArch64LegalizerInfo::legalizeIntrinsic
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
Definition: AArch64LegalizerInfo.cpp:956
llvm::LegalizeRuleSet::lowerIfMemSizeNotPow2
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Definition: LegalizerInfo.h:801
Utils.h
llvm::MachineIRBuilder::buildCTLZ
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
Definition: MachineIRBuilder.h:1554
llvm::LegalizeRuleSet::fewerElementsIf
LegalizeRuleSet & fewerElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Remove elements to reach the type selected by the mutation if the predicate is true.
Definition: LegalizerInfo.h:776
TargetOpcodes.h
llvm::MachineIRBuilder::buildConcatVectors
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
Definition: MachineIRBuilder.cpp:689
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
Concat
static constexpr int Concat[]
Definition: X86InterleavedAccess.cpp:239
llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition: MachineIRBuilder.h:262
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AArch64LegalizerInfo::AArch64LegalizerInfo
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Definition: AArch64LegalizerInfo.cpp:41
llvm::GISelChangeObserver::changingInstr
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
llvm::LegalizeRuleSet::clampMaxNumElements
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
Definition: LegalizerInfo.h:1054
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::LegalizeRuleSet::maxScalarEltSameAsIf
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
Definition: LegalizerInfo.h:1009
Type.h
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition: MachineInstrBuilder.h:213
llvm::AArch64LegalizerInfo::legalizeCustom
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override
Called for instructions with the Custom LegalizationAction.
Definition: AArch64LegalizerInfo.cpp:797
llvm::MachineIRBuilder::buildCTPOP
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
Definition: MachineIRBuilder.h:1549
llvm::LLT::pointer
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelTypeImpl.h:50
llvm::LLT::getAddressSpace
unsigned getAddressSpace() const
Definition: LowLevelTypeImpl.h:227
llvm::GISelChangeObserver::changedInstr
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
llvm::MachineIRBuilder::buildMaskLowPtrBits
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
Definition: MachineIRBuilder.cpp:208
llvm::LegalizeMutations::scalarize
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
Definition: LegalizeMutations.cpp:87
llvm::LegalizeRuleSet::lowerIf
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
Definition: LegalizerInfo.h:652
llvm::LLT::divide
LLT divide(int Factor) const
Return a type that is Factor times smaller.
Definition: LowLevelTypeImpl.h:197
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:212
llvm::LegalizeRuleSet::legalFor
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
Definition: LegalizerInfo.h:577
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
uint64_t
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::MachineIRBuilder::buildBitcast
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
Definition: MachineIRBuilder.h:633
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:226
llvm::AArch64II::MO_NC
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
Definition: AArch64BaseInfo.h:718
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::AArch64II::MO_PAGEOFF
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
Definition: AArch64BaseInfo.h:682
llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition: MachineIRBuilder.cpp:182
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::LLT::isVector
bool isVector() const
Definition: LowLevelTypeImpl.h:123
llvm::LegalizeRuleSet::clampScalar
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
Definition: LegalizerInfo.h:951
llvm::LLT::getNumElements
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelTypeImpl.h:127
llvm::LegalizeRuleSet::legalForCartesianProduct
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
Definition: LegalizerInfo.h:609
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::LLT::isPointer
bool isPointer() const
Definition: LowLevelTypeImpl.h:121
llvm::LegalityPredicates::typeInSet
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Definition: LegalityPredicates.cpp:34
llvm::MachineRegisterInfo::createGenericVirtualRegister
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Definition: MachineRegisterInfo.cpp:188
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::AArch64Subtarget::hasNEON
bool hasNEON() const
Definition: AArch64Subtarget.h:382
llvm::LLT::isScalar
bool isScalar() const
Definition: LowLevelTypeImpl.h:119
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::LegalityQuery::Opcode
unsigned Opcode
Definition: LegalizerInfo.h:107
llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Definition: MachineIRBuilder.h:367
llvm::LegalityQuery
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Definition: LegalizerInfo.h:106
llvm::LegalizeRuleSet::customForCartesianProduct
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
Definition: LegalizerInfo.h:822
llvm::getIConstantVRegValWithLookThrough
Optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:393
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::LegalizerHelper::Observer
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
Definition: LegalizerHelper.h:46
llvm::MachineIRBuilder::buildExtract
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ...
Definition: MachineIRBuilder.cpp:516
llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29
llvm::LegalizeMutations::widenScalarOrEltToNextPow2
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
Definition: LegalizeMutations.cpp:56
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:135
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::LegalityPredicates::isScalar
LegalityPredicate isScalar(unsigned TypeIdx)
True iff the specified type index is a scalar.
Definition: LegalityPredicates.cpp:67
llvm::AtomicOrdering::Release
@ Release
llvm::LegalizerInfo::getLegacyLegalizerInfo
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
Definition: LegalizerInfo.h:1112
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:339
llvm::LegalizeRuleSet::narrowScalarIf
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
Definition: LegalizerInfo.h:750
llvm::MachineIRBuilder::buildTrunc
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Definition: MachineIRBuilder.cpp:737
llvm::AArch64Subtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: AArch64Subtarget.cpp:247
llvm::AArch64Subtarget::ClassifyGlobalReference
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
Definition: AArch64Subtarget.cpp:254
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::AArch64Subtarget::hasLSE2
bool hasLSE2() const
Definition: AArch64Subtarget.h:387
llvm::LegalityQuery::MMODescrs
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
Definition: LegalizerInfo.h:126
llvm::LegalizeRuleSet::unsupportedIf
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
Definition: LegalizerInfo.h:789
LegalizerHelper.h
llvm::AArch64ISD::ADRP
@ ADRP
Definition: AArch64ISelLowering.h:61
llvm::LegalizeRuleSet::minScalarSameAs
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
Definition: LegalizerInfo.h:964
llvm::AArch64Subtarget::hasLSE
bool hasLSE() const
Definition: AArch64Subtarget.h:386
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:137
llvm::MIPatternMatch::m_ICst
ConstantMatch m_ICst(int64_t &Cst)
Definition: MIPatternMatch.h:74
llvm::LegalizerInfo::verify
void verify(const MCInstrInfo &MII) const
Perform simple self-diagnostic and assert if there is anything obviously wrong with the actions set u...
Definition: LegalizerInfo.cpp:378
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
AArch64Subtarget.h
llvm::MachineIRBuilder::buildIntrinsic
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or G_INTRINSIC_W_SIDE_EFFECTS inst...
Definition: MachineIRBuilder.cpp:713
s1
int s1
Definition: README.txt:182
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:732
llvm::LegalityQuery::Types
ArrayRef< LLT > Types
Definition: LegalizerInfo.h:108
llvm::LegalizeRuleSet::moreElementsToNextPow2
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
Definition: LegalizerInfo.h:1028
llvm::MachineIRBuilder::buildSExt
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Definition: MachineIRBuilder.cpp:419
List
const NodeList & List
Definition: RDFGraph.cpp:201
llvm::LLT::getElementCount
ElementCount getElementCount() const
Definition: LowLevelTypeImpl.h:144
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::LegalizerHelper::MIRBuilder
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
Definition: LegalizerHelper.h:43
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::MachineIRBuilder::buildMerge
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
Definition: MachineIRBuilder.cpp:586
llvm::LegalizeRuleSet::custom
LegalizeRuleSet & custom()
Unconditionally custom lower.
Definition: LegalizerInfo.h:832
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::LegalizeMutations::moreElementsToNextPow2
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
Definition: LegalizeMutations.cpp:76
extractParts
static void extractParts(Register Reg, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs)
Definition: AArch64LegalizerInfo.cpp:852
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::LegalizeRuleSet::scalarSameSizeAs
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
Definition: LegalizerInfo.h:987
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:24
llvm::AArch64II::MO_TAGGED
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
Definition: AArch64BaseInfo.h:745
llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:387
llvm::LLT::getElementType
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelTypeImpl.h:237
llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38
llvm::LegalizeRuleSet::libcallFor
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
Definition: LegalizerInfo.h:722
llvm::LinearPolySize::divideCoefficientBy
LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:361
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:43
llvm::LLT::changeElementSize
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelTypeImpl.h:181
llvm::AArch64II::MO_GOT
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
Definition: AArch64BaseInfo.h:713
libcall
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj obj **nth_el If the i64 division is lowered to a libcall
Definition: README.txt:127
llvm::LegalizeRuleSet::legalForTypesWithMemDesc
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
Definition: LegalizerInfo.h:600
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:58
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::AArch64II::MO_PAGE
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Definition: AArch64BaseInfo.h:677
llvm::LLT
Definition: LowLevelTypeImpl.h:40