//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

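// A note on the rule DSL used throughout the constructor below: each
// getActionDefinitionsBuilder() call attaches an ordered list of predicates
// and mutations to one or more generic opcodes, and the first rule matching
// a legality query decides the action. For example, an s8 G_ADD falls
// through legalFor() and is widened to s32 by clampScalar() before it
// becomes legal.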
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);
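  // The names above follow MIR type syntax: sN is an N-bit scalar, p0 a
  // 64-bit pointer in address space 0, and vXsN shorthand for <X x sN>.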

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });
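  // fewerElementsIf() pairs a predicate with a mutation returning the
  // (type index, new type) to aim for: any s64 vector other than v2s64 is
  // reshaped towards v2s64 above, and all other vectors are scalarized.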

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
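  // For example, an s8 G_SDIV skips legalFor() and libcallFor(), is widened
  // to s32 by clampScalar(), and is then legal; an s128 G_SDIV instead
  // becomes a runtime library call (__divti3 under the usual
  // compiler-rt/libgcc naming).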

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
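  // Each legalForTypesWithMemDesc() entry is {result type, pointer type,
  // memory size in bits, minimum alignment}; e.g. {s32, p0, 8, 8} above is
  // a one-byte extending load into a 32-bit register.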

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);
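  // customIf(IsPtrVecPred) routes loads and stores of pointer vectors (e.g.
  // v2p0) to legalizeLoadStore() below, which bitcasts the value to an
  // equivalent integer vector so the imported s64 patterns can fire.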

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(3, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(2, AtomicOrdering::Monotonic)));
  }
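  // No rules are registered for these operations on subtargets without LSE;
  // such atomics are expected to have been expanded into load/store-exclusive
  // loops earlier in the pipeline (e.g. by AtomicExpandPass), not by this
  // legalizer.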

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }
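  // A sketch of the widening mutation above: s96 is neither a power of 2
  // nor a multiple of 64, so it widens to the next power of 2 (s128); an
  // s257 big type would instead round up to s320, the next multiple of 64,
  // since past 256 bits that beats the next power of 2 (s512).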

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });
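  // minScalar(2, s64) above widens the element-index operand (type index 2)
  // to at least s64 before the legality check on the vector type.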

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
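  // lower() defers G_DYN_STACKALLOC to the generic LegalizerHelper, which
  // (in this version) expands it into explicit stack-pointer arithmetic
  // rather than keeping a single dynamic-alloca operation.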

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.
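  // A sketch of the transformation, in MIR terms: G_STORE %val(<2 x p0>),
  // %addr(p0) becomes %cast:_(<2 x s64>) = G_BITCAST %val followed by
  // G_STORE %cast(<2 x s64>), %addr(p0), reusing the original memory
  // operand.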

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. In order to allow further legalization of the
  // instruction, we create a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // Load the current head of the va_list.
  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  // Load the argument value itself.
  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  // Bump the list pointer past the value just read and store it back.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}