//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
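  // Shorthand LLT (low-level type) constants used throughout the rules
  // below: pN is a pointer in address space N, sN an N-bit scalar, and
  // vMsN a vector of M elements that are each N bits wide.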
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

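  // Each getActionDefinitionsBuilder() call below attaches an ordered list
  // of legalization rules to one or more generic opcodes. For a given type
  // (and, for memory operations, MMO) query, the first matching rule decides
  // the action: keep the instruction as legal, lower it, widen or narrow a
  // scalar, change a vector's element count, emit a libcall, and so on.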
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

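  // 32-bit scalar shifts with a 32-bit amount are custom-legalized so that
  // a constant shift amount can be promoted to 64 bits for the imported
  // SelectionDAG patterns; see legalizeShlAshrLshr() below.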
  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

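  // The {Type0, Type1, MemSize, Align} tuples below pair register types with
  // a memory-access description: Type0 is the value type, Type1 the pointer
  // type, followed by the memory size in bits and the minimum alignment.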
  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

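  // Vectors of pointers (in address space 0) can't be matched by the
  // imported SelectionDAG patterns, so loads and stores of them are
  // custom-legalized below by bitcasting to an equivalent integer vector.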
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; },
          0, s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 is not legal.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_ANYEXT}).legalIf(ExtLegalFunc);
  getActionDefinitionsBuilder(G_SEXT)
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

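  // With LSE (the ARMv8.1 Large System Extensions) the subtarget has native
  // atomic read-modify-write and compare-and-swap instructions, so those
  // operations are legal; cmpxchg-with-success is lowered to a plain
  // G_ATOMIC_CMPXCHG plus a comparison of the loaded value.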
  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  computeTables();
  verify(*ST.getInstrInfo());
}

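// legalizeCustom() is the entry point for everything marked customIf() or
// customForCartesianProduct() above: it dispatches on the opcode to the
// per-operation helpers that follow.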
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
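  // For example (hand-written MIR, not taken from the source):
  //   %amt:_(s32) = G_CONSTANT i32 5
  //   %d:_(s32) = G_LSHR %a(s32), %amt(s32)
  // becomes
  //   %ext:_(s64) = G_ZEXT %amt(s32)
  //   %d:_(s32) = G_LSHR %a(s32), %ext(s64)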
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. To allow further legalization of the instruction, we
  // create a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

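// Expand G_VAARG by hand: load the current argument pointer out of the
// va_list slot, realign it if the requested alignment exceeds the slot's
// natural alignment, load the argument itself, then store the advanced
// pointer back to the va_list.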
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}