LLVM  9.0.0svn
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1 //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the Machinelegalizer class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64LegalizerInfo.h"
15 #include "AArch64Subtarget.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/Type.h"
23 
24 #define DEBUG_TYPE "aarch64-legalinfo"
25 
26 using namespace llvm;
27 using namespace LegalizeActions;
28 using namespace LegalizeMutations;
29 using namespace LegalityPredicates;
30 
32  using namespace TargetOpcode;
33  const LLT p0 = LLT::pointer(0, 64);
34  const LLT s1 = LLT::scalar(1);
35  const LLT s8 = LLT::scalar(8);
36  const LLT s16 = LLT::scalar(16);
37  const LLT s32 = LLT::scalar(32);
38  const LLT s64 = LLT::scalar(64);
39  const LLT s128 = LLT::scalar(128);
40  const LLT s256 = LLT::scalar(256);
41  const LLT s512 = LLT::scalar(512);
42  const LLT v16s8 = LLT::vector(16, 8);
43  const LLT v8s8 = LLT::vector(8, 8);
44  const LLT v4s8 = LLT::vector(4, 8);
45  const LLT v8s16 = LLT::vector(8, 16);
46  const LLT v4s16 = LLT::vector(4, 16);
47  const LLT v2s16 = LLT::vector(2, 16);
48  const LLT v2s32 = LLT::vector(2, 32);
49  const LLT v4s32 = LLT::vector(4, 32);
50  const LLT v2s64 = LLT::vector(2, 64);
51  const LLT v2p0 = LLT::vector(2, p0);
52 
53  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
54  .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
55  .clampScalar(0, s1, s64)
56  .widenScalarToNextPow2(0, 8)
57  .fewerElementsIf(
58  [=](const LegalityQuery &Query) {
59  return Query.Types[0].isVector() &&
60  (Query.Types[0].getElementType() != s64 ||
61  Query.Types[0].getNumElements() != 2);
62  },
63  [=](const LegalityQuery &Query) {
64  LLT EltTy = Query.Types[0].getElementType();
65  if (EltTy == s64)
66  return std::make_pair(0, LLT::vector(2, 64));
67  return std::make_pair(0, EltTy);
68  });
69 
70  getActionDefinitionsBuilder(G_PHI)
71  .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
72  .clampScalar(0, s16, s64)
73  .widenScalarToNextPow2(0);
74 
75  getActionDefinitionsBuilder(G_BSWAP)
76  .legalFor({s32, s64, v4s32, v2s32, v2s64})
77  .clampScalar(0, s16, s64)
78  .widenScalarToNextPow2(0);
79 
80  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
81  .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
82  .clampScalar(0, s32, s64)
83  .widenScalarToNextPow2(0)
84  .clampNumElements(0, v2s32, v4s32)
85  .clampNumElements(0, v2s64, v2s64)
86  .moreElementsToNextPow2(0);
87 
88  getActionDefinitionsBuilder(G_SHL)
89  .legalFor({{s32, s32}, {s64, s64},
90  {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
91  .clampScalar(1, s32, s64)
92  .clampScalar(0, s32, s64)
93  .widenScalarToNextPow2(0)
94  .clampNumElements(0, v2s32, v4s32)
95  .clampNumElements(0, v2s64, v2s64)
96  .moreElementsToNextPow2(0)
97  .minScalarSameAs(1, 0);
98 
99  getActionDefinitionsBuilder(G_GEP)
100  .legalFor({{p0, s64}})
101  .clampScalar(1, s64, s64);
102 
103  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
104 
105  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
106  .legalFor({s32, s64})
107  .clampScalar(0, s32, s64)
108  .widenScalarToNextPow2(0)
109  .scalarize(0);
110 
111  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
112  .legalFor({{s32, s32}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
113  .clampScalar(1, s32, s64)
114  .clampScalar(0, s32, s64)
115  .minScalarSameAs(1, 0);
116 
117  getActionDefinitionsBuilder({G_SREM, G_UREM})
118  .lowerFor({s1, s8, s16, s32, s64});
119 
120  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
121  .lowerFor({{s64, s1}});
122 
123  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
124 
125  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
126  .legalFor({{s32, s1}, {s64, s1}});
127 
128  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
129  .legalFor({s32, s64, v2s64, v4s32, v2s32});
130 
131  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
132 
133  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
134  G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
135  G_FNEARBYINT})
136  // If we don't have full FP16 support, then scalarize the elements of
137  // vectors containing fp16 types.
138  .fewerElementsIf(
139  [=, &ST](const LegalityQuery &Query) {
140  const auto &Ty = Query.Types[0];
141  return Ty.isVector() && Ty.getElementType() == s16 &&
142  !ST.hasFullFP16();
143  },
144  [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
145  // If we don't have full FP16 support, then widen s16 to s32 if we
146  // encounter it.
147  .widenScalarIf(
148  [=, &ST](const LegalityQuery &Query) {
149  return Query.Types[0] == s16 && !ST.hasFullFP16();
150  },
151  [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
152  .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
153 
154  getActionDefinitionsBuilder(
155  {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
156  // We need a call for these, so we always need to scalarize.
157  .scalarize(0)
158  // Regardless of FP16 support, widen 16-bit elements to 32-bits.
159  .minScalar(0, s32)
160  .libcallFor({s32, s64, v2s32, v4s32, v2s64});
161 
162  getActionDefinitionsBuilder(G_INSERT)
163  .unsupportedIf([=](const LegalityQuery &Query) {
164  return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
165  })
166  .legalIf([=](const LegalityQuery &Query) {
167  const LLT &Ty0 = Query.Types[0];
168  const LLT &Ty1 = Query.Types[1];
169  if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
170  return false;
171  return isPowerOf2_32(Ty1.getSizeInBits()) &&
172  (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
173  })
174  .clampScalar(0, s32, s64)
175  .widenScalarToNextPow2(0)
176  .maxScalarIf(typeInSet(0, {s32}), 1, s16)
177  .maxScalarIf(typeInSet(0, {s64}), 1, s32)
178  .widenScalarToNextPow2(1);
179 
180  getActionDefinitionsBuilder(G_EXTRACT)
181  .unsupportedIf([=](const LegalityQuery &Query) {
182  return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
183  })
184  .legalIf([=](const LegalityQuery &Query) {
185  const LLT &Ty0 = Query.Types[0];
186  const LLT &Ty1 = Query.Types[1];
187  if (Ty1 != s32 && Ty1 != s64)
188  return false;
189  if (Ty1 == p0)
190  return true;
191  return isPowerOf2_32(Ty0.getSizeInBits()) &&
192  (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
193  })
194  .clampScalar(1, s32, s64)
195  .widenScalarToNextPow2(1)
196  .maxScalarIf(typeInSet(1, {s32}), 0, s16)
197  .maxScalarIf(typeInSet(1, {s64}), 0, s32)
198  .widenScalarToNextPow2(0);
199 
200  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
201  .legalForTypesWithMemDesc({{s32, p0, 8, 8},
202  {s32, p0, 16, 8},
203  {s32, p0, 32, 8},
204  {s64, p0, 8, 2},
205  {s64, p0, 16, 2},
206  {s64, p0, 32, 4},
207  {s64, p0, 64, 8},
208  {p0, p0, 64, 8},
209  {v2s32, p0, 64, 8}})
210  .clampScalar(0, s32, s64)
211  .widenScalarToNextPow2(0)
212  // TODO: We could support sum-of-pow2's but the lowering code doesn't know
213  // how to do that yet.
214  .unsupportedIfMemSizeNotPow2()
215  // Lower anything left over into G_*EXT and G_LOAD
216  .lower();
217 
218  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
219  const LLT &ValTy = Query.Types[0];
220  if (!ValTy.isVector())
221  return false;
222  const LLT EltTy = ValTy.getElementType();
223  return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
224  };
225 
226  getActionDefinitionsBuilder(G_LOAD)
227  .legalForTypesWithMemDesc({{s8, p0, 8, 8},
228  {s16, p0, 16, 8},
229  {s32, p0, 32, 8},
230  {s64, p0, 64, 8},
231  {p0, p0, 64, 8},
232  {v8s8, p0, 64, 8},
233  {v16s8, p0, 128, 8},
234  {v4s16, p0, 64, 8},
235  {v8s16, p0, 128, 8},
236  {v2s32, p0, 64, 8},
237  {v4s32, p0, 128, 8},
238  {v2s64, p0, 128, 8}})
239  // These extends are also legal
240  .legalForTypesWithMemDesc({{s32, p0, 8, 8},
241  {s32, p0, 16, 8}})
242  .clampScalar(0, s8, s64)
243  .widenScalarToNextPow2(0)
244  // TODO: We could support sum-of-pow2's but the lowering code doesn't know
245  // how to do that yet.
246  .unsupportedIfMemSizeNotPow2()
247  // Lower any any-extending loads left into G_ANYEXT and G_LOAD
248  .lowerIf([=](const LegalityQuery &Query) {
249  return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
250  })
251  .clampMaxNumElements(0, s32, 2)
252  .clampMaxNumElements(0, s64, 1)
253  .customIf(IsPtrVecPred);
254 
255  getActionDefinitionsBuilder(G_STORE)
256  .legalForTypesWithMemDesc({{s8, p0, 8, 8},
257  {s16, p0, 16, 8},
258  {s32, p0, 32, 8},
259  {s64, p0, 64, 8},
260  {p0, p0, 64, 8},
261  {v16s8, p0, 128, 8},
262  {v4s16, p0, 64, 8},
263  {v8s16, p0, 128, 8},
264  {v2s32, p0, 64, 8},
265  {v4s32, p0, 128, 8},
266  {v2s64, p0, 128, 8}})
267  .clampScalar(0, s8, s64)
268  .widenScalarToNextPow2(0)
269  // TODO: We could support sum-of-pow2's but the lowering code doesn't know
270  // how to do that yet.
271  .unsupportedIfMemSizeNotPow2()
272  .lowerIf([=](const LegalityQuery &Query) {
273  return Query.Types[0].isScalar() &&
274  Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
275  })
276  .clampMaxNumElements(0, s32, 2)
277  .clampMaxNumElements(0, s64, 1)
278  .customIf(IsPtrVecPred);
279 
280  // Constants
281  getActionDefinitionsBuilder(G_CONSTANT)
282  .legalFor({p0, s32, s64})
283  .clampScalar(0, s32, s64)
284  .widenScalarToNextPow2(0);
285  getActionDefinitionsBuilder(G_FCONSTANT)
286  .legalFor({s32, s64})
287  .clampScalar(0, s32, s64);
288 
289  getActionDefinitionsBuilder(G_ICMP)
290  .legalFor({{s32, s32},
291  {s32, s64},
292  {s32, p0},
293  {v4s32, v4s32},
294  {v2s32, v2s32},
295  {v2s64, v2s64},
296  {v2s64, v2p0},
297  {v4s16, v4s16},
298  {v8s16, v8s16},
299  {v8s8, v8s8},
300  {v16s8, v16s8}})
301  .clampScalar(0, s32, s32)
302  .clampScalar(1, s32, s64)
303  .minScalarEltSameAsIf(
304  [=](const LegalityQuery &Query) {
305  const LLT &Ty = Query.Types[0];
306  const LLT &SrcTy = Query.Types[1];
307  return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
308  Ty.getElementType() != SrcTy.getElementType();
309  },
310  0, 1)
311  .minScalarOrEltIf(
312  [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
313  1, s32)
314  .minScalarOrEltIf(
315  [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
316  s64)
318 
319  getActionDefinitionsBuilder(G_FCMP)
320  .legalFor({{s32, s32}, {s32, s64}})
321  .clampScalar(0, s32, s32)
322  .clampScalar(1, s32, s64)
323  .widenScalarToNextPow2(1);
324 
325  // Extensions
326  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
327  .legalIf([=](const LegalityQuery &Query) {
328  unsigned DstSize = Query.Types[0].getSizeInBits();
329 
330  // Make sure that we have something that will fit in a register, and
331  // make sure it's a power of 2.
332  if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
333  return false;
334 
335  const LLT &SrcTy = Query.Types[1];
336 
337  // Special case for s1.
338  if (SrcTy == s1)
339  return true;
340 
341  // Make sure we fit in a register otherwise. Don't bother checking that
342  // the source type is below 128 bits. We shouldn't be allowing anything
343  // through which is wider than the destination in the first place.
344  unsigned SrcSize = SrcTy.getSizeInBits();
345  if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
346  return false;
347 
348  return true;
349  });
350 
351  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
352 
353  // FP conversions
354  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
355  {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
356  getActionDefinitionsBuilder(G_FPEXT).legalFor(
357  {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
358 
359  // Conversions
360  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
361  .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
362  .clampScalar(0, s32, s64)
363  .widenScalarToNextPow2(0)
364  .clampScalar(1, s32, s64)
365  .widenScalarToNextPow2(1);
366 
367  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
368  .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
369  .clampScalar(1, s32, s64)
370  .widenScalarToNextPow2(1)
371  .clampScalar(0, s32, s64)
372  .widenScalarToNextPow2(0);
373 
374  // Control-flow
375  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
376  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
377 
378  // Select
379  // FIXME: We can probably do a bit better than just scalarizing vector
380  // selects.
381  getActionDefinitionsBuilder(G_SELECT)
382  .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
383  .clampScalar(0, s32, s64)
384  .widenScalarToNextPow2(0)
385  .scalarize(0);
386 
387  // Pointer-handling
388  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
389  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
390 
391  getActionDefinitionsBuilder(G_PTRTOINT)
392  .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
393  .maxScalar(0, s64)
394  .widenScalarToNextPow2(0, /*Min*/ 8);
395 
396  getActionDefinitionsBuilder(G_INTTOPTR)
397  .unsupportedIf([&](const LegalityQuery &Query) {
398  return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
399  })
400  .legalFor({{p0, s64}});
401 
402  // Casts for 32 and 64-bit width type are just copies.
403  // Same for 128-bit width type, except they are on the FPR bank.
404  getActionDefinitionsBuilder(G_BITCAST)
405  // FIXME: This is wrong since G_BITCAST is not allowed to change the
406  // number of bits but it's what the previous code described and fixing
407  // it breaks tests.
408  .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
409  v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
410  v2p0});
411 
412  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
413 
414  // va_list must be a pointer, but most sized types are pretty easy to handle
415  // as the destination.
416  getActionDefinitionsBuilder(G_VAARG)
417  .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
418  .clampScalar(0, s8, s64)
419  .widenScalarToNextPow2(0, /*Min*/ 8);
420 
421  if (ST.hasLSE()) {
422  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
423  .lowerIf(all(
424  typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
426 
427  getActionDefinitionsBuilder(
428  {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
429  G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
430  G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
431  .legalIf(all(
432  typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
434  }
435 
436  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
437 
438  // Merge/Unmerge
439  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
440  unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
441  unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
442 
443  auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
444  const LLT &Ty = Query.Types[TypeIdx];
445  if (Ty.isVector()) {
446  const LLT &EltTy = Ty.getElementType();
447  if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
448  return true;
449  if (!isPowerOf2_32(EltTy.getSizeInBits()))
450  return true;
451  }
452  return false;
453  };
454 
455  // FIXME: This rule is horrible, but specifies the same as what we had
456  // before with the particularly strange definitions removed (e.g.
457  // s8 = G_MERGE_VALUES s32, s32).
458  // Part of the complexity comes from these ops being extremely flexible. For
459  // example, you can build/decompose vectors with it, concatenate vectors,
460  // etc. and in addition to this you can also bitcast with it at the same
461  // time. We've been considering breaking it up into multiple ops to make it
462  // more manageable throughout the backend.
463  getActionDefinitionsBuilder(Op)
464  // Break up vectors with weird elements into scalars
465  .fewerElementsIf(
466  [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
467  scalarize(0))
468  .fewerElementsIf(
469  [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
470  scalarize(1))
471  // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
472  // or 384.
473  .clampScalar(BigTyIdx, s8, s512)
474  .widenScalarIf(
475  [=](const LegalityQuery &Query) {
476  const LLT &Ty = Query.Types[BigTyIdx];
477  return !isPowerOf2_32(Ty.getSizeInBits()) &&
478  Ty.getSizeInBits() % 64 != 0;
479  },
480  [=](const LegalityQuery &Query) {
481  // Pick the next power of 2, or a multiple of 64 over 128.
482  // Whichever is smaller.
483  const LLT &Ty = Query.Types[BigTyIdx];
484  unsigned NewSizeInBits = 1
485  << Log2_32_Ceil(Ty.getSizeInBits() + 1);
486  if (NewSizeInBits >= 256) {
487  unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
488  if (RoundedTo < NewSizeInBits)
489  NewSizeInBits = RoundedTo;
490  }
491  return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
492  })
493  // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
494  // worth considering the multiples of 64 since 2*192 and 2*384 are not
495  // valid.
496  .clampScalar(LitTyIdx, s8, s256)
497  .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
498  // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
499  // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
500  // At this point it's simple enough to accept the legal types.
501  .legalIf([=](const LegalityQuery &Query) {
502  const LLT &BigTy = Query.Types[BigTyIdx];
503  const LLT &LitTy = Query.Types[LitTyIdx];
504  if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
505  return false;
506  if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
507  return false;
508  return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
509  })
510  // Any vectors left are the wrong size. Scalarize them.
511  .scalarize(0)
512  .scalarize(1);
513  }
514 
515  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
516  .unsupportedIf([=](const LegalityQuery &Query) {
517  const LLT &EltTy = Query.Types[1].getElementType();
518  return Query.Types[0] != EltTy;
519  })
520  .minScalar(2, s64)
521  .legalIf([=](const LegalityQuery &Query) {
522  const LLT &VecTy = Query.Types[1];
523  return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
524  VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
525  });
526 
527  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
528  .legalIf([=](const LegalityQuery &Query) {
529  const LLT &VecTy = Query.Types[0];
530  // TODO: Support s8 and s16
531  return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
532  });
533 
534  getActionDefinitionsBuilder(G_BUILD_VECTOR)
535  .legalFor({{v4s16, s16},
536  {v8s16, s16},
537  {v2s32, s32},
538  {v4s32, s32},
539  {v2p0, p0},
540  {v2s64, s64}})
541  .clampNumElements(0, v4s32, v4s32)
542  .clampNumElements(0, v2s64, v2s64)
543 
544  // Deal with larger scalar types, which will be implicitly truncated.
545  .legalIf([=](const LegalityQuery &Query) {
546  return Query.Types[0].getScalarSizeInBits() <
547  Query.Types[1].getSizeInBits();
548  })
549  .minScalarSameAs(1, 0);
550 
551  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
552  {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
553  .scalarize(1);
554 
555  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
556  .legalIf([=](const LegalityQuery &Query) {
557  const LLT &DstTy = Query.Types[0];
558  const LLT &SrcTy = Query.Types[1];
559  // For now just support the TBL2 variant which needs the source vectors
560  // to be the same size as the dest.
561  if (DstTy != SrcTy)
562  return false;
563  for (auto &Ty : {v2s32, v4s32, v2s64}) {
564  if (DstTy == Ty)
565  return true;
566  }
567  return false;
568  })
569  // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
570  // just want those lowered into G_BUILD_VECTOR
571  .lowerIf([=](const LegalityQuery &Query) {
572  return !Query.Types[1].isVector();
573  })
574  .clampNumElements(0, v4s32, v4s32)
575  .clampNumElements(0, v2s64, v2s64);
576 
577  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
578  .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
579 
580  computeTables();
581  verify(*ST.getInstrInfo());
582 }
583 
// NOTE(review): the doxygen extraction dropped original lines 584-585 here —
// the start of this signature. Per the declaration in the cross-reference
// section it is `bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
// MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
// GISelChangeObserver &Observer) const` — confirm against the real source.
//
// Entry point for rules the constructor marked as Custom: returns true if the
// instruction was legalized (and possibly erased), false if it is unhandled.
586  MachineIRBuilder &MIRBuilder,
587  GISelChangeObserver &Observer) const {
// Dispatch on the generic opcode; only opcodes registered with a custom
// action in the constructor's rule set should ever reach this function.
588  switch (MI.getOpcode()) {
589  default:
590  // No idea what to do.
591  return false;
592  case TargetOpcode::G_VAARG:
// G_VAARG is expanded inline into loads/stores of the va_list pointer.
593  return legalizeVaArg(MI, MRI, MIRBuilder);
594  case TargetOpcode::G_LOAD:
595  case TargetOpcode::G_STORE:
// Loads/stores whose value type is a vector of pointers are rewritten to
// use integer-element vectors (see legalizeLoadStore below).
596  return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
597  }
598 
// Every case above returns, so falling out of the switch is a logic error.
599  llvm_unreachable("expected switch to return");
600 }
601 
602 bool AArch64LegalizerInfo::legalizeLoadStore(
// NOTE(review): doxygen extraction dropped original line 603 here — the
// leading parameters (presumably `MachineInstr &MI, MachineRegisterInfo &MRI,
// MachineIRBuilder &MIRBuilder,`, matching the call site in legalizeCustom) —
// confirm against the real source.
604  GISelChangeObserver &Observer) const {
605  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
606  MI.getOpcode() == TargetOpcode::G_LOAD);
607  // Here we just try to handle vector loads/stores where our value type might
608  // have pointer elements, which the SelectionDAG importer can't handle. To
609  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
610  // the value to use s64 types.
611 
612  // Custom legalization requires the instruction, if not deleted, must be fully
613  // legalized. In order to allow further legalization of the inst, we create
614  // a new instruction and erase the existing one.
615 
616  unsigned ValReg = MI.getOperand(0).getReg();
617  const LLT ValTy = MRI.getType(ValReg);
618 
// Bail out (returning false) unless the value is a vector whose elements are
// address-space-0 pointers — the only shape this custom rule was registered
// for (IsPtrVecPred in the constructor).
619  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
620  ValTy.getElementType().getAddressSpace() != 0) {
621  LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
622  return false;
623  }
624 
625  MIRBuilder.setInstr(MI);
// Same element count, but scalar elements of pointer width (64-bit here), so
// e.g. <2 x p0> becomes <2 x s64> and existing s64 patterns apply.
626  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
627  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
// Reuse the original instruction's memory operand on the replacement.
628  auto &MMO = **MI.memoperands_begin();
629  if (MI.getOpcode() == TargetOpcode::G_STORE) {
// Store path: bitcast value to the integer-element vector, then store that.
630  auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
631  MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
632  } else {
// Load path: load into a fresh vreg of the new type, then bitcast back into
// the original destination register.
633  unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
634  auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
635  MIRBuilder.buildBitcast({ValReg}, {NewLoad});
636  }
// The replacement instructions are inserted above; the original must go away.
637  MI.eraseFromParent();
638  return true;
639 }
640 
// Expand G_VAARG into explicit list-pointer arithmetic: load the current
// va_list cursor, realign it if the requested alignment exceeds the pointer
// size, load the argument value, then bump and store back the cursor.
// NOTE(review): this treats va_list as a single pointer — presumably the
// Darwin-style AArch64 va_list; confirm which ABIs route G_VAARG here.
641 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
642  MachineRegisterInfo &MRI,
643  MachineIRBuilder &MIRBuilder) const {
644  MIRBuilder.setInstr(MI);
645  MachineFunction &MF = MIRBuilder.getMF();
// G_VAARG operands: (0) destination value, (1) va_list pointer,
// (2) immediate alignment of the requested type.
646  unsigned Align = MI.getOperand(2).getImm();
647  unsigned Dst = MI.getOperand(0).getReg();
648  unsigned ListPtr = MI.getOperand(1).getReg();
649 
650  LLT PtrTy = MRI.getType(ListPtr);
// Integer type of pointer width, used for the GEP offsets built below.
651  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
652 
653  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
// Load the current cursor out of the va_list slot.
654  unsigned List = MRI.createGenericVirtualRegister(PtrTy);
655  MIRBuilder.buildLoad(
656  List, ListPtr,
// NOTE(review): doxygen extraction dropped original line 657 here — the
// `*MF.getMachineMemOperand(...)` argument (likely MachinePointerInfo + a
// load flag, given getMachineMemOperand in the cross-references) — confirm.
658  PtrSize, /* Align = */ PtrSize));
659 
660  unsigned DstPtr;
661  if (Align > PtrSize) {
662  // Realign the list to the actual required alignment.
// Classic round-up: add (Align - 1) then mask off the low log2(Align) bits.
663  auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
664 
665  unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
666  MIRBuilder.buildGEP(ListTmp, List, AlignMinus1.getReg(0));
667 
668  DstPtr = MRI.createGenericVirtualRegister(PtrTy);
669  MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
670  } else
671  DstPtr = List;
672 
// Load the actual argument value from the (possibly realigned) cursor.
673  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
674  MIRBuilder.buildLoad(
675  Dst, DstPtr,
// NOTE(review): original line 676 (the getMachineMemOperand argument) was
// dropped by the doxygen extraction — confirm against the real source.
677  ValSize, std::max(Align, PtrSize)));
678 
// Advance the cursor by the value size rounded up to a pointer-size multiple.
679  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
680 
681  unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
682  MIRBuilder.buildGEP(NewList, DstPtr, Size.getReg(0));
683 
// Write the advanced cursor back into the va_list slot.
684  MIRBuilder.buildStore(
685  NewList, ListPtr,
// NOTE(review): original line 686 (the getMachineMemOperand argument for the
// store) was dropped by the doxygen extraction — confirm.
687  PtrSize, /* Align = */ PtrSize));
688 
689  MI.eraseFromParent();
690  return true;
691 }
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:551
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1)
Build and insert Res = G_GEP Op0, Op1.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
unsigned getReg() const
getReg - Returns the register number.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified types.
MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
bool isVector() const
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has at an atomic ordering of at Ordering or stronger.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
This file declares the targeting of the Machinelegalizer class for AArch64.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
MachineFunction & getMF()
Getter for the function we currently build.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
Abstract class that contains various methods for clients to notify about changes. ...
unsigned const MachineRegisterInfo * MRI
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
AArch64LegalizerInfo(const AArch64Subtarget &ST)
MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits)
Build and insert Res = G_PTR_MASK Op0, NumBits.
Helper class to build MachineInstr.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
unsigned getAddressSpace() const
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool verify(const TargetRegisterInfo &TRI) const
Check that information hold by this instance make sense for the given TRI.
The memory access writes data.
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:535
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
int64_t getImm() const
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool isPointer() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
Representation of each machine instruction.
Definition: MachineInstr.h:63
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2...
ArrayRef< LLT > Types
const NodeList & List
Definition: RDFGraph.cpp:201
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
uint32_t Size
Definition: Profile.cpp:46
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
const AArch64InstrInfo * getInstrInfo() const override
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
IRTranslator LLVM IR MI
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:544