//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
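  // LLT shorthands: sN is an N-bit scalar, vNsM is an N-element vector of
  // M-bit scalars, and p0 is a 64-bit pointer in address space 0.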
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

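  // Implicit defs are legal on the common scalar, pointer, and 128-bit vector
  // types; other scalars are clamped and widened, vectors of s64 elements are
  // reduced to v2s64, and all other vectors are scalarized.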
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

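  // Integer arithmetic and logic ops: legal on 32/64-bit scalars and on the
  // common 64/128-bit vector forms; narrower scalars are widened and vector
  // lengths are clamped into the supported range.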
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

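  // Shifts: the shift amount (type index 1) must be at least as wide as the
  // shifted value (type index 0), hence the minScalarSameAs constraint here
  // and on G_LSHR/G_ASHR below.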
  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .legalFor({{s32, s32}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // These are implemented as libcalls, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32 bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

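  // Each memory-op rule below takes {value type, pointer type, memory size in
  // bits, minimum alignment}. For the extending loads, the memory size may be
  // narrower than the value type.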
  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

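  // Vectors of address-space-0 pointers (e.g. v2p0) go through custom
  // legalization (legalizeLoadStore below), which bitcasts the value so the
  // existing s64-based load/store patterns can be reused.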
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower any remaining any-extending loads into G_ANYEXT and G_LOAD.
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

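  // Integer compares produce an s32 result. Vector compares require matching
  // element sizes between result and source (except for pointer elements),
  // and compares of pointer vectors produce s64 elements.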
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; },
          0, s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalForCartesianProduct({s8, s16, s32, s64}, {s1, s8, s16, s32})
      .legalFor({v8s16, v8s8});

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32- and 64-bit width types are just copies.
  // Same for 128-bit width types, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

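  // With LSE, the atomic read-modify-write operations and G_ATOMIC_CMPXCHG
  // can be selected directly. G_ATOMIC_CMPXCHG_WITH_SUCCESS has no direct
  // equivalent, so it is lowered to a plain G_ATOMIC_CMPXCHG plus a compare
  // of the loaded value.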
  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible.
    // For example, you can build/decompose vectors with them, concatenate
    // vectors, etc., and on top of that you can also bitcast with them at
    // the same time. We've been considering breaking them up into multiple
    // ops to make them more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128,
              // whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

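  // Extracting an element must produce exactly the vector's element type, and
  // the index operand is widened to at least s64.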
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v4s32 ||
               VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

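  // Build a vector from individual scalar (or pointer) operands. Sources
  // wider than the destination's elements are allowed and get implicitly
  // truncated; narrower ones are widened to match the element type.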
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), so we
      // just want those lowered into G_BUILD_VECTOR.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. In order to allow further legalization of the inst, we
  // create a new instruction and erase the existing one.

  unsigned ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Bitcast the pointer vector to an integer vector and store that instead.
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    // Load as an integer vector, then bitcast back to the pointer vector.
    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

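// Lower G_VAARG assuming va_list is a plain pointer to the next argument slot
// (as on e.g. Darwin): load the current pointer, realign it if the requested
// alignment exceeds the slot size, load the value, then bump the pointer past
// the pointer-size-aligned slot and store it back.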
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  unsigned Dst = MI.getOperand(0).getReg();
  unsigned ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  unsigned List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  unsigned DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildGEP(ListTmp, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildGEP(NewList, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}