AMDGPULegalizerInfo.cpp (LLVM 9.0.0svn)
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

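// True if the type at TypeIdx is no wider than MaxSize bits and its scalar
// (element) type is a multiple of 32 bits, i.e. it maps cleanly onto whole
// 32-bit registers.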
static LegalityPredicate isMultiple32(unsigned TypeIdx,
                                      unsigned MaxSize = 512) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getScalarType();
    return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
  };
}

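// Matches vectors with an odd number of elements narrower than 32 bits; these
// are padded to an even element count (see oneMoreElement) so they can be
// handled as packed 32-bit pieces.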
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return Ty.isVector() &&
           Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}

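// Mutation that widens the vector at TypeIdx by a single element, keeping the
// element type.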
static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
  };
}

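// Mutation that divides the element count by the number of 64-bit pieces the
// type occupies, so each resulting value is roughly 64 bits wide; the result
// degrades to a scalar if only one element remains.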
static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    unsigned Size = Ty.getSizeInBits();
    unsigned Pieces = (Size + 63) / 64;
    unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
  };
}

static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
  };
}

static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
  };
}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                         const GCNTargetMachine &TM) {
  using namespace TargetOpcode;

  auto GetAddrSpacePtr = [&TM](unsigned AS) {
    return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
  };

  const LLT S1 = LLT::scalar(1);
  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S128 = LLT::scalar(128);
  const LLT S256 = LLT::scalar(256);
  const LLT S512 = LLT::scalar(512);

  const LLT V2S16 = LLT::vector(2, 16);
  const LLT V4S16 = LLT::vector(4, 16);
  const LLT V8S16 = LLT::vector(8, 16);

  const LLT V2S32 = LLT::vector(2, 32);
  const LLT V3S32 = LLT::vector(3, 32);
  const LLT V4S32 = LLT::vector(4, 32);
  const LLT V5S32 = LLT::vector(5, 32);
  const LLT V6S32 = LLT::vector(6, 32);
  const LLT V7S32 = LLT::vector(7, 32);
  const LLT V8S32 = LLT::vector(8, 32);
  const LLT V9S32 = LLT::vector(9, 32);
  const LLT V10S32 = LLT::vector(10, 32);
  const LLT V11S32 = LLT::vector(11, 32);
  const LLT V12S32 = LLT::vector(12, 32);
  const LLT V13S32 = LLT::vector(13, 32);
  const LLT V14S32 = LLT::vector(14, 32);
  const LLT V15S32 = LLT::vector(15, 32);
  const LLT V16S32 = LLT::vector(16, 32);

  const LLT V2S64 = LLT::vector(2, 64);
  const LLT V3S64 = LLT::vector(3, 64);
  const LLT V4S64 = LLT::vector(4, 64);
  const LLT V5S64 = LLT::vector(5, 64);
  const LLT V6S64 = LLT::vector(6, 64);
  const LLT V7S64 = LLT::vector(7, 64);
  const LLT V8S64 = LLT::vector(8, 64);

  std::initializer_list<LLT> AllS32Vectors =
    {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
     V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
  std::initializer_list<LLT> AllS64Vectors =
    {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};

  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);

  const LLT CodePtr = FlatPtr;

  const std::initializer_list<LLT> AddrSpaces64 = {
    GlobalPtr, ConstantPtr, FlatPtr
  };

  const std::initializer_list<LLT> AddrSpaces32 = {
    LocalPtr, PrivatePtr
  };

  setAction({G_BRCOND, S1}, Legal);

  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
  // elements for v3s16
  getActionDefinitionsBuilder(G_PHI)
    .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
    .legalFor(AllS32Vectors)
    .legalFor(AllS64Vectors)
    .legalFor(AddrSpaces64)
    .legalFor(AddrSpaces32)
    .clampScalar(0, S32, S256)
    .widenScalarToNextPow2(0, 32)
    .legalIf(isPointer(0));

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UMULH, G_SMULH})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  // Report legal for any types we can handle anywhere. For the cases only legal
  // on the SALU, RegBankSelect will be able to re-legalize.
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
    .legalFor({S32, S1, S64, V2S32, V2S16, V4S16})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
    .scalarize(0);

  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                               G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
    .legalFor({{S32, S1}})
    .clampScalar(0, S32, S32);

  getActionDefinitionsBuilder(G_BITCAST)
    .legalForCartesianProduct({S32, V2S16})
    .legalForCartesianProduct({S64, V2S32, V4S16})
    .legalForCartesianProduct({V2S64, V4S32})
    // Don't worry about the size constraint.
    .legalIf(all(isPointer(0), isPointer(1)));

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64, S16})
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
               ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .clampScalarOrElt(0, S32, S512)
    .legalIf(isMultiple32(0))
    .widenScalarToNextPow2(0, 32);

  // FIXME: i1 operands to intrinsics should always be legal, but other i1
  // values may not be legal. We need to figure out how to distinguish
  // between these two scenarios.
  getActionDefinitionsBuilder(G_CONSTANT)
    .legalFor({S1, S32, S64, GlobalPtr,
               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .clampScalar(0, S32, S64)
    .widenScalarToNextPow2(0)
    .legalIf(isPointer(0));

  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);

  auto &FPOpActions = getActionDefinitionsBuilder(
    {G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
    .legalFor({S32, S64});

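  // f16 (and packed v2f16) floating-point operations are only reported legal
  // on subtargets that actually have 16-bit (and VOP3P) instructions; other
  // targets clamp these operations to at least 32 bits below.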
  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts())
      FPOpActions.legalFor({S16, V2S16});
    else
      FPOpActions.legalFor({S16});
  }

  if (ST.hasVOP3PInsts())
    FPOpActions.clampMaxNumElements(0, S16, 2);
  FPOpActions
    .scalarize(0)
    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64, S16})
      .scalarize(0)
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64})
      .scalarize(0)
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_FPTRUNC)
    .legalFor({{S32, S64}, {S16, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
    .legalFor({{S64, S32}, {S32, S16}})
    .lowerFor({{S64, S16}}) // FIXME: Implement
    .scalarize(0);

  getActionDefinitionsBuilder(G_FSUB)
    // Use actual fsub instruction
    .legalFor({S32})
    // Must use fadd + fneg
    .lowerFor({S64, S16, V2S16})
    .scalarize(0)
    .clampScalar(0, S32, S64);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
               {S32, S1}, {S64, S1}, {S16, S1},
               // FIXME: Hack
               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
    .legalFor({{S32, S32}, {S64, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
    .legalFor({{S32, S32}, {S32, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
    .legalFor({S32, S64})
    .scalarize(0);

  getActionDefinitionsBuilder(G_GEP)
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0);

  setAction({G_BLOCK_ADDR, CodePtr}, Legal);

  getActionDefinitionsBuilder(G_ICMP)
    .legalForCartesianProduct(
      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .legalIf(all(typeIs(0, S1), isPointer(1)));

  getActionDefinitionsBuilder(G_FCMP)
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0);

  // FIXME: fexp, flog2, flog10 need to be custom lowered.
  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
                               G_FLOG, G_FLOG2, G_FLOG10})
    .legalFor({S32})
    .scalarize(0);

  // The 64-bit versions produce 32-bit results, but only on the SALU.
  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
                               G_CTPOP})
    .legalFor({{S32, S32}, {S32, S64}})
    .clampScalar(0, S32, S32)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .widenScalarToNextPow2(0, 32)
    .widenScalarToNextPow2(1, 32);

  // TODO: Expand for > s32
  getActionDefinitionsBuilder(G_BSWAP)
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

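  // Size-comparison helpers used below to widen or narrow the integer side of
  // pointer/integer conversions until it matches the pointer's size.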
  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() <
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() >
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

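  // G_INTTOPTR and G_PTRTOINT are legal for the common 32-bit and 64-bit
  // address spaces, and more generally whenever the integer and pointer sizes
  // match; otherwise the integer operand is widened or narrowed to the
  // pointer's size.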
  getActionDefinitionsBuilder(G_INTTOPTR)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      });

  getActionDefinitionsBuilder(G_PTRTOINT)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      })
    .narrowScalarIf(
      greaterThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      });

  if (ST.hasFlatAddressSpace()) {
    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
      .scalarize(0)
      .custom();
  }

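  // Loads and stores: values wider than 32 bits whose memory size is smaller
  // than the register size are narrowed to 32 bits; 96-bit vector accesses are
  // split into 64-bit pieces on older subtargets without dwordx3 memory
  // instructions; everything else is legal only when the register and memory
  // sizes line up with what the hardware can address directly.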
  getActionDefinitionsBuilder({G_LOAD, G_STORE})
    .narrowScalarIf([](const LegalityQuery &Query) {
        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (Size > 32 && MemSize < Size);
      },
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(32));
      })
    .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (MemSize == 96) &&
               Query.Types[0].isVector() &&
               ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
      },
      [=](const LegalityQuery &Query) {
        return std::make_pair(0, V2S32);
      })
    .legalIf([=, &ST](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];

        unsigned Size = Ty0.getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        if (Size < 32 || (Size > 32 && MemSize < Size))
          return false;

        if (Ty0.isVector() && Size != MemSize)
          return false;

        // TODO: Decompose private loads into 4-byte components.
        // TODO: Illegal flat loads on SI
        switch (MemSize) {
        case 8:
        case 16:
          return Size == 32;
        case 32:
        case 64:
        case 128:
          return true;

        case 96:
          // XXX hasLoadX3
          return ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS;

        case 256:
        case 512:
          // TODO: constant loads
        default:
          return false;
        }
      })
    .clampScalar(0, S32, S64);

  // FIXME: Handle alignment requirements.
  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
    .legalForTypesWithMemDesc({
        {S32, GlobalPtr, 8, 8},
        {S32, GlobalPtr, 16, 8},
        {S32, LocalPtr, 8, 8},
        {S32, LocalPtr, 16, 8},
        {S32, PrivatePtr, 8, 8},
        {S32, PrivatePtr, 16, 8}});
  if (ST.hasFlatAddressSpace()) {
    ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
                                       {S32, FlatPtr, 16, 8}});
  }

  ExtLoads.clampScalar(0, S32, S32)
          .widenScalarToNextPow2(0)
          .unsupportedIfMemSizeNotPow2()
          .lower();

  auto &Atomics = getActionDefinitionsBuilder(
    {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
     G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
     G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
     G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
    .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
               {S64, GlobalPtr}, {S64, LocalPtr}});
  if (ST.hasFlatAddressSpace()) {
    Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
  }

  // TODO: Pointer types, any 32-bit or 64-bit vector
  getActionDefinitionsBuilder(G_SELECT)
    .legalForCartesianProduct({S32, S64, V2S32, V2S16, V4S16,
                               GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
                               LLT::vector(2, LocalPtr),
                               LLT::vector(2, PrivatePtr)}, {S1})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
    .scalarize(1)
    .clampMaxNumElements(0, S32, 2)
    .clampMaxNumElements(0, LocalPtr, 2)
    .clampMaxNumElements(0, PrivatePtr, 2)
    .scalarize(0)
    .legalIf(all(isPointer(0), typeIs(1, S1)));

  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
  // be more flexible with the shift amount type.
  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
    .legalFor({{S32, S32}, {S64, S32}});
  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
            .clampMaxNumElements(0, S16, 2);
    } else
      Shifts.legalFor({{S16, S32}, {S16, S16}});

    Shifts.clampScalar(1, S16, S32);
    Shifts.clampScalar(0, S16, S64);
    Shifts.widenScalarToNextPow2(0, 16);
  } else {
    // Make sure we legalize the shift amount type first, as the general
    // expansion for the shifted type will produce much worse code if it hasn't
    // been truncated already.
    Shifts.clampScalar(1, S32, S32);
    Shifts.clampScalar(0, S32, S64);
    Shifts.widenScalarToNextPow2(0, 32);
  }
  Shifts.scalarize(0);

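  // Dynamic extract/insert of a vector element is only selectable when the
  // index is 32 bits and the vector is a whole multiple of 32 bits no wider
  // than 512 bits; element and index types are clamped accordingly.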
  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
    unsigned IdxTypeIdx = 2;

    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &VecTy = Query.Types[VecTypeIdx];
          const LLT &IdxTy = Query.Types[IdxTypeIdx];
          return VecTy.getSizeInBits() % 32 == 0 &&
                 VecTy.getSizeInBits() <= 512 &&
                 IdxTy.getSizeInBits() == 32;
        })
      .clampScalar(EltTypeIdx, S32, S64)
      .clampScalar(VecTypeIdx, S32, S64)
      .clampScalar(IdxTypeIdx, S32, S32);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
    .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      });

  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
    unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
    unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;

    // FIXME: Doesn't handle extract of illegal sizes.
    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          const LLT LitTy = Query.Types[LitTyIdx];
          return (BigTy.getSizeInBits() % 32 == 0) &&
                 (LitTy.getSizeInBits() % 16 == 0);
        })
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          return (BigTy.getScalarSizeInBits() < 16);
        },
        widenScalarOrEltToNextPow2(BigTyIdx, 16))
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT LitTy = Query.Types[LitTyIdx];
          return (LitTy.getScalarSizeInBits() < 16);
        },
        widenScalarOrEltToNextPow2(LitTyIdx, 16))
      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx));
  }

  // TODO: vectors of pointers
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
    .legalForCartesianProduct(AllS32Vectors, {S32})
    .legalForCartesianProduct(AllS64Vectors, {S64})
    .clampNumElements(0, V16S32, V16S32)
    .clampNumElements(0, V2S64, V8S64)
    .minScalarSameAs(1, 0)
    // FIXME: Sort of a hack to make progress on other legalizations.
    .legalIf([=](const LegalityQuery &Query) {
      return Query.Types[0].getScalarSizeInBits() <= 32 ||
             Query.Types[0].getScalarSizeInBits() == 64;
    });

  // TODO: Support any combination of v2s32
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalFor({{V4S32, V2S32},
               {V8S32, V2S32},
               {V8S32, V4S32},
               {V4S64, V2S64},
               {V4S16, V2S16},
               {V8S16, V2S16},
               {V8S16, V4S16},
               {LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
               {LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

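    // Element types smaller than 8 bits, larger than 64 bits, or with a
    // non-power-of-2 size can't be merged/unmerged directly; vectors of such
    // elements are scalarized below.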
    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    getActionDefinitionsBuilder(Op)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
      // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
      // worth considering the multiples of 64 since 2*192 and 2*384 are not
      // valid.
      .clampScalar(LitTyIdx, S16, S256)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)

      // Break up vectors with weird elements into scalars
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
        scalarize(0))
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        scalarize(1))
      .clampScalar(BigTyIdx, S32, S512)
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT &Ty = Query.Types[BigTyIdx];
          return !isPowerOf2_32(Ty.getSizeInBits()) &&
                 Ty.getSizeInBits() % 16 != 0;
        },
        [=](const LegalityQuery &Query) {
          // Pick the next power of 2, or a multiple of 64 over 128.
          // Whichever is smaller.
          const LLT &Ty = Query.Types[BigTyIdx];
          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
          if (NewSizeInBits >= 256) {
            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
            if (RoundedTo < NewSizeInBits)
              NewSizeInBits = RoundedTo;
          }
          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        })
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];

          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;

          return BigTy.getSizeInBits() % 16 == 0 &&
                 LitTy.getSizeInBits() % 16 == 0 &&
                 BigTy.getSizeInBits() <= 512;
        })
      // Any vectors left are the wrong size. Scalarize them.
      .scalarize(0)
      .scalarize(1);
  }

  computeTables();
  verify(*ST.getInstrInfo());
}

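// Dispatch point for operations marked .custom() above; currently only
// G_ADDRSPACE_CAST needs custom legalization.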
bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder,
                                         GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ADDRSPACE_CAST:
    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
  default:
    return false;
  }

  llvm_unreachable("expected switch to return");
}

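// Returns a virtual register holding the 32-bit aperture (the high half of a
// 64-bit flat address) for the given segment address space, either read from
// the hardware aperture registers via S_GETREG or loaded from the queue
// pointer on targets without aperture registers.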
unsigned AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const LLT S32 = LLT::scalar(32);

  if (ST.hasApertureRegs()) {
    // FIXME: Use inline constants (src_{shared, private}_base) instead of
    // getreg.
    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
      AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
      AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
      AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
      AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
    unsigned Encoding =
      AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
      Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
      WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;

    unsigned ShiftAmt = MRI.createGenericVirtualRegister(S32);
    unsigned ApertureReg = MRI.createGenericVirtualRegister(S32);
    unsigned GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
      .addDef(GetReg)
      .addImm(Encoding);
    MRI.setType(GetReg, S32);

    MIRBuilder.buildConstant(ShiftAmt, WidthM1 + 1);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
      .addDef(ApertureReg)
      .addUse(GetReg)
      .addUse(ShiftAmt);

    return ApertureReg;
  }

  unsigned QueuePtr = MRI.createGenericVirtualRegister(
    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));

  // FIXME: Placeholder until we can track the input registers.
  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);

  // Offset into amd_queue_t for group_segment_aperture_base_hi /
  // private_segment_aperture_base_hi.
  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;

  // FIXME: Don't use undef
  Value *V = UndefValue::get(PointerType::get(
    Type::getInt8Ty(MF.getFunction().getContext()),
    AMDGPUAS::CONSTANT_ADDRESS));

  MachinePointerInfo PtrInfo(V, StructOffset);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad |
    MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    4,
    MinAlign(64, StructOffset));

  unsigned LoadResult = MRI.createGenericVirtualRegister(S32);
  unsigned LoadAddr = AMDGPU::NoRegister;

  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
  return LoadResult;
}

bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();

  MIRBuilder.setInstr(MI);

  unsigned Dst = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  unsigned DestAS = DstTy.getAddressSpace();
  unsigned SrcAS = SrcTy.getAddressSpace();

  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
  // vector element.
  assert(!DstTy.isVector());

  const AMDGPUTargetMachine &TM
    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
    return true;
  }

  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
    unsigned NullVal = TM.getNullPointerValue(DestAS);

    unsigned SegmentNullReg = MRI.createGenericVirtualRegister(DstTy);
    unsigned FlatNullReg = MRI.createGenericVirtualRegister(SrcTy);

    MIRBuilder.buildConstant(SegmentNullReg, NullVal);
    MIRBuilder.buildConstant(FlatNullReg, 0);

    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);

    // Extract low 32-bits of the pointer.
    MIRBuilder.buildExtract(PtrLo32, Src, 0);

    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNullReg);
    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNullReg);

    MI.eraseFromParent();
    return true;
  }

  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);

  unsigned FlatNullReg = MRI.createGenericVirtualRegister(DstTy);
  unsigned SegmentNullReg = MRI.createGenericVirtualRegister(SrcTy);
  MIRBuilder.buildConstant(SegmentNullReg, TM.getNullPointerValue(SrcAS));
  MIRBuilder.buildConstant(FlatNullReg, TM.getNullPointerValue(DestAS));

  unsigned ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);

  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNullReg);

  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);

  // Coerce the type of the low half of the result so we can use merge_values.
  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
    .addDef(SrcAsInt)
    .addUse(Src);

  // TODO: Should we allow mismatched types but matching sizes in merges to
  // avoid the ptrtoint?
  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNullReg);

  MI.eraseFromParent();
  return true;
}