AMDGPULegalizerInfo.cpp
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#if defined(_MSC_VER) || defined(__MINGW32__)
// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI
// from the Visual C++ cmath / math.h headers:
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019
#define _USE_MATH_DEFINES
#endif

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;


static LegalityPredicate isMultiple32(unsigned TypeIdx,
                                      unsigned MaxSize = 512) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getScalarType();
    return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
  };
}
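
// For example, isMultiple32(0) accepts s32, s64, s96 and <4 x s32> (the
// scalar/element size is a multiple of 32 and the total size is at most
// MaxSize bits), but rejects s16 and <3 x s16>.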

static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return Ty.isVector() &&
           Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}

static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
  };
}

static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    unsigned Size = Ty.getSizeInBits();
    unsigned Pieces = (Size + 63) / 64;
    unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
  };
}
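
// A worked example of fewerEltsToSize64Vector: for <4 x s32> (128 bits),
// Pieces = (128 + 63) / 64 = 2 and NewNumElts = (4 + 1) / 2 = 2, so the type
// becomes <2 x s32>; <3 x s32> is also reduced to <2 x s32>, and the
// legalizer re-queries until the result fits in 64 bits.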

// Increase the number of vector elements to reach the next multiple of 32-bit
// type.
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];

    const LLT EltTy = Ty.getElementType();
    const int Size = Ty.getSizeInBits();
    const int EltSize = EltTy.getSizeInBits();
    const int NextMul32 = (Size + 31) / 32;

    assert(EltSize < 32);

    const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize;
    return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy));
  };
}
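
// For example, <3 x s16> (48 bits) is padded to the next 32-bit boundary
// (64 bits): NextMul32 = 2 and NewNumElts = (64 + 15) / 16 = 4, so the type
// becomes <4 x s16>.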

static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() < Size;
  };
}

static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
  };
}

static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
  };
}

// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
// v2s16.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (Ty.isVector()) {
      const int EltSize = Ty.getElementType().getSizeInBits();
      return EltSize == 32 || EltSize == 64 ||
             (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
             EltSize == 128 || EltSize == 256;
    }

    return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
  };
}
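
// Under this definition <2 x s16>, <4 x s16>, <2 x s32> and scalars such as
// s32, s64 or s96 are register types, while <3 x s16> and s24 are not. Note
// that the vector path does not re-check the 512-bit bound mentioned above;
// only the scalar path enforces it.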

static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
  return [=](const LegalityQuery &Query) {
    return Query.Types[TypeIdx].getElementType() == Type;
  };
}

static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
           Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits();
  };
}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                         const GCNTargetMachine &TM)
  :  ST(ST_) {
  using namespace TargetOpcode;

  auto GetAddrSpacePtr = [&TM](unsigned AS) {
    return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
  };
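
  // Pointer widths come from the target machine; e.g. on amdgcn,
  // GLOBAL_ADDRESS, CONSTANT_ADDRESS and FLAT_ADDRESS pointers are 64-bit
  // while LOCAL_ADDRESS and PRIVATE_ADDRESS pointers are 32-bit, so
  // GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS) produces a 64-bit p1.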

  const LLT S1 = LLT::scalar(1);
  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S96 = LLT::scalar(96);
  const LLT S128 = LLT::scalar(128);
  const LLT S256 = LLT::scalar(256);
  const LLT S512 = LLT::scalar(512);

  const LLT V2S16 = LLT::vector(2, 16);
  const LLT V4S16 = LLT::vector(4, 16);

  const LLT V2S32 = LLT::vector(2, 32);
  const LLT V3S32 = LLT::vector(3, 32);
  const LLT V4S32 = LLT::vector(4, 32);
  const LLT V5S32 = LLT::vector(5, 32);
  const LLT V6S32 = LLT::vector(6, 32);
  const LLT V7S32 = LLT::vector(7, 32);
  const LLT V8S32 = LLT::vector(8, 32);
  const LLT V9S32 = LLT::vector(9, 32);
  const LLT V10S32 = LLT::vector(10, 32);
  const LLT V11S32 = LLT::vector(11, 32);
  const LLT V12S32 = LLT::vector(12, 32);
  const LLT V13S32 = LLT::vector(13, 32);
  const LLT V14S32 = LLT::vector(14, 32);
  const LLT V15S32 = LLT::vector(15, 32);
  const LLT V16S32 = LLT::vector(16, 32);

  const LLT V2S64 = LLT::vector(2, 64);
  const LLT V3S64 = LLT::vector(3, 64);
  const LLT V4S64 = LLT::vector(4, 64);
  const LLT V5S64 = LLT::vector(5, 64);
  const LLT V6S64 = LLT::vector(6, 64);
  const LLT V7S64 = LLT::vector(7, 64);
  const LLT V8S64 = LLT::vector(8, 64);

  std::initializer_list<LLT> AllS32Vectors =
    {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
     V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
  std::initializer_list<LLT> AllS64Vectors =
    {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};

  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
  const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT);
  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
  const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);

  const LLT CodePtr = FlatPtr;

  const std::initializer_list<LLT> AddrSpaces64 = {
    GlobalPtr, ConstantPtr, FlatPtr
  };

  const std::initializer_list<LLT> AddrSpaces32 = {
    LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
  };

  const std::initializer_list<LLT> FPTypesBase = {
    S32, S64
  };

  const std::initializer_list<LLT> FPTypes16 = {
    S32, S64, S16
  };

  const std::initializer_list<LLT> FPTypesPK16 = {
    S32, S64, S16, V2S16
  };

  setAction({G_BRCOND, S1}, Legal);

  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
  // elements for v3s16
  getActionDefinitionsBuilder(G_PHI)
    .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
    .legalFor(AllS32Vectors)
    .legalFor(AllS64Vectors)
    .legalFor(AddrSpaces64)
    .legalFor(AddrSpaces32)
    .clampScalar(0, S32, S256)
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .legalIf(isPointer(0));
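
  // Rules in a ruleset are tried in order, so the legalFor lists above act as
  // a whitelist and the clamps and mutations below only fire for queries
  // nothing earlier matched; e.g. a <3 x s16> G_PHI falls through to
  // moreElementsIf and is widened to the legal <4 x s16>.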

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalFor({S32, S16})
      .clampScalar(0, S16, S32)
      .scalarize(0);
  } else {
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalFor({S32})
      .clampScalar(0, S32, S32)
      .scalarize(0);
  }

  getActionDefinitionsBuilder({G_UMULH, G_SMULH})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  // Report legal for any types we can handle anywhere. For the cases only legal
  // on the SALU, RegBankSelect will be able to re-legalize.
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
    .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
    .widenScalarToNextPow2(0)
    .scalarize(0);

  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                               G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
    .legalFor({{S32, S1}})
    .clampScalar(0, S32, S32);

  getActionDefinitionsBuilder(G_BITCAST)
    .legalForCartesianProduct({S32, V2S16})
    .legalForCartesianProduct({S64, V2S32, V4S16})
    .legalForCartesianProduct({V2S64, V4S32})
    // Don't worry about the size constraint.
    .legalIf(all(isPointer(0), isPointer(1)))
    // FIXME: Testing hack
    .legalForCartesianProduct({S16, LLT::vector(2, 8)});
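
  // Each single-set legalForCartesianProduct call above whitelists every
  // (dst, src) pairing within one size class, so e.g. s64 <-> <2 x s32>,
  // s64 <-> <4 x s16> and <2 x s32> <-> <4 x s16> are all legal 64-bit
  // bitcasts, including the degenerate same-type pairs.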

  getActionDefinitionsBuilder(G_FCONSTANT)
    .legalFor({S32, S64, S16})
    .clampScalar(0, S16, S64);

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
               ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .clampScalarOrElt(0, S32, S512)
    .legalIf(isMultiple32(0))
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16);

  // FIXME: i1 operands to intrinsics should always be legal, but other i1
  // values may not be legal. We need to figure out how to distinguish
  // between these two scenarios.
  getActionDefinitionsBuilder(G_CONSTANT)
    .legalFor({S1, S32, S64, S16, GlobalPtr,
               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .clampScalar(0, S32, S64)
    .widenScalarToNextPow2(0)
    .legalIf(isPointer(0));

  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).customFor({LocalPtr});

  auto &FPOpActions = getActionDefinitionsBuilder(
    {G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
    .legalFor({S32, S64});
  auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
    .customFor({S32, S64});

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts())
      FPOpActions.legalFor({S16, V2S16});
    else
      FPOpActions.legalFor({S16});

    TrigActions.customFor({S16});
  }

  auto &MinNumMaxNum = getActionDefinitionsBuilder({
      G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});

  if (ST.hasVOP3PInsts()) {
    MinNumMaxNum.customFor(FPTypesPK16)
      .clampMaxNumElements(0, S16, 2)
      .clampScalar(0, S16, S64)
      .scalarize(0);
  } else if (ST.has16BitInsts()) {
    MinNumMaxNum.customFor(FPTypes16)
      .clampScalar(0, S16, S64)
      .scalarize(0);
  } else {
    MinNumMaxNum.customFor(FPTypesBase)
      .clampScalar(0, S32, S64)
      .scalarize(0);
  }

  if (ST.hasVOP3PInsts())
    FPOpActions.clampMaxNumElements(0, S16, 2);

  FPOpActions
    .scalarize(0)
    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);

  TrigActions
    .scalarize(0)
    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);

  getActionDefinitionsBuilder({G_FNEG, G_FABS})
    .legalFor(FPTypesPK16)
    .clampMaxNumElements(0, S16, 2)
    .scalarize(0)
    .clampScalar(0, S16, S64);

  // TODO: Implement
  getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
      .legalFor({S32, S64, S16})
      .scalarize(0)
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
      .legalFor({S32, S64})
      .scalarize(0)
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_FPTRUNC)
    .legalFor({{S32, S64}, {S16, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
    .legalFor({{S64, S32}, {S32, S16}})
    .lowerFor({{S64, S16}}) // FIXME: Implement
    .scalarize(0);

  // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
  getActionDefinitionsBuilder(G_FCOPYSIGN).lower();

  getActionDefinitionsBuilder(G_FSUB)
    // Use actual fsub instruction
    .legalFor({S32})
    // Must use fadd + fneg
    .lowerFor({S64, S16, V2S16})
    .scalarize(0)
    .clampScalar(0, S32, S64);

  // Whether this is legal depends on the floating point mode for the function.
  auto &FMad = getActionDefinitionsBuilder(G_FMAD);
  if (ST.hasMadF16())
    FMad.customFor({S32, S16});
  else
    FMad.customFor({S32});
  FMad.scalarize(0)
      .lower();

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
               {S32, S1}, {S64, S1}, {S16, S1},
               {S96, S32},
               // FIXME: Hack
               {S64, LLT::scalar(33)},
               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
    .scalarize(0);

  // TODO: Legal for s1->s64, requires split for VALU.
  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
    .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}})
    .lowerFor({{S32, S64}})
    .customFor({{S64, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
    .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
    .legalFor({S32, S64})
    .scalarize(0);

  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  } else {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32})
      .customFor({S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  }

  getActionDefinitionsBuilder(G_GEP)
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0);

  getActionDefinitionsBuilder(G_PTR_MASK)
    .scalarize(0)
    .alwaysLegal();

  setAction({G_BLOCK_ADDR, CodePtr}, Legal);

  auto &CmpBuilder =
    getActionDefinitionsBuilder(G_ICMP)
    .legalForCartesianProduct(
      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .legalFor({{S1, S32}, {S1, S64}});
  if (ST.has16BitInsts()) {
    CmpBuilder.legalFor({{S1, S16}});
  }

  CmpBuilder
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .legalIf(all(typeIs(0, S1), isPointer(1)));

  getActionDefinitionsBuilder(G_FCMP)
    .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0);

  // FIXME: fexp, flog2, flog10 needs to be custom lowered.
  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
                               G_FLOG, G_FLOG2, G_FLOG10})
    .legalFor({S32})
    .scalarize(0);

  // The 64-bit versions produce 32-bit results, but only on the SALU.
  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
                               G_CTPOP})
    .legalFor({{S32, S32}, {S32, S64}})
    .clampScalar(0, S32, S32)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .widenScalarToNextPow2(0, 32)
    .widenScalarToNextPow2(1, 32);

  // TODO: Expand for > s32
  getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16, V2S16})
        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
        .clampMaxNumElements(0, S16, 2)
        .clampScalar(0, S16, S32)
        .widenScalarToNextPow2(0)
        .scalarize(0);
    } else {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16})
        .widenScalarToNextPow2(0)
        .clampScalar(0, S16, S32)
        .scalarize(0);
    }
  } else {
    getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({S32})
      .clampScalar(0, S32, S32)
      .widenScalarToNextPow2(0)
      .scalarize(0);
  }

  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() <
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() >
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  getActionDefinitionsBuilder(G_INTTOPTR)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      });

  getActionDefinitionsBuilder(G_PTRTOINT)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      })
    .narrowScalarIf(
      greaterThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      });
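
  // In both rulesets above the integer side is resized to the pointer width:
  // e.g. a G_INTTOPTR from s16 to a 64-bit flat pointer widens the source to
  // s64 first, and a G_PTRTOINT producing s128 from a 64-bit pointer narrows
  // the result to s64; the leftover extension or truncation is then legalized
  // as a separate artifact.
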
  getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
    .scalarize(0)
    .custom();

  // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
  // handle some operations by just promoting the register during
  // selection. There are also d16 loads on GFX9+ which preserve the high bits.
  auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
    switch (AS) {
    // FIXME: Private element size.
    case AMDGPUAS::PRIVATE_ADDRESS:
      return 32;
    // FIXME: Check subtarget
    case AMDGPUAS::LOCAL_ADDRESS:
      return ST.useDS128() ? 128 : 64;

    // Treat constant and global as identical. SMRD loads are sometimes usable
    // for global loads (ideally constant address space should be eliminated)
    // depending on the context. Legality cannot be context dependent, but
    // RegBankSelect can split the load as necessary depending on the pointer
    // register bank/uniformity and if the memory is invariant or not written in
    // a kernel.
    case AMDGPUAS::CONSTANT_ADDRESS:
    case AMDGPUAS::GLOBAL_ADDRESS:
      return 512;
    default:
      return 128;
    }
  };

  const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
    const LLT DstTy = Query.Types[0];

    // Split vector extloads.
    unsigned MemSize = Query.MMODescrs[0].SizeInBits;
    if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
      return true;

    const LLT PtrTy = Query.Types[1];
    unsigned AS = PtrTy.getAddressSpace();
    if (MemSize > maxSizeForAddrSpace(AS))
      return true;

    // Catch weird sized loads that don't evenly divide into the access sizes
    // TODO: May be able to widen depending on alignment etc.
    unsigned NumRegs = MemSize / 32;
    if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
      return true;

    unsigned Align = Query.MMODescrs[0].AlignInBits;
    if (Align < MemSize) {
      const SITargetLowering *TLI = ST.getTargetLowering();
      return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
    }

    return false;
  };
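
  // Loads this predicate flags for splitting include: a 96-bit (dwordx3)
  // access on a subtarget without dwordx3 load/stores, an access wider than
  // maxSizeForAddrSpace (e.g. a 1024-bit LDS load), and underaligned accesses
  // that the target lowering rejects.
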
  unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
  unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
  unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;

  // TODO: Refine based on subtargets which support unaligned access or 128-bit
  // LDS
  // TODO: Unsupported flat for SI.

  for (unsigned Op : {G_LOAD, G_STORE}) {
    const bool IsStore = Op == G_STORE;

    auto &Actions = getActionDefinitionsBuilder(Op);
    // Whitelist the common cases.
    // TODO: Pointer loads
    // TODO: Wide constant loads
    // TODO: Only CI+ has 3x loads
    // TODO: Loads to s16 on gfx9
    Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
                                      {V2S32, GlobalPtr, 64, GlobalAlign32},
                                      {V3S32, GlobalPtr, 96, GlobalAlign32},
                                      {S96, GlobalPtr, 96, GlobalAlign32},
                                      {V4S32, GlobalPtr, 128, GlobalAlign32},
                                      {S128, GlobalPtr, 128, GlobalAlign32},
                                      {S64, GlobalPtr, 64, GlobalAlign32},
                                      {V2S64, GlobalPtr, 128, GlobalAlign32},
                                      {V2S16, GlobalPtr, 32, GlobalAlign32},
                                      {S32, GlobalPtr, 8, GlobalAlign8},
                                      {S32, GlobalPtr, 16, GlobalAlign16},

                                      {S32, LocalPtr, 32, 32},
                                      {S64, LocalPtr, 64, 32},
                                      {V2S32, LocalPtr, 64, 32},
                                      {S32, LocalPtr, 8, 8},
                                      {S32, LocalPtr, 16, 16},
                                      {V2S16, LocalPtr, 32, 32},

                                      {S32, PrivatePtr, 32, 32},
                                      {S32, PrivatePtr, 8, 8},
                                      {S32, PrivatePtr, 16, 16},
                                      {V2S16, PrivatePtr, 32, 32},

                                      {S32, FlatPtr, 32, GlobalAlign32},
                                      {S32, FlatPtr, 16, GlobalAlign16},
                                      {S32, FlatPtr, 8, GlobalAlign8},
                                      {V2S16, FlatPtr, 32, GlobalAlign32},

                                      {S32, ConstantPtr, 32, GlobalAlign32},
                                      {V2S32, ConstantPtr, 64, GlobalAlign32},
                                      {V3S32, ConstantPtr, 96, GlobalAlign32},
                                      {V4S32, ConstantPtr, 128, GlobalAlign32},
                                      {S64, ConstantPtr, 64, GlobalAlign32},
                                      {S128, ConstantPtr, 128, GlobalAlign32},
                                      {V2S32, ConstantPtr, 32, GlobalAlign32}});
    Actions
        .customIf(typeIs(1, Constant32Ptr))
        .narrowScalarIf(
            [=](const LegalityQuery &Query) -> bool {
              return !Query.Types[0].isVector() && needToSplitLoad(Query);
            },
            [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
              const LLT DstTy = Query.Types[0];
              const LLT PtrTy = Query.Types[1];

              const unsigned DstSize = DstTy.getSizeInBits();
              unsigned MemSize = Query.MMODescrs[0].SizeInBits;

              // Split extloads.
              if (DstSize > MemSize)
                return std::make_pair(0, LLT::scalar(MemSize));

              if (DstSize > 32 && (DstSize % 32 != 0)) {
                // FIXME: Need a way to specify non-extload of larger size if
                // suitably aligned.
                return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
              }

              unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
              if (MemSize > MaxSize)
                return std::make_pair(0, LLT::scalar(MaxSize));

              unsigned Align = Query.MMODescrs[0].AlignInBits;
              return std::make_pair(0, LLT::scalar(Align));
            })
        .fewerElementsIf(
            [=](const LegalityQuery &Query) -> bool {
              return Query.Types[0].isVector() && needToSplitLoad(Query);
            },
            [=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
              const LLT DstTy = Query.Types[0];
              const LLT PtrTy = Query.Types[1];

              LLT EltTy = DstTy.getElementType();
              unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());

              // Split if it's too large for the address space.
              if (Query.MMODescrs[0].SizeInBits > MaxSize) {
                unsigned NumElts = DstTy.getNumElements();
                unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;

                // FIXME: Refine when odd breakdowns handled
                // The scalars will need to be re-legalized.
                if (NumPieces == 1 || NumPieces >= NumElts ||
                    NumElts % NumPieces != 0)
                  return std::make_pair(0, EltTy);

                return std::make_pair(0,
                                      LLT::vector(NumElts / NumPieces, EltTy));
              }

              // Need to split because of alignment.
              unsigned Align = Query.MMODescrs[0].AlignInBits;
              unsigned EltSize = EltTy.getSizeInBits();
              if (EltSize > Align &&
                  (EltSize / Align < DstTy.getNumElements())) {
                return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
              }

              // May need relegalization for the scalars.
              return std::make_pair(0, EltTy);
            })
        .minScalar(0, S32);

    if (IsStore)
      Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));

    // TODO: Need a bitcast lower option?
    Actions
        .legalIf([=](const LegalityQuery &Query) {
          const LLT Ty0 = Query.Types[0];
          unsigned Size = Ty0.getSizeInBits();
          unsigned MemSize = Query.MMODescrs[0].SizeInBits;
          unsigned Align = Query.MMODescrs[0].AlignInBits;

          // No extending vector loads.
          if (Size > MemSize && Ty0.isVector())
            return false;

          // FIXME: Widening store from alignment not valid.
          if (MemSize < Size)
            MemSize = std::max(MemSize, Align);

          switch (MemSize) {
          case 8:
          case 16:
            return Size == 32;
          case 32:
          case 64:
          case 128:
            return true;
          case 96:
            return ST.hasDwordx3LoadStores();
          case 256:
          case 512:
            return true;
          default:
            return false;
          }
        })
        .widenScalarToNextPow2(0)
        // TODO: v3s32->v4s32 with alignment
        .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
  }
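
  // As a rough example of the narrowScalarIf rule above: an extending s64
  // load backed by a 32-bit memory access narrows the loaded type to s32 so
  // that it matches the memory size, and the extension back to s64 is then
  // legalized separately as an artifact.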

  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
                       .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
                                                  {S32, GlobalPtr, 16, 2 * 8},
                                                  {S32, LocalPtr, 8, 8},
                                                  {S32, LocalPtr, 16, 16},
                                                  {S32, PrivatePtr, 8, 8},
                                                  {S32, PrivatePtr, 16, 16},
                                                  {S32, ConstantPtr, 8, 8},
                                                  {S32, ConstantPtr, 16, 2 * 8}});
  if (ST.hasFlatAddressSpace()) {
    ExtLoads.legalForTypesWithMemDesc(
        {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
  }

  ExtLoads.clampScalar(0, S32, S32)
          .widenScalarToNextPow2(0)
          .unsupportedIfMemSizeNotPow2()
          .lower();

  auto &Atomics = getActionDefinitionsBuilder(
    {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
     G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
     G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
     G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
    .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
               {S64, GlobalPtr}, {S64, LocalPtr}});
  if (ST.hasFlatAddressSpace()) {
    Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
  }

  getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
    .legalFor({{S32, LocalPtr}});

  // TODO: Pointer types, any 32-bit or 64-bit vector
  getActionDefinitionsBuilder(G_SELECT)
    .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
          GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
          LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
    .clampScalar(0, S16, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
    .scalarize(1)
    .clampMaxNumElements(0, S32, 2)
    .clampMaxNumElements(0, LocalPtr, 2)
    .clampMaxNumElements(0, PrivatePtr, 2)
    .scalarize(0)
    .widenScalarToNextPow2(0)
    .legalIf(all(isPointer(0), typeIs(1, S1)));

  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
  // be more flexible with the shift amount type.
  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
    .legalFor({{S32, S32}, {S64, S32}});
  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
            .clampMaxNumElements(0, S16, 2);
    } else
      Shifts.legalFor({{S16, S32}, {S16, S16}});

    Shifts.clampScalar(1, S16, S32);
    Shifts.clampScalar(0, S16, S64);
    Shifts.widenScalarToNextPow2(0, 16);
  } else {
    // Make sure we legalize the shift amount type first, as the general
    // expansion for the shifted type will produce much worse code if it hasn't
    // been truncated already.
    Shifts.clampScalar(1, S32, S32);
    Shifts.clampScalar(0, S32, S64);
    Shifts.widenScalarToNextPow2(0, 32);
  }
  Shifts.scalarize(0);
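
  // Example: an s128 G_SHL with an s64 amount first has the amount clamped to
  // s32, then the shifted value is narrowed toward s64 pieces. Clamping the
  // amount first matters because the wide-shift expansion emits compares and
  // selects on the amount, which are much cheaper once it is already s32.
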
  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
    unsigned IdxTypeIdx = 2;

    getActionDefinitionsBuilder(Op)
      .customIf([=](const LegalityQuery &Query) {
          const LLT EltTy = Query.Types[EltTypeIdx];
          const LLT VecTy = Query.Types[VecTypeIdx];
          const LLT IdxTy = Query.Types[IdxTypeIdx];
          return (EltTy.getSizeInBits() == 16 ||
                  EltTy.getSizeInBits() % 32 == 0) &&
                 VecTy.getSizeInBits() % 32 == 0 &&
                 VecTy.getSizeInBits() <= 512 &&
                 IdxTy.getSizeInBits() == 32;
        })
      .clampScalar(EltTypeIdx, S32, S64)
      .clampScalar(VecTypeIdx, S32, S64)
      .clampScalar(IdxTypeIdx, S32, S32);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
    .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      });

  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
    unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
    unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;

    // FIXME: Doesn't handle extract of illegal sizes.
    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          const LLT LitTy = Query.Types[LitTyIdx];
          return (BigTy.getSizeInBits() % 32 == 0) &&
                 (LitTy.getSizeInBits() % 16 == 0);
        })
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          return (BigTy.getScalarSizeInBits() < 16);
        },
        LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT LitTy = Query.Types[LitTyIdx];
          return (LitTy.getScalarSizeInBits() < 16);
        },
        LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
      .widenScalarToNextPow2(BigTyIdx, 32);
  }

  auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
    .legalForCartesianProduct(AllS32Vectors, {S32})
    .legalForCartesianProduct(AllS64Vectors, {S64})
    .clampNumElements(0, V16S32, V16S32)
    .clampNumElements(0, V2S64, V8S64);

  if (ST.hasScalarPackInsts())
    BuildVector.legalFor({V2S16, S32});

  BuildVector
    .minScalarSameAs(1, 0)
    .legalIf(isRegisterType(0))
    .minScalarOrElt(0, S32);

  if (ST.hasScalarPackInsts()) {
    getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
      .legalFor({V2S16, S32})
      .lower();
  } else {
    getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
      .lower();
  }

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalIf(isRegisterType(0));

  // TODO: Don't fully scalarize v2s16 pieces
  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    getActionDefinitionsBuilder(Op)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
      // Clamp the little scalar to s16-s256 and make it a power of 2. It's not
      // worth considering the multiples of 64 since 2*192 and 2*384 are not
      // valid.
      .clampScalar(LitTyIdx, S16, S256)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
      .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
                           elementTypeIs(1, S16)),
                       changeTo(1, V2S16))
      // Break up vectors with weird elements into scalars
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
        scalarize(0))
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        scalarize(1))
      .clampScalar(BigTyIdx, S32, S512)
      .lowerFor({{S16, V2S16}})
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT &Ty = Query.Types[BigTyIdx];
          return !isPowerOf2_32(Ty.getSizeInBits()) &&
                 Ty.getSizeInBits() % 16 != 0;
        },
        [=](const LegalityQuery &Query) {
          // Pick the next power of 2, or a multiple of 64 over 128.
          // Whichever is smaller.
          const LLT &Ty = Query.Types[BigTyIdx];
          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
          if (NewSizeInBits >= 256) {
            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
            if (RoundedTo < NewSizeInBits)
              NewSizeInBits = RoundedTo;
          }
          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        })
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];

          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;

          return BigTy.getSizeInBits() % 16 == 0 &&
                 LitTy.getSizeInBits() % 16 == 0 &&
                 BigTy.getSizeInBits() <= 512;
        })
      // Any vectors left are the wrong size. Scalarize them.
      .scalarize(0)
      .scalarize(1);
  }
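
  // A worked example of the widenScalarIf mutation above: s100 is neither a
  // power of 2 nor a multiple of 16, so it widens to the next power of 2,
  // s128. For s260 the next power of 2 would be s512, but since that is >= 256
  // the multiple-of-64 rounding applies: alignTo<64>(261) = 320, so s260
  // widens to s320 instead.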

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &B,
                                         GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ADDRSPACE_CAST:
    return legalizeAddrSpaceCast(MI, MRI, B);
  case TargetOpcode::G_FRINT:
    return legalizeFrint(MI, MRI, B);
  case TargetOpcode::G_FCEIL:
    return legalizeFceil(MI, MRI, B);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return legalizeIntrinsicTrunc(MI, MRI, B);
  case TargetOpcode::G_SITOFP:
    return legalizeITOFP(MI, MRI, B, true);
  case TargetOpcode::G_UITOFP:
    return legalizeITOFP(MI, MRI, B, false);
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
    return legalizeMinNumMaxNum(MI, MRI, B);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, B);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return legalizeInsertVectorElt(MI, MRI, B);
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
    return legalizeSinCos(MI, MRI, B);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeGlobalValue(MI, MRI, B);
  case TargetOpcode::G_LOAD:
    return legalizeLoad(MI, MRI, B, Observer);
  case TargetOpcode::G_FMAD:
    return legalizeFMad(MI, MRI, B);
  default:
    return false;
  }

  llvm_unreachable("expected switch to return");
}

Register AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const LLT S32 = LLT::scalar(32);

  if (ST.hasApertureRegs()) {
    // FIXME: Use inline constants (src_{shared, private}_base) instead of
    // getreg.
    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
    unsigned Encoding =
        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;

    Register ApertureReg = MRI.createGenericVirtualRegister(S32);
    Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    B.buildInstr(AMDGPU::S_GETREG_B32)
      .addDef(GetReg)
      .addImm(Encoding);
    MRI.setType(GetReg, S32);

    auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1);
    B.buildInstr(TargetOpcode::G_SHL)
      .addDef(ApertureReg)
      .addUse(GetReg)
      .addUse(ShiftAmt.getReg(0));

    return ApertureReg;
  }

  Register QueuePtr = MRI.createGenericVirtualRegister(
    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
    return Register();

  // Offset into amd_queue_t for group_segment_aperture_base_hi /
  // private_segment_aperture_base_hi.
  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;

  // FIXME: Don't use undef
  Value *V = UndefValue::get(PointerType::get(
                               Type::getInt8Ty(MF.getFunction().getContext()),
                               AMDGPUAS::CONSTANT_ADDRESS));

  MachinePointerInfo PtrInfo(V, StructOffset);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad |
    MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    4,
    MinAlign(64, StructOffset));

  Register LoadResult = MRI.createGenericVirtualRegister(S32);
  Register LoadAddr;

  B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
  B.buildLoad(LoadResult, LoadAddr, *MMO);
  return LoadResult;
}
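
// Two strategies appear above: subtargets with aperture registers read the
// 32-bit aperture base with s_getreg and shift it into the high half, while
// older subtargets load it from the amd_queue_t pointed to by the queue
// pointer argument, at offset 0x40 (group segment) or 0x44 (private segment).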

bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  MachineFunction &MF = B.getMF();

  B.setInstr(MI);

  const LLT S32 = LLT::scalar(32);
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  unsigned DestAS = DstTy.getAddressSpace();
  unsigned SrcAS = SrcTy.getAddressSpace();

  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
  // vector element.
  assert(!DstTy.isVector());

  const AMDGPUTargetMachine &TM
    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
    MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST));
    return true;
  }

  if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
    // Truncate.
    B.buildExtract(Dst, Src, 0);
    MI.eraseFromParent();
    return true;
  }

  if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    uint32_t AddrHiVal = Info->get32BitAddressHighBits();

    // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
    // another. Merge operands are required to be the same type, but creating an
    // extra ptrtoint would be kind of pointless.
    auto HighAddr = B.buildConstant(
      LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
    B.buildMerge(Dst, {Src, HighAddr.getReg(0)});
    MI.eraseFromParent();
    return true;
  }

  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
    unsigned NullVal = TM.getNullPointerValue(DestAS);

    auto SegmentNull = B.buildConstant(DstTy, NullVal);
    auto FlatNull = B.buildConstant(SrcTy, 0);

    Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);

    // Extract low 32-bits of the pointer.
    B.buildExtract(PtrLo32, Src, 0);

    Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
    B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
    B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));

    MI.eraseFromParent();
    return true;
  }

  if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
    return false;

  if (!ST.hasFlatAddressSpace())
    return false;

  auto SegmentNull =
      B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
  auto FlatNull =
      B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));

  Register ApertureReg = getSegmentAperture(DestAS, MRI, B);
  if (!ApertureReg.isValid())
    return false;

  Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
  B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));

  Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);

  // Coerce the type of the low half of the result so we can use merge_values.
  Register SrcAsInt = MRI.createGenericVirtualRegister(S32);
  B.buildInstr(TargetOpcode::G_PTRTOINT)
    .addDef(SrcAsInt)
    .addUse(Src);

  // TODO: Should we allow mismatched types but matching sizes in merges to
  // avoid the ptrtoint?
  B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
  B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));

  MI.eraseFromParent();
  return true;
}
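
// In pseudocode, the segment-to-flat path above computes
//   flat = (src != segment_null) ? (aperture_hi << 32 | src) : flat_null
// while the flat-to-segment path is the reverse,
//   seg = (src != flat_null) ? lo32(src) : segment_null
// so null pointers round-trip correctly between address spaces.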

bool AMDGPULegalizerInfo::legalizeFrint(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Src);
  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);

  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");

  auto C1 = B.buildFConstant(Ty, C1Val);
  auto CopySign = B.buildFCopysign(Ty, C1, Src);

  // TODO: Should this propagate fast-math-flags?
  auto Tmp1 = B.buildFAdd(Ty, Src, CopySign);
  auto Tmp2 = B.buildFSub(Ty, Tmp1, CopySign);

  auto C2 = B.buildFConstant(Ty, C2Val);
  auto Fabs = B.buildFAbs(Ty, Src);

  auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
  B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
  MI.eraseFromParent();
  return true;
}
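
// The constants above exploit the 52-bit fraction of binary64: adding and
// then subtracting copysign(2^52, x) leaves no room for fractional bits, so
// they are rounded away in the current rounding mode. E.g. for x = 2.7,
// 2.7 + 2^52 rounds to 2^52 + 3, and subtracting 2^52 gives 3.0. Inputs with
// |x| > 0x1.fffffffffffffp+51 are already integral and pass through the final
// select unchanged.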

bool AMDGPULegalizerInfo::legalizeFceil(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // result = trunc(src)
  // if (src > 0.0 && src != result)
  //   result += 1.0

  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});

  const auto Zero = B.buildFConstant(S64, 0.0);
  const auto One = B.buildFConstant(S64, 1.0);
  auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
  auto And = B.buildAnd(S1, Lt0, NeTrunc);
  auto Add = B.buildSelect(S64, And, One, Zero);

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
  MI.eraseFromParent();
  return true;
}

static MachineInstrBuilder extractF64Exponent(unsigned Hi,
                                              MachineIRBuilder &B) {
  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;
  LLT S32 = LLT::scalar(32);

  auto Const0 = B.buildConstant(S32, FractBits - 32);
  auto Const1 = B.buildConstant(S32, ExpBits);

  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
    .addUse(Hi)
    .addUse(Const0.getReg(0))
    .addUse(Const1.getReg(0));

  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}

bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // TODO: Should this use extract since the low half is unused?
  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
  Register Hi = Unmerge.getReg(1);

  // Extract the upper half, since this is where we will find the sign and
  // exponent.
  auto Exp = extractF64Exponent(Hi, B);

  const unsigned FractBits = 52;

  // Extract the sign bit.
  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);

  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);

  const auto Zero32 = B.buildConstant(S32, 0);

  // Extend back to 64-bits.
  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});

  auto Shr = B.buildAShr(S64, FractMask, Exp);
  auto Not = B.buildNot(S64, Shr);
  auto Tmp0 = B.buildAnd(S64, Src, Not);
  auto FiftyOne = B.buildConstant(S32, FractBits - 1);

  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);

  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeITOFP(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, bool Signed) const {
  B.setInstr(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  auto Unmerge = B.buildUnmerge({S32, S32}, Src);

  auto CvtHi = Signed ?
    B.buildSITOFP(S64, Unmerge.getReg(1)) :
    B.buildUITOFP(S64, Unmerge.getReg(1));

  auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));

  auto ThirtyTwo = B.buildConstant(S32, 32);
  auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
    .addUse(CvtHi.getReg(0))
    .addUse(ThirtyTwo.getReg(0));

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(Dst, LdExp, CvtLo);
  MI.eraseFromParent();
  return true;
}
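
// The expansion above treats the 64-bit integer as hi * 2^32 + lo: the high
// word is converted with the signedness of the original operation and scaled
// by 2^32 via ldexp, then added to the unsigned conversion of the low word.
// E.g. for 0x100000002 (2^32 + 2), hi = 1 converts to 1.0, ldexp scales it to
// 4294967296.0, and adding lo = 2.0 gives 4294967298.0.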

bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
                        MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;

  // With ieee_mode disabled, the instructions have the correct behavior
  // already for G_FMINNUM/G_FMAXNUM
  if (!MFI->getMode().IEEE)
    return !IsIEEEOp;

  if (IsIEEEOp)
    return true;

  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
  HelperBuilder.setInstr(MI);
  return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
}

bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
  if (!IdxVal) // Dynamic case will be selected to register indexing.
    return true;

  Register Dst = MI.getOperand(0).getReg();
  Register Vec = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Vec);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(Dst));

  B.setInstr(MI);

  if (IdxVal.getValue() < VecTy.getNumElements())
    B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
  else
    B.buildUndef(Dst);

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
  if (!IdxVal) // Dynamic case will be selected to register indexing.
    return true;

  Register Dst = MI.getOperand(0).getReg();
  Register Vec = MI.getOperand(1).getReg();
  Register Ins = MI.getOperand(2).getReg();

  LLT VecTy = MRI.getType(Vec);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(Ins));

  B.setInstr(MI);

  if (IdxVal.getValue() < VecTy.getNumElements())
    B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
  else
    B.buildUndef(Dst);

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeSinCos(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  Register TrigVal;
  auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI);
  if (ST.hasTrigReducedRange()) {
    auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
    TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
      .addUse(MulVal.getReg(0))
      .setMIFlags(Flags).getReg(0);
  } else
    TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);

  Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
    Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
  B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)
    .addUse(TrigVal)
    .setMIFlags(Flags);
  MI.eraseFromParent();
  return true;
}
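
// The hardware sin/cos used here take their operand with one period
// normalized to 1.0 rather than 2*pi, hence the multiply by 0.5 / M_PI above.
// On subtargets with a reduced valid input range, amdgcn_fract additionally
// wraps the scaled operand into [0, 1) first.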

bool AMDGPULegalizerInfo::legalizeGlobalValue(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned AS = Ty.getAddressSpace();

  const GlobalValue *GV = MI.getOperand(1).getGlobal();
  MachineFunction &MF = B.getMF();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
    B.setInstr(MI);

    if (!MFI->isEntryFunction()) {
      const Function &Fn = MF.getFunction();
      DiagnosticInfoUnsupported BadLDSDecl(
        Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
      Fn.getContext().diagnose(BadLDSDecl);
    }

    // TODO: We could emit code to handle the initialization somewhere.
    if (!hasDefinedInitializer(GV)) {
      B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
      MI.eraseFromParent();
      return true;
    }
  } else
    return false;

  const Function &Fn = MF.getFunction();
  DiagnosticInfoUnsupported BadInit(
    Fn, "unsupported initializer for address space", MI.getDebugLoc());
  Fn.getContext().diagnose(BadInit);
  return true;
}

bool AMDGPULegalizerInfo::legalizeLoad(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, GISelChangeObserver &Observer) const {
  B.setInstr(MI);
  LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
  auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(Cast.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool AMDGPULegalizerInfo::legalizeFMad(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  assert(Ty.isScalar());

  // TODO: Always legal with future ftz flag.
  if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals())
    return true;
  if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals())
    return true;

  MachineFunction &MF = B.getMF();

  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
  HelperBuilder.setMBB(*MI.getParent());
  return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
}

// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                       MachineRegisterInfo &MRI) {
  Register CondDef = MI.getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(CondDef))
    return nullptr;

  MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
  return UseMI.getParent() == MI.getParent() &&
         UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
}

Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
                                                Register Reg, LLT Ty) const {
  Register LiveIn = MRI.getLiveInVirtReg(Reg);
  if (LiveIn)
    return LiveIn;

  Register NewReg = MRI.createGenericVirtualRegister(Ty);
  MRI.addLiveIn(Reg, NewReg);
  return NewReg;
}

bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
                                         const ArgDescriptor *Arg) const {
  if (!Arg->isRegister() || !Arg->getRegister().isValid())
    return false; // TODO: Handle these

  assert(Arg->getRegister().isPhysical());

  MachineRegisterInfo &MRI = *B.getMRI();

  LLT Ty = MRI.getType(DstReg);
  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);

  if (Arg->isMasked()) {
    // TODO: Should we try to emit this once in the entry block?
    const LLT S32 = LLT::scalar(32);
    const unsigned Mask = Arg->getMask();
    const unsigned Shift = countTrailingZeros<unsigned>(Mask);

    auto ShiftAmt = B.buildConstant(S32, Shift);
    auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
    B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
  } else
    B.buildCopy(DstReg, LiveIn);

  // Insert the argument copy if it doesn't already exist.
  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
  if (!MRI.getVRegDef(LiveIn)) {
    // FIXME: Should have scoped insert pt
    MachineBasicBlock &OrigInsBB = B.getMBB();
    auto OrigInsPt = B.getInsertPt();

    MachineBasicBlock &EntryMBB = B.getMF().front();
    EntryMBB.addLiveIn(Arg->getRegister());
    B.setInsertPt(EntryMBB, EntryMBB.begin());
    B.buildCopy(LiveIn, Arg->getRegister());

    B.setInsertPt(OrigInsBB, OrigInsPt);
  }

  return true;
}

bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
  MachineInstr &MI,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &B,
  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
  B.setInstr(MI);

  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
  if (!Arg) {
    LLVM_DEBUG(dbgs() << "Required arg register missing\n");
    return false;
  }

  if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
    MI.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {
  B.setInstr(MI);
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  uint16_t Flags = MI.getFlags();

  LLT S32 = LLT::scalar(32);
  LLT S1 = LLT::scalar(1);

  auto Abs = B.buildFAbs(S32, RHS, Flags);
  const APFloat C0Val(1.0f);

  auto C0 = B.buildConstant(S32, 0x6f800000);
  auto C1 = B.buildConstant(S32, 0x2f800000);
  auto C2 = B.buildConstant(S32, FloatToBits(1.0f));

  auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
  auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);

  auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);

  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
    .addUse(Mul0.getReg(0))
    .setMIFlags(Flags);

  auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);

  B.buildFMul(Res, Sel, Mul1, Flags);

  MI.eraseFromParent();
  return true;
}
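
// The magic numbers above are float bit patterns: 0x6f800000 is 2^96 and
// 0x2f800000 is 2^-32. When |rhs| > 2^96, rhs is pre-scaled by 2^-32 so the
// reciprocal does not underflow toward denormals, and the same 2^-32 factor
// multiplies the final product, leaving lhs * (1 / (rhs * 2^-32)) * 2^-32 =
// lhs / rhs; otherwise the scale is 1.0 and this degenerates to
// lhs * rcp(rhs).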

bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {
  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
  if (!MFI->isEntryFunction()) {
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
  }

  B.setInstr(MI);

  uint64_t Offset =
    ST.getTargetLowering()->getImplicitParameterOffset(
      B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC)
    = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  if (!Arg)
    return false;

  Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
  if (!loadInputValue(KernargPtrReg, B, Arg))
    return false;

  B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              unsigned AddrSpace) const {
  B.setInstr(MI);
  Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B);
  auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32);
  B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg);
  MI.eraseFromParent();
  return true;
}
1753 
1756  MachineIRBuilder &B) const {
1757  // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
1758  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1759  case Intrinsic::amdgcn_if: {
1760  if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1761  const SIRegisterInfo *TRI
1762  = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1763 
1764  B.setInstr(*BrCond);
1765  Register Def = MI.getOperand(1).getReg();
1766  Register Use = MI.getOperand(3).getReg();
1767  B.buildInstr(AMDGPU::SI_IF)
1768  .addDef(Def)
1769  .addUse(Use)
1770  .addMBB(BrCond->getOperand(1).getMBB());
1771 
1772  MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
1773  MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
1774  MI.eraseFromParent();
1775  BrCond->eraseFromParent();
1776  return true;
1777  }
1778 
1779  return false;
1780  }
1781  case Intrinsic::amdgcn_loop: {
1782  if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1783  const SIRegisterInfo *TRI
1784  = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1785 
1786  B.setInstr(*BrCond);
1787  Register Reg = MI.getOperand(2).getReg();
1788  B.buildInstr(AMDGPU::SI_LOOP)
1789  .addUse(Reg)
1790  .addMBB(BrCond->getOperand(1).getMBB());
1791  MI.eraseFromParent();
1792  BrCond->eraseFromParent();
1793  MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
1794  return true;
1795  }
1796 
1797  return false;
1798  }
1799  case Intrinsic::amdgcn_kernarg_segment_ptr:
1800  return legalizePreloadedArgIntrin(
1801  MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1802  case Intrinsic::amdgcn_implicitarg_ptr:
1803  return legalizeImplicitArgPtr(MI, MRI, B);
1804  case Intrinsic::amdgcn_workitem_id_x:
1805  return legalizePreloadedArgIntrin(MI, MRI, B,
1806  AMDGPUFunctionArgInfo::WORKITEM_ID_X);
1807  case Intrinsic::amdgcn_workitem_id_y:
1808  return legalizePreloadedArgIntrin(MI, MRI, B,
1809  AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
1810  case Intrinsic::amdgcn_workitem_id_z:
1811  return legalizePreloadedArgIntrin(MI, MRI, B,
1812  AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
1813  case Intrinsic::amdgcn_workgroup_id_x:
1814  return legalizePreloadedArgIntrin(MI, MRI, B,
1815  AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
1816  case Intrinsic::amdgcn_workgroup_id_y:
1817  return legalizePreloadedArgIntrin(MI, MRI, B,
1818  AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
1819  case Intrinsic::amdgcn_workgroup_id_z:
1820  return legalizePreloadedArgIntrin(MI, MRI, B,
1821  AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
1822  case Intrinsic::amdgcn_dispatch_ptr:
1823  return legalizePreloadedArgIntrin(MI, MRI, B,
1824  AMDGPUFunctionArgInfo::DISPATCH_PTR);
1825  case Intrinsic::amdgcn_queue_ptr:
1826  return legalizePreloadedArgIntrin(MI, MRI, B,
1827  AMDGPUFunctionArgInfo::QUEUE_PTR);
1828  case Intrinsic::amdgcn_implicit_buffer_ptr:
1829  return legalizePreloadedArgIntrin(
1830  MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
1831  case Intrinsic::amdgcn_dispatch_id:
1832  return legalizePreloadedArgIntrin(MI, MRI, B,
1833  AMDGPUFunctionArgInfo::DISPATCH_ID);
1834  case Intrinsic::amdgcn_fdiv_fast:
1835  return legalizeFDIVFast(MI, MRI, B);
1836  case Intrinsic::amdgcn_is_shared:
1837  return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
1838  case Intrinsic::amdgcn_is_private:
1839  return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS);
1840  case Intrinsic::amdgcn_wavefrontsize: {
1841  B.setInstr(MI);
1842  B.buildConstant(MI.getOperand(0), ST.getWavefrontSize());
1843  MI.eraseFromParent();
1844  return true;
1845  }
1846  default:
1847  return true;
1848  }
1849 
1850  return true;
1851 }
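For the structured control-flow cases above, the G_BRCOND that consumes the intrinsic's result is replaced by SI_IF/SI_LOOP pseudos operating on the wave's EXEC mask. A rough per-wave sketch of what SI_IF achieves (illustrative C++; SavedExec stands in for the mask the pseudo defines so that the matching end-of-control-flow pseudo can later restore it):

  #include <cstdint>

  // The conditionally executed block runs with EXEC narrowed to the lanes
  // whose condition bit is set; the old mask is kept for restoration.
  uint64_t siIfModel(uint64_t Exec, uint64_t CondMask, uint64_t &SavedExec) {
    SavedExec = Exec;        // defined mask, consumed when control flow rejoins
    return Exec & CondMask;  // EXEC while the "then" block executes
  }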