LLVM 9.0.0svn
AMDGPULegalizerInfo.cpp
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;


static LegalityPredicate isMultiple32(unsigned TypeIdx,
                                      unsigned MaxSize = 512) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getScalarType();
    return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
  };
}

static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return Ty.isVector() &&
           Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}

static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
  };
}

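// Break a wide vector into pieces of at most 64 bits each. For example,
// v5s16 (80 bits) computes (80 + 63) / 64 = 2 pieces and (5 + 1) / 2 = 3
// elements per piece, yielding v3s16; for v2s64 the element count rounds
// down to 1 and LLT::scalarOrVector returns a plain s64.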
static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    unsigned Size = Ty.getSizeInBits();
    unsigned Pieces = (Size + 63) / 64;
    unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
  };
}

static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
  };
}

static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
  };
}

// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
// v2s16.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (Ty.isVector()) {
      const int EltSize = Ty.getElementType().getSizeInBits();
      return EltSize == 32 || EltSize == 64 ||
             (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
             EltSize == 128 || EltSize == 256;
    }

    return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
  };
}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                                         const GCNTargetMachine &TM)
  : ST(ST_) {
  using namespace TargetOpcode;

  auto GetAddrSpacePtr = [&TM](unsigned AS) {
    return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
  };

  const LLT S1 = LLT::scalar(1);
  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S128 = LLT::scalar(128);
  const LLT S256 = LLT::scalar(256);
  const LLT S512 = LLT::scalar(512);

  const LLT V2S16 = LLT::vector(2, 16);
  const LLT V4S16 = LLT::vector(4, 16);

  const LLT V2S32 = LLT::vector(2, 32);
  const LLT V3S32 = LLT::vector(3, 32);
  const LLT V4S32 = LLT::vector(4, 32);
  const LLT V5S32 = LLT::vector(5, 32);
  const LLT V6S32 = LLT::vector(6, 32);
  const LLT V7S32 = LLT::vector(7, 32);
  const LLT V8S32 = LLT::vector(8, 32);
  const LLT V9S32 = LLT::vector(9, 32);
  const LLT V10S32 = LLT::vector(10, 32);
  const LLT V11S32 = LLT::vector(11, 32);
  const LLT V12S32 = LLT::vector(12, 32);
  const LLT V13S32 = LLT::vector(13, 32);
  const LLT V14S32 = LLT::vector(14, 32);
  const LLT V15S32 = LLT::vector(15, 32);
  const LLT V16S32 = LLT::vector(16, 32);

  const LLT V2S64 = LLT::vector(2, 64);
  const LLT V3S64 = LLT::vector(3, 64);
  const LLT V4S64 = LLT::vector(4, 64);
  const LLT V5S64 = LLT::vector(5, 64);
  const LLT V6S64 = LLT::vector(6, 64);
  const LLT V7S64 = LLT::vector(7, 64);
  const LLT V8S64 = LLT::vector(8, 64);

  std::initializer_list<LLT> AllS32Vectors =
    {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
     V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
  std::initializer_list<LLT> AllS64Vectors =
    {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};

  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);

  const LLT CodePtr = FlatPtr;

  const std::initializer_list<LLT> AddrSpaces64 = {
    GlobalPtr, ConstantPtr, FlatPtr
  };

  const std::initializer_list<LLT> AddrSpaces32 = {
    LocalPtr, PrivatePtr
  };

  const std::initializer_list<LLT> FPTypesBase = {
    S32, S64
  };

  const std::initializer_list<LLT> FPTypes16 = {
    S32, S64, S16
  };

  const std::initializer_list<LLT> FPTypesPK16 = {
    S32, S64, S16, V2S16
  };

  setAction({G_BRCOND, S1}, Legal);

  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
  // elements for v3s16
  getActionDefinitionsBuilder(G_PHI)
    .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
    .legalFor(AllS32Vectors)
    .legalFor(AllS64Vectors)
    .legalFor(AddrSpaces64)
    .legalFor(AddrSpaces32)
    .clampScalar(0, S32, S256)
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .legalIf(isPointer(0));

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalFor({S32, S16})
      .clampScalar(0, S16, S32)
      .scalarize(0);
  } else {
    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalFor({S32})
      .clampScalar(0, S32, S32)
      .scalarize(0);
  }

  getActionDefinitionsBuilder({G_UMULH, G_SMULH})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  // Report legal for any types we can handle anywhere. For the cases only legal
  // on the SALU, RegBankSelect will be able to re-legalize.
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
    .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
    .widenScalarToNextPow2(0)
    .scalarize(0);

  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                               G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
    .legalFor({{S32, S1}})
    .clampScalar(0, S32, S32);

  getActionDefinitionsBuilder(G_BITCAST)
    .legalForCartesianProduct({S32, V2S16})
    .legalForCartesianProduct({S64, V2S32, V4S16})
    .legalForCartesianProduct({V2S64, V4S32})
    // Don't worry about the size constraint.
    .legalIf(all(isPointer(0), isPointer(1)));

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64, S16})
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
               ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .clampScalarOrElt(0, S32, S512)
    .legalIf(isMultiple32(0))
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16);


  // FIXME: i1 operands to intrinsics should always be legal, but other i1
  // values may not be legal. We need to figure out how to distinguish
  // between these two scenarios.
  getActionDefinitionsBuilder(G_CONSTANT)
    .legalFor({S1, S32, S64, GlobalPtr,
               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
    .clampScalar(0, S32, S64)
    .widenScalarToNextPow2(0)
    .legalIf(isPointer(0));

  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);

  auto &FPOpActions = getActionDefinitionsBuilder(
    { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
    .legalFor({S32, S64});

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts())
      FPOpActions.legalFor({S16, V2S16});
    else
      FPOpActions.legalFor({S16});
  }

  auto &MinNumMaxNum = getActionDefinitionsBuilder({
      G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});

  if (ST.hasVOP3PInsts()) {
    MinNumMaxNum.customFor(FPTypesPK16)
      .clampMaxNumElements(0, S16, 2)
      .clampScalar(0, S16, S64)
      .scalarize(0);
  } else if (ST.has16BitInsts()) {
    MinNumMaxNum.customFor(FPTypes16)
      .clampScalar(0, S16, S64)
      .scalarize(0);
  } else {
    MinNumMaxNum.customFor(FPTypesBase)
      .clampScalar(0, S32, S64)
      .scalarize(0);
  }

  // TODO: Implement
  getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();

  if (ST.hasVOP3PInsts())
    FPOpActions.clampMaxNumElements(0, S16, 2);
  FPOpActions
    .scalarize(0)
    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64, S16})
      .scalarize(0)
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64})
      .scalarize(0)
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_FPTRUNC)
    .legalFor({{S32, S64}, {S16, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
    .legalFor({{S64, S32}, {S32, S16}})
    .lowerFor({{S64, S16}}) // FIXME: Implement
    .scalarize(0);

  // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
  getActionDefinitionsBuilder(G_FCOPYSIGN).lower();

  getActionDefinitionsBuilder(G_FSUB)
      // Use actual fsub instruction
      .legalFor({S32})
      // Must use fadd + fneg
      .lowerFor({S64, S16, V2S16})
      .scalarize(0)
      .clampScalar(0, S32, S64);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
               {S32, S1}, {S64, S1}, {S16, S1},
               // FIXME: Hack
               {S64, LLT::scalar(33)},
               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
    .legalFor({{S32, S32}, {S64, S32}})
    .lowerFor({{S32, S64}})
    .customFor({{S64, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
    .legalFor({{S32, S32}, {S32, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
    .legalFor({S32, S64})
    .scalarize(0);

  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  } else {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32})
      .customFor({S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  }

  getActionDefinitionsBuilder(G_GEP)
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0);

  setAction({G_BLOCK_ADDR, CodePtr}, Legal);

  auto &CmpBuilder =
    getActionDefinitionsBuilder(G_ICMP)
    .legalForCartesianProduct(
      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .legalFor({{S1, S32}, {S1, S64}});
  if (ST.has16BitInsts()) {
    CmpBuilder.legalFor({{S1, S16}});
  }

  CmpBuilder
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .legalIf(all(typeIs(0, S1), isPointer(1)));

  getActionDefinitionsBuilder(G_FCMP)
    .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0);

  // FIXME: fexp, flog2, flog10 needs to be custom lowered.
  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
                               G_FLOG, G_FLOG2, G_FLOG10})
    .legalFor({S32})
    .scalarize(0);

  // The 64-bit versions produce 32-bit results, but only on the SALU.
  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
                               G_CTPOP})
    .legalFor({{S32, S32}, {S32, S64}})
    .clampScalar(0, S32, S32)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .widenScalarToNextPow2(0, 32)
    .widenScalarToNextPow2(1, 32);

  // TODO: Expand for > s32
  getActionDefinitionsBuilder(G_BSWAP)
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16, V2S16})
        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
        .clampMaxNumElements(0, S16, 2)
        .clampScalar(0, S16, S32)
        .widenScalarToNextPow2(0)
        .scalarize(0);
    } else {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16})
        .widenScalarToNextPow2(0)
        .clampScalar(0, S16, S32)
        .scalarize(0);
    }
  } else {
    getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({S32})
      .clampScalar(0, S32, S32)
      .widenScalarToNextPow2(0)
      .scalarize(0);
  }

  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() <
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() >
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

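  // For G_INTTOPTR the pointer is type index 0 and the integer is type
  // index 1 (G_PTRTOINT is the mirror image); in both cases the integer
  // operand is widened or narrowed until it matches the pointer's size in
  // its address space.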
  getActionDefinitionsBuilder(G_INTTOPTR)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      });

  getActionDefinitionsBuilder(G_PTRTOINT)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      })
    .narrowScalarIf(
      greaterThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      });

  if (ST.hasFlatAddressSpace()) {
    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
      .scalarize(0)
      .custom();
  }

  // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
  // handle some operations by just promoting the register during
  // selection. There are also d16 loads on GFX9+ which preserve the high bits.
  getActionDefinitionsBuilder({G_LOAD, G_STORE})
    .narrowScalarIf([](const LegalityQuery &Query) {
        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (Size > 32 && MemSize < Size);
      },
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(32));
      })
    .fewerElementsIf([=](const LegalityQuery &Query) {
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (MemSize == 96) &&
               Query.Types[0].isVector() &&
               !ST.hasDwordx3LoadStores();
      },
      [=](const LegalityQuery &Query) {
        return std::make_pair(0, V2S32);
      })
    .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];

        unsigned Size = Ty0.getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        if (Size < 32 || (Size > 32 && MemSize < Size))
          return false;

        if (Ty0.isVector() && Size != MemSize)
          return false;

        // TODO: Decompose private loads into 4-byte components.
        // TODO: Illegal flat loads on SI
        switch (MemSize) {
        case 8:
        case 16:
          return Size == 32;
        case 32:
        case 64:
        case 128:
          return true;

        case 96:
          return ST.hasDwordx3LoadStores();

        case 256:
        case 512:
          // TODO: Possibly support loads of i256 and i512. This will require
          // adding i256 and i512 types to MVT in order to be able to use
          // TableGen.
          // TODO: Add support for other vector types, this will require
          // defining more value mappings for the new types.
          return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
                                    Ty0.getScalarType().getSizeInBits() == 64);

        default:
          return false;
        }
      })
    .clampScalar(0, S32, S64);


  // FIXME: Handle alignment requirements.
  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
    .legalForTypesWithMemDesc({
        {S32, GlobalPtr, 8, 8},
        {S32, GlobalPtr, 16, 8},
        {S32, LocalPtr, 8, 8},
        {S32, LocalPtr, 16, 8},
        {S32, PrivatePtr, 8, 8},
        {S32, PrivatePtr, 16, 8}});
  if (ST.hasFlatAddressSpace()) {
    ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
                                       {S32, FlatPtr, 16, 8}});
  }

  ExtLoads.clampScalar(0, S32, S32)
          .widenScalarToNextPow2(0)
          .unsupportedIfMemSizeNotPow2()
          .lower();

  auto &Atomics = getActionDefinitionsBuilder(
    {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
     G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
     G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
     G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
    .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
               {S64, GlobalPtr}, {S64, LocalPtr}});
  if (ST.hasFlatAddressSpace()) {
    Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
  }

  // TODO: Pointer types, any 32-bit or 64-bit vector
  getActionDefinitionsBuilder(G_SELECT)
    .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
          GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
          LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
    .clampScalar(0, S16, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
    .scalarize(1)
    .clampMaxNumElements(0, S32, 2)
    .clampMaxNumElements(0, LocalPtr, 2)
    .clampMaxNumElements(0, PrivatePtr, 2)
    .scalarize(0)
    .widenScalarToNextPow2(0)
    .legalIf(all(isPointer(0), typeIs(1, S1)));

  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
  // be more flexible with the shift amount type.
  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
    .legalFor({{S32, S32}, {S64, S32}});
  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
            .clampMaxNumElements(0, S16, 2);
    } else
      Shifts.legalFor({{S16, S32}, {S16, S16}});

    Shifts.clampScalar(1, S16, S32);
    Shifts.clampScalar(0, S16, S64);
    Shifts.widenScalarToNextPow2(0, 16);
  } else {
    // Make sure we legalize the shift amount type first, as the general
    // expansion for the shifted type will produce much worse code if it hasn't
    // been truncated already.
    Shifts.clampScalar(1, S32, S32);
    Shifts.clampScalar(0, S32, S64);
    Shifts.widenScalarToNextPow2(0, 32);
  }
  Shifts.scalarize(0);

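  // G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT put the vector and the
  // element at different operand positions, so the type indexes are
  // remapped per opcode before the shared rules below are applied.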
  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
    unsigned IdxTypeIdx = 2;

    getActionDefinitionsBuilder(Op)
      .customIf([=](const LegalityQuery &Query) {
          const LLT EltTy = Query.Types[EltTypeIdx];
          const LLT VecTy = Query.Types[VecTypeIdx];
          const LLT IdxTy = Query.Types[IdxTypeIdx];
          return (EltTy.getSizeInBits() == 16 ||
                  EltTy.getSizeInBits() % 32 == 0) &&
                 VecTy.getSizeInBits() % 32 == 0 &&
                 VecTy.getSizeInBits() <= 512 &&
                 IdxTy.getSizeInBits() == 32;
        })
      .clampScalar(EltTypeIdx, S32, S64)
      .clampScalar(VecTypeIdx, S32, S64)
      .clampScalar(IdxTypeIdx, S32, S32);
  }
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
    .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      });

  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
    unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
    unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;

    // FIXME: Doesn't handle extract of illegal sizes.
    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          const LLT LitTy = Query.Types[LitTyIdx];
          return (BigTy.getSizeInBits() % 32 == 0) &&
                 (LitTy.getSizeInBits() % 16 == 0);
        })
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          return (BigTy.getScalarSizeInBits() < 16);
        },
        LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT LitTy = Query.Types[LitTyIdx];
          return (LitTy.getScalarSizeInBits() < 16);
        },
        LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
      .widenScalarToNextPow2(BigTyIdx, 32);

  }

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
    .legalForCartesianProduct(AllS32Vectors, {S32})
    .legalForCartesianProduct(AllS64Vectors, {S64})
    .clampNumElements(0, V16S32, V16S32)
    .clampNumElements(0, V2S64, V8S64)
    .minScalarSameAs(1, 0)
    .legalIf(isRegisterType(0))
    .minScalarOrElt(0, S32);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalIf(isRegisterType(0));

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    getActionDefinitionsBuilder(Op)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
      // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
      // worth considering the multiples of 64 since 2*192 and 2*384 are not
      // valid.
      .clampScalar(LitTyIdx, S16, S256)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)

      // Break up vectors with weird elements into scalars
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
        scalarize(0))
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        scalarize(1))
      .clampScalar(BigTyIdx, S32, S512)
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT &Ty = Query.Types[BigTyIdx];
          return !isPowerOf2_32(Ty.getSizeInBits()) &&
                 Ty.getSizeInBits() % 16 != 0;
        },
        [=](const LegalityQuery &Query) {
          // Pick the next power of 2, or a multiple of 64 over 128.
          // Whichever is smaller.
          const LLT &Ty = Query.Types[BigTyIdx];
          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
          if (NewSizeInBits >= 256) {
            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
            if (RoundedTo < NewSizeInBits)
              NewSizeInBits = RoundedTo;
          }
          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        })
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];

          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;

          return BigTy.getSizeInBits() % 16 == 0 &&
                 LitTy.getSizeInBits() % 16 == 0 &&
                 BigTy.getSizeInBits() <= 512;
        })
      // Any vectors left are the wrong size. Scalarize them.
      .scalarize(0)
      .scalarize(1);
  }

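  // computeTables() freezes the rule sets built above so queries are cheap;
  // verify() self-checks the rules against the target's instruction info.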
  computeTables();
  verify(*ST.getInstrInfo());
}

bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder,
                                         GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ADDRSPACE_CAST:
    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FRINT:
    return legalizeFrint(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FCEIL:
    return legalizeFceil(MI, MRI, MIRBuilder);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
  case TargetOpcode::G_SITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, true);
  case TargetOpcode::G_UITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, false);
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
    return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
  default:
    return false;
  }

  llvm_unreachable("expected switch to return");
}

Register AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const LLT S32 = LLT::scalar(32);

  if (ST.hasApertureRegs()) {
    // FIXME: Use inline constants (src_{shared, private}_base) instead of
    // getreg.
    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
    unsigned Encoding =
        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;

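    // s_getreg returns the 16-bit aperture field in the low bits, so the
    // result is shifted left by WidthM1 + 1 (i.e. 16) below to reconstruct
    // the 32-bit aperture base address.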
    Register ApertureReg = MRI.createGenericVirtualRegister(S32);
    Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
      .addDef(GetReg)
      .addImm(Encoding);
    MRI.setType(GetReg, S32);

    auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
      .addDef(ApertureReg)
      .addUse(GetReg)
      .addUse(ShiftAmt.getReg(0));

    return ApertureReg;
  }

  Register QueuePtr = MRI.createGenericVirtualRegister(
    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));

  // FIXME: Placeholder until we can track the input registers.
  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);

  // Offset into amd_queue_t for group_segment_aperture_base_hi /
  // private_segment_aperture_base_hi.
  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;

  // FIXME: Don't use undef
  Value *V = UndefValue::get(PointerType::get(
                               Type::getInt8Ty(MF.getFunction().getContext()),
                               AMDGPUAS::CONSTANT_ADDRESS));

  MachinePointerInfo PtrInfo(V, StructOffset);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad |
    MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    4,
    MinAlign(64, StructOffset));

  Register LoadResult = MRI.createGenericVirtualRegister(S32);
  Register LoadAddr;

  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
  return LoadResult;
}

bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();

  MIRBuilder.setInstr(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  unsigned DestAS = DstTy.getAddressSpace();
  unsigned SrcAS = SrcTy.getAddressSpace();

  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
  // vector element.
  assert(!DstTy.isVector());

  const AMDGPUTargetMachine &TM
    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
    return true;
  }

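  // A flat null pointer (0) must map to the destination segment's null
  // value, which is not 0 for local/private, so the truncated low half is
  // selected against a comparison with flat null.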
  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
    unsigned NullVal = TM.getNullPointerValue(DestAS);

    auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
    auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);

    Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);

    // Extract low 32-bits of the pointer.
    MIRBuilder.buildExtract(PtrLo32, Src, 0);

    Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));

    MI.eraseFromParent();
    return true;
  }

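  // The reverse direction builds a 64-bit flat pointer from the segment's
  // aperture base (high half) and the 32-bit segment pointer (low half),
  // again substituting the flat null value when the source is segment null.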
  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);

  auto SegmentNull =
      MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
  auto FlatNull =
      MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));

  Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);

  Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));

  Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);

  // Coerce the type of the low half of the result so we can use merge_values.
  Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
    .addDef(SrcAsInt)
    .addUse(Src);

  // TODO: Should we allow mismatched types but matching sizes in merges to
  // avoid the ptrtoint?
  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeFrint(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);

  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Src);
  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);

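  // Adding and subtracting 2^52 (with the input's sign) forces the fraction
  // bits to round away in double precision, producing rint(Src). Inputs with
  // a magnitude above 0x1.fffffffffffffp+51 are already integral and pass
  // through the final select unchanged.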
  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");

  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);

  // TODO: Should this propagate fast-math-flags?
  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);

  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);

  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
  return true;
}

bool AMDGPULegalizerInfo::legalizeFceil(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // result = trunc(src)
  // if (src > 0.0 && src != result)
  //   result += 1.0

  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});

  const auto Zero = B.buildFConstant(S64, 0.0);
  const auto One = B.buildFConstant(S64, 1.0);
  auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
  auto And = B.buildAnd(S1, Lt0, NeTrunc);
  auto Add = B.buildSelect(S64, And, One, Zero);

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
  return true;
}

static MachineInstrBuilder extractF64Exponent(unsigned Hi,
                                              MachineIRBuilder &B) {
  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;
  LLT S32 = LLT::scalar(32);

  auto Const0 = B.buildConstant(S32, FractBits - 32);
  auto Const1 = B.buildConstant(S32, ExpBits);

  // amdgcn.ubfe takes (src, offset, width); Hi is the source word.
  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
    .addUse(Hi)
    .addUse(Const0.getReg(0))
    .addUse(Const1.getReg(0));

  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}

bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

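  // The fraction bits below the binary point are cleared with a mask derived
  // from the exponent. An exponent < 0 means |Src| < 1, so only the sign bit
  // survives; an exponent > 51 means Src has no fraction bits and is
  // returned unchanged.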
  // TODO: Should this use extract since the low half is unused?
  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
  Register Hi = Unmerge.getReg(1);

  // Extract the upper half, since this is where we will find the sign and
  // exponent.
  auto Exp = extractF64Exponent(Hi, B);

  const unsigned FractBits = 52;

  // Extract the sign bit.
  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);

  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);

  const auto Zero32 = B.buildConstant(S32, 0);

  // Extend back to 64-bits.
  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});

  auto Shr = B.buildAShr(S64, FractMask, Exp);
  auto Not = B.buildNot(S64, Shr);
  auto Tmp0 = B.buildAnd(S64, Src, Not);
  auto FiftyOne = B.buildConstant(S32, FractBits - 1);

  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);

  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
  return true;
}

bool AMDGPULegalizerInfo::legalizeITOFP(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, bool Signed) const {
  B.setInstr(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

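  // Convert the two 32-bit halves independently and recombine them as
  // fp(Hi) * 2^32 + fp(Lo). The high half is converted signed or unsigned as
  // requested; the low half is always unsigned. The 2^32 scale is applied
  // with the amdgcn.ldexp intrinsic.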
  auto Unmerge = B.buildUnmerge({S32, S32}, Src);

  auto CvtHi = Signed ?
    B.buildSITOFP(S64, Unmerge.getReg(1)) :
    B.buildUITOFP(S64, Unmerge.getReg(1));

  auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));

  auto ThirtyTwo = B.buildConstant(S32, 32);
  auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
    .addUse(CvtHi.getReg(0))
    .addUse(ThirtyTwo.getReg(0));

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(Dst, LdExp, CvtLo);
  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
                        MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;

  // With ieee_mode disabled, the instructions have the correct behavior
  // already for G_FMINNUM/G_FMAXNUM
  if (!MFI->getMode().IEEE)
    return !IsIEEEOp;

  if (IsIEEEOp)
    return true;

  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
  HelperBuilder.setMBB(*MI.getParent());
  return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
}

bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
  if (!IdxVal) // Dynamic case will be selected to register indexing.
    return true;

  Register Dst = MI.getOperand(0).getReg();
  Register Vec = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Vec);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(Dst));

  B.setInstr(MI);

  if (IdxVal.getValue() < VecTy.getNumElements())
    B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
  else
    B.buildUndef(Dst);

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
  if (!IdxVal) // Dynamic case will be selected to register indexing.
    return true;

  Register Dst = MI.getOperand(0).getReg();
  Register Vec = MI.getOperand(1).getReg();
  Register Ins = MI.getOperand(2).getReg();

  LLT VecTy = MRI.getType(Vec);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(Ins));

  B.setInstr(MI);

  if (IdxVal.getValue() < VecTy.getNumElements())
    B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
  else
    B.buildUndef(Dst);

  MI.eraseFromParent();
  return true;
}

// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                       MachineRegisterInfo &MRI) {
  Register CondDef = MI.getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(CondDef))
    return nullptr;

  MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
  return UseMI.getParent() == MI.getParent() &&
         UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
}

Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
                                                Register Reg, LLT Ty) const {
  Register LiveIn = MRI.getLiveInVirtReg(Reg);
  if (LiveIn)
    return LiveIn;

  Register NewReg = MRI.createGenericVirtualRegister(Ty);
  MRI.addLiveIn(Reg, NewReg);
  return NewReg;
}

bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
                                         const ArgDescriptor *Arg) const {
  if (!Arg->isRegister())
    return false; // TODO: Handle these

  assert(Arg->getRegister() != 0);
  assert(Arg->getRegister().isPhysical());

  MachineRegisterInfo &MRI = *B.getMRI();

  LLT Ty = MRI.getType(DstReg);
  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);

  if (Arg->isMasked()) {
    // TODO: Should we try to emit this once in the entry block?
    const LLT S32 = LLT::scalar(32);
    const unsigned Mask = Arg->getMask();
    const unsigned Shift = countTrailingZeros<unsigned>(Mask);

    // Shift the live-in down to the field's LSB, then mask out the field.
    auto ShiftAmt = B.buildConstant(S32, Shift);
    auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
    B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
  } else
    B.buildCopy(DstReg, LiveIn);

  // Insert the argument copy if it doesn't already exist.
  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
  if (!MRI.getVRegDef(LiveIn)) {
    MachineBasicBlock &EntryMBB = B.getMF().front();
    EntryMBB.addLiveIn(Arg->getRegister());
    B.setInsertPt(EntryMBB, EntryMBB.begin());
    B.buildCopy(LiveIn, Arg->getRegister());
  }

  return true;
}

bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
  MachineInstr &MI,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &B,
  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
  B.setInstr(MI);

  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
  if (!Arg) {
    LLVM_DEBUG(dbgs() << "Required arg register missing\n");
    return false;
  }

  if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
    MI.eraseFromParent();
    return true;
  }

  return false;
}

bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {
  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
  if (!MFI->isEntryFunction()) {
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
  }

  B.setInstr(MI);

  uint64_t Offset =
    ST.getTargetLowering()->getImplicitParameterOffset(
      B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC)
    = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  if (!Arg)
    return false;

  Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
  if (!loadInputValue(KernargPtrReg, B, Arg))
    return false;

  B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {
  // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_if: {
    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
      const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

      B.setInstr(*BrCond);
      Register Def = MI.getOperand(1).getReg();
      Register Use = MI.getOperand(3).getReg();
      B.buildInstr(AMDGPU::SI_IF)
        .addDef(Def)
        .addUse(Use)
        .addMBB(BrCond->getOperand(1).getMBB());

      MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
      MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
      MI.eraseFromParent();
      BrCond->eraseFromParent();
      return true;
    }

    return false;
  }
  case Intrinsic::amdgcn_loop: {
    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
      const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

      B.setInstr(*BrCond);
      Register Reg = MI.getOperand(2).getReg();
      B.buildInstr(AMDGPU::SI_LOOP)
        .addUse(Reg)
        .addMBB(BrCond->getOperand(1).getMBB());
      MI.eraseFromParent();
      BrCond->eraseFromParent();
      MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
      return true;
    }

    return false;
  }
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return legalizePreloadedArgIntrin(
      MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  case Intrinsic::amdgcn_implicitarg_ptr:
    return legalizeImplicitArgPtr(MI, MRI, B);
  case Intrinsic::amdgcn_workitem_id_x:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  case Intrinsic::amdgcn_workitem_id_y:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  case Intrinsic::amdgcn_workitem_id_z:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  case Intrinsic::amdgcn_workgroup_id_x:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
  case Intrinsic::amdgcn_workgroup_id_y:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
  case Intrinsic::amdgcn_workgroup_id_z:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
  case Intrinsic::amdgcn_dispatch_ptr:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::DISPATCH_PTR);
  case Intrinsic::amdgcn_queue_ptr:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::QUEUE_PTR);
  case Intrinsic::amdgcn_implicit_buffer_ptr:
    return legalizePreloadedArgIntrin(
      MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
  case Intrinsic::amdgcn_dispatch_id:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::DISPATCH_ID);
  default:
    return true;
  }

  return true;
}