LLVM  15.0.0git
AMDGPUInstCombineIntrinsic.cpp
Go to the documentation of this file.
//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // AMDGPU target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "AMDGPUtti"
26 
namespace {

// Table-entry type for the tablegen-generated list of image intrinsics that
// carry a dmask operand. Each record holds just the intrinsic ID.
struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr; // Intrinsic::ID of the image intrinsic.
};

// Pull in the generated table definition for the struct above.
#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace
37 
38 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
39 //
40 // A single NaN input is folded to minnum, so we rely on that folding for
41 // handling NaNs.
42 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
43  const APFloat &Src2) {
44  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
45 
46  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
47  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
48  if (Cmp0 == APFloat::cmpEqual)
49  return maxnum(Src1, Src2);
50 
51  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
52  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
53  if (Cmp1 == APFloat::cmpEqual)
54  return maxnum(Src0, Src2);
55 
56  return maxnum(Src0, Src1);
57 }
58 
59 // Check if a value can be converted to a 16-bit value without losing
60 // precision.
61 // The value is expected to be either a float (IsFloat = true) or an unsigned
62 // integer (IsFloat = false).
63 static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
64  Type *VTy = V.getType();
65  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
66  // The value is already 16-bit, so we don't want to convert to 16-bit again!
67  return false;
68  }
69  if (IsFloat) {
70  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
71  // We need to check that if we cast the index down to a half, we do not
72  // lose precision.
73  APFloat FloatValue(ConstFloat->getValueAPF());
74  bool LosesInfo = true;
76  &LosesInfo);
77  return !LosesInfo;
78  }
79  } else {
80  if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
81  // We need to check that if we cast the index down to an i16, we do not
82  // lose precision.
83  APInt IntValue(ConstInt->getValue());
84  return IntValue.getActiveBits() <= 16;
85  }
86  }
87 
88  Value *CastSrc;
89  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
90  : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
91  if (IsExt) {
92  Type *CastSrcTy = CastSrc->getType();
93  if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
94  return true;
95  }
96 
97  return false;
98 }
99 
100 // Convert a value to 16-bit.
102  Type *VTy = V.getType();
103  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
104  return cast<Instruction>(&V)->getOperand(0);
105  if (VTy->isIntegerTy())
106  return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
107  if (VTy->isFloatingPointTy())
108  return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
109 
110  llvm_unreachable("Should never be called!");
111 }
112 
113 /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
114 /// modified arguments (based on OldIntr) and replaces InstToReplace with
115 /// this newly created intrinsic call.
117  IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
118  InstCombiner &IC,
120  Func) {
121  SmallVector<Type *, 4> ArgTys;
122  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
123  return None;
124 
125  SmallVector<Value *, 8> Args(OldIntr.args());
126 
127  // Modify arguments and types
128  Func(Args, ArgTys);
129 
130  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
131 
132  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
133  NewCall->takeName(&OldIntr);
134  NewCall->copyMetadata(OldIntr);
135  if (isa<FPMathOperator>(NewCall))
136  NewCall->copyFastMathFlags(&OldIntr);
137 
138  // Erase and replace uses
139  if (!InstToReplace.getType()->isVoidTy())
140  IC.replaceInstUsesWith(InstToReplace, NewCall);
141 
142  bool RemoveOldIntr = &OldIntr != &InstToReplace;
143 
144  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
145  if (RemoveOldIntr)
146  IC.eraseInstFromFunction(OldIntr);
147 
148  return RetValue;
149 }
150 
153  const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
154  IntrinsicInst &II, InstCombiner &IC) {
155  // Optimize _L to _LZ when _L is zero
156  if (const auto *LZMappingInfo =
157  AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
158  if (auto *ConstantLod =
159  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
160  if (ConstantLod->isZero() || ConstantLod->isNegative()) {
161  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
163  ImageDimIntr->Dim);
164  return modifyIntrinsicCall(
165  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
166  Args.erase(Args.begin() + ImageDimIntr->LodIndex);
167  });
168  }
169  }
170  }
171 
172  // Optimize _mip away, when 'lod' is zero
173  if (const auto *MIPMappingInfo =
174  AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
175  if (auto *ConstantMip =
176  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
177  if (ConstantMip->isZero()) {
178  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
179  AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
180  ImageDimIntr->Dim);
181  return modifyIntrinsicCall(
182  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
183  Args.erase(Args.begin() + ImageDimIntr->MipIndex);
184  });
185  }
186  }
187  }
188 
189  // Optimize _bias away when 'bias' is zero
190  if (const auto *BiasMappingInfo =
191  AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
192  if (auto *ConstantBias =
193  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
194  if (ConstantBias->isZero()) {
195  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
196  AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
197  ImageDimIntr->Dim);
198  return modifyIntrinsicCall(
199  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
200  Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
201  ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
202  });
203  }
204  }
205  }
206 
207  // Optimize _offset away when 'offset' is zero
208  if (const auto *OffsetMappingInfo =
209  AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
210  if (auto *ConstantOffset =
211  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
212  if (ConstantOffset->isZero()) {
213  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
215  OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
216  return modifyIntrinsicCall(
217  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
218  Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
219  });
220  }
221  }
222  }
223 
224  // Try to use D16
225  if (ST->hasD16Images()) {
226 
227  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
228  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
229 
230  if (BaseOpcode->HasD16) {
231 
232  // If the only use of image intrinsic is a fptrunc (with conversion to
233  // half) then both fptrunc and image intrinsic will be replaced with image
234  // intrinsic with D16 flag.
235  if (II.hasOneUse()) {
236  Instruction *User = II.user_back();
237 
238  if (User->getOpcode() == Instruction::FPTrunc &&
239  User->getType()->getScalarType()->isHalfTy()) {
240 
241  return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
242  [&](auto &Args, auto &ArgTys) {
243  // Change return type of image intrinsic.
244  // Set it to return type of fptrunc.
245  ArgTys[0] = User->getType();
246  });
247  }
248  }
249  }
250  }
251 
252  // Try to use A16 or G16
253  if (!ST->hasA16() && !ST->hasG16())
254  return None;
255 
256  // Address is interpreted as float if the instruction has a sampler or as
257  // unsigned int if there is no sampler.
258  bool HasSampler =
259  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
260  bool FloatCoord = false;
261  // true means derivatives can be converted to 16 bit, coordinates not
262  bool OnlyDerivatives = false;
263 
264  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
265  OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
266  Value *Coord = II.getOperand(OperandIndex);
267  // If the values are not derived from 16-bit values, we cannot optimize.
268  if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
269  if (OperandIndex < ImageDimIntr->CoordStart ||
270  ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
271  return None;
272  }
273  // All gradients can be converted, so convert only them
274  OnlyDerivatives = true;
275  break;
276  }
277 
278  assert(OperandIndex == ImageDimIntr->GradientStart ||
279  FloatCoord == Coord->getType()->isFloatingPointTy());
280  FloatCoord = Coord->getType()->isFloatingPointTy();
281  }
282 
283  if (!OnlyDerivatives && !ST->hasA16())
284  OnlyDerivatives = true; // Only supports G16
285 
286  // Check if there is a bias parameter and if it can be converted to f16
287  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
288  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
289  assert(HasSampler &&
290  "Only image instructions with a sampler can have a bias");
291  if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
292  OnlyDerivatives = true;
293  }
294 
295  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
296  ImageDimIntr->CoordStart))
297  return None;
298 
299  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
301 
302  return modifyIntrinsicCall(
303  II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
304  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
305  if (!OnlyDerivatives) {
306  ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
307 
308  // Change the bias type
309  if (ImageDimIntr->NumBiasArgs != 0)
310  ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
311  }
312 
313  unsigned EndIndex =
314  OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
315  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
316  OperandIndex < EndIndex; OperandIndex++) {
317  Args[OperandIndex] =
318  convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
319  }
320 
321  // Convert the bias
322  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
323  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
324  Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
325  }
326  });
327 }
328 
329 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
330  InstCombiner &IC) const {
331  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
332  // infinity, gives +0.0. If we can prove we don't have one of the special
333  // cases then we can use a normal multiply instead.
334  // TODO: Create and use isKnownFiniteNonZero instead of just matching
335  // constants here.
336  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
338  // One operand is not zero or infinity or NaN.
339  return true;
340  }
341  auto *TLI = &IC.getTargetLibraryInfo();
342  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
343  isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
344  // Neither operand is infinity or NaN.
345  return true;
346  }
347  return false;
348 }
349 
352  Intrinsic::ID IID = II.getIntrinsicID();
353  switch (IID) {
354  case Intrinsic::amdgcn_rcp: {
355  Value *Src = II.getArgOperand(0);
356 
357  // TODO: Move to ConstantFolding/InstSimplify?
358  if (isa<UndefValue>(Src)) {
359  Type *Ty = II.getType();
360  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
361  return IC.replaceInstUsesWith(II, QNaN);
362  }
363 
364  if (II.isStrictFP())
365  break;
366 
367  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
368  const APFloat &ArgVal = C->getValueAPF();
369  APFloat Val(ArgVal.getSemantics(), 1);
371 
372  // This is more precise than the instruction may give.
373  //
374  // TODO: The instruction always flushes denormal results (except for f16),
375  // should this also?
376  return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
377  }
378 
379  break;
380  }
381  case Intrinsic::amdgcn_rsq: {
382  Value *Src = II.getArgOperand(0);
383 
384  // TODO: Move to ConstantFolding/InstSimplify?
385  if (isa<UndefValue>(Src)) {
386  Type *Ty = II.getType();
387  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
388  return IC.replaceInstUsesWith(II, QNaN);
389  }
390 
391  break;
392  }
393  case Intrinsic::amdgcn_frexp_mant:
394  case Intrinsic::amdgcn_frexp_exp: {
395  Value *Src = II.getArgOperand(0);
396  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
397  int Exp;
398  APFloat Significand =
399  frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
400 
401  if (IID == Intrinsic::amdgcn_frexp_mant) {
402  return IC.replaceInstUsesWith(
403  II, ConstantFP::get(II.getContext(), Significand));
404  }
405 
406  // Match instruction special case behavior.
407  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
408  Exp = 0;
409 
410  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
411  }
412 
413  if (isa<UndefValue>(Src)) {
414  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
415  }
416 
417  break;
418  }
419  case Intrinsic::amdgcn_class: {
420  enum {
421  S_NAN = 1 << 0, // Signaling NaN
422  Q_NAN = 1 << 1, // Quiet NaN
423  N_INFINITY = 1 << 2, // Negative infinity
424  N_NORMAL = 1 << 3, // Negative normal
425  N_SUBNORMAL = 1 << 4, // Negative subnormal
426  N_ZERO = 1 << 5, // Negative zero
427  P_ZERO = 1 << 6, // Positive zero
428  P_SUBNORMAL = 1 << 7, // Positive subnormal
429  P_NORMAL = 1 << 8, // Positive normal
430  P_INFINITY = 1 << 9 // Positive infinity
431  };
432 
433  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
436 
437  Value *Src0 = II.getArgOperand(0);
438  Value *Src1 = II.getArgOperand(1);
439  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
440  if (!CMask) {
441  if (isa<UndefValue>(Src0)) {
442  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
443  }
444 
445  if (isa<UndefValue>(Src1)) {
446  return IC.replaceInstUsesWith(II,
447  ConstantInt::get(II.getType(), false));
448  }
449  break;
450  }
451 
452  uint32_t Mask = CMask->getZExtValue();
453 
454  // If all tests are made, it doesn't matter what the value is.
455  if ((Mask & FullMask) == FullMask) {
456  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
457  }
458 
459  if ((Mask & FullMask) == 0) {
460  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
461  }
462 
463  if (Mask == (S_NAN | Q_NAN)) {
464  // Equivalent of isnan. Replace with standard fcmp.
465  Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
466  FCmp->takeName(&II);
467  return IC.replaceInstUsesWith(II, FCmp);
468  }
469 
470  if (Mask == (N_ZERO | P_ZERO)) {
471  // Equivalent of == 0.
472  Value *FCmp =
473  IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
474 
475  FCmp->takeName(&II);
476  return IC.replaceInstUsesWith(II, FCmp);
477  }
478 
479  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
480  if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
481  isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
482  return IC.replaceOperand(
483  II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
484  }
485 
486  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
487  if (!CVal) {
488  if (isa<UndefValue>(Src0)) {
489  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
490  }
491 
492  // Clamp mask to used bits
493  if ((Mask & FullMask) != Mask) {
494  CallInst *NewCall = IC.Builder.CreateCall(
495  II.getCalledFunction(),
496  {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
497 
498  NewCall->takeName(&II);
499  return IC.replaceInstUsesWith(II, NewCall);
500  }
501 
502  break;
503  }
504 
505  const APFloat &Val = CVal->getValueAPF();
506 
507  bool Result =
508  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
509  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
510  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
511  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
512  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
513  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
514  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
515  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
516  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
517  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
518 
519  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
520  }
521  case Intrinsic::amdgcn_cvt_pkrtz: {
522  Value *Src0 = II.getArgOperand(0);
523  Value *Src1 = II.getArgOperand(1);
524  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
525  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
526  const fltSemantics &HalfSem =
528  bool LosesInfo;
529  APFloat Val0 = C0->getValueAPF();
530  APFloat Val1 = C1->getValueAPF();
531  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
532  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
533 
534  Constant *Folded =
536  ConstantFP::get(II.getContext(), Val1)});
537  return IC.replaceInstUsesWith(II, Folded);
538  }
539  }
540 
541  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
542  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
543  }
544 
545  break;
546  }
547  case Intrinsic::amdgcn_cvt_pknorm_i16:
548  case Intrinsic::amdgcn_cvt_pknorm_u16:
549  case Intrinsic::amdgcn_cvt_pk_i16:
550  case Intrinsic::amdgcn_cvt_pk_u16: {
551  Value *Src0 = II.getArgOperand(0);
552  Value *Src1 = II.getArgOperand(1);
553 
554  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
555  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
556  }
557 
558  break;
559  }
560  case Intrinsic::amdgcn_ubfe:
561  case Intrinsic::amdgcn_sbfe: {
562  // Decompose simple cases into standard shifts.
563  Value *Src = II.getArgOperand(0);
564  if (isa<UndefValue>(Src)) {
565  return IC.replaceInstUsesWith(II, Src);
566  }
567 
568  unsigned Width;
569  Type *Ty = II.getType();
570  unsigned IntSize = Ty->getIntegerBitWidth();
571 
572  ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
573  if (CWidth) {
574  Width = CWidth->getZExtValue();
575  if ((Width & (IntSize - 1)) == 0) {
577  }
578 
579  // Hardware ignores high bits, so remove those.
580  if (Width >= IntSize) {
581  return IC.replaceOperand(
582  II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
583  }
584  }
585 
586  unsigned Offset;
587  ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
588  if (COffset) {
589  Offset = COffset->getZExtValue();
590  if (Offset >= IntSize) {
591  return IC.replaceOperand(
592  II, 1,
593  ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
594  }
595  }
596 
597  bool Signed = IID == Intrinsic::amdgcn_sbfe;
598 
599  if (!CWidth || !COffset)
600  break;
601 
602  // The case of Width == 0 is handled above, which makes this transformation
603  // safe. If Width == 0, then the ashr and lshr instructions become poison
604  // value since the shift amount would be equal to the bit size.
605  assert(Width != 0);
606 
607  // TODO: This allows folding to undef when the hardware has specific
608  // behavior?
609  if (Offset + Width < IntSize) {
610  Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
611  Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
612  : IC.Builder.CreateLShr(Shl, IntSize - Width);
613  RightShift->takeName(&II);
614  return IC.replaceInstUsesWith(II, RightShift);
615  }
616 
617  Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
618  : IC.Builder.CreateLShr(Src, Offset);
619 
620  RightShift->takeName(&II);
621  return IC.replaceInstUsesWith(II, RightShift);
622  }
623  case Intrinsic::amdgcn_exp:
624  case Intrinsic::amdgcn_exp_row:
625  case Intrinsic::amdgcn_exp_compr: {
626  ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
627  unsigned EnBits = En->getZExtValue();
628  if (EnBits == 0xf)
629  break; // All inputs enabled.
630 
631  bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
632  bool Changed = false;
633  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
634  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
635  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
636  Value *Src = II.getArgOperand(I + 2);
637  if (!isa<UndefValue>(Src)) {
638  IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
639  Changed = true;
640  }
641  }
642  }
643 
644  if (Changed) {
645  return &II;
646  }
647 
648  break;
649  }
650  case Intrinsic::amdgcn_fmed3: {
651  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
652  // for the shader.
653 
654  Value *Src0 = II.getArgOperand(0);
655  Value *Src1 = II.getArgOperand(1);
656  Value *Src2 = II.getArgOperand(2);
657 
658  // Checking for NaN before canonicalization provides better fidelity when
659  // mapping other operations onto fmed3 since the order of operands is
660  // unchanged.
661  CallInst *NewCall = nullptr;
662  if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
663  NewCall = IC.Builder.CreateMinNum(Src1, Src2);
664  } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
665  NewCall = IC.Builder.CreateMinNum(Src0, Src2);
666  } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
667  NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
668  }
669 
670  if (NewCall) {
671  NewCall->copyFastMathFlags(&II);
672  NewCall->takeName(&II);
673  return IC.replaceInstUsesWith(II, NewCall);
674  }
675 
676  bool Swap = false;
677  // Canonicalize constants to RHS operands.
678  //
679  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
680  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
681  std::swap(Src0, Src1);
682  Swap = true;
683  }
684 
685  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
686  std::swap(Src1, Src2);
687  Swap = true;
688  }
689 
690  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
691  std::swap(Src0, Src1);
692  Swap = true;
693  }
694 
695  if (Swap) {
696  II.setArgOperand(0, Src0);
697  II.setArgOperand(1, Src1);
698  II.setArgOperand(2, Src2);
699  return &II;
700  }
701 
702  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
703  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
704  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
705  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
706  C2->getValueAPF());
707  return IC.replaceInstUsesWith(
708  II, ConstantFP::get(IC.Builder.getContext(), Result));
709  }
710  }
711  }
712 
713  break;
714  }
715  case Intrinsic::amdgcn_icmp:
716  case Intrinsic::amdgcn_fcmp: {
717  const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
718  // Guard against invalid arguments.
719  int64_t CCVal = CC->getZExtValue();
720  bool IsInteger = IID == Intrinsic::amdgcn_icmp;
721  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
722  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
723  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
725  break;
726 
727  Value *Src0 = II.getArgOperand(0);
728  Value *Src1 = II.getArgOperand(1);
729 
730  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
731  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
732  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
733  if (CCmp->isNullValue()) {
734  return IC.replaceInstUsesWith(
735  II, ConstantExpr::getSExt(CCmp, II.getType()));
736  }
737 
738  // The result of V_ICMP/V_FCMP assembly instructions (which this
739  // intrinsic exposes) is one bit per thread, masked with the EXEC
740  // register (which contains the bitmask of live threads). So a
741  // comparison that always returns true is the same as a read of the
742  // EXEC register.
744  II.getModule(), Intrinsic::read_register, II.getType());
745  Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
746  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
747  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
748  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
750  NewCall->takeName(&II);
751  return IC.replaceInstUsesWith(II, NewCall);
752  }
753 
754  // Canonicalize constants to RHS.
755  CmpInst::Predicate SwapPred =
757  II.setArgOperand(0, Src1);
758  II.setArgOperand(1, Src0);
759  II.setArgOperand(
760  2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
761  return &II;
762  }
763 
764  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
765  break;
766 
767  // Canonicalize compare eq with true value to compare != 0
768  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
769  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
770  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
771  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
772  Value *ExtSrc;
773  if (CCVal == CmpInst::ICMP_EQ &&
774  ((match(Src1, PatternMatch::m_One()) &&
775  match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
776  (match(Src1, PatternMatch::m_AllOnes()) &&
777  match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
778  ExtSrc->getType()->isIntegerTy(1)) {
780  IC.replaceOperand(II, 2,
782  return &II;
783  }
784 
785  CmpInst::Predicate SrcPred;
786  Value *SrcLHS;
787  Value *SrcRHS;
788 
789  // Fold compare eq/ne with 0 from a compare result as the predicate to the
790  // intrinsic. The typical use is a wave vote function in the library, which
791  // will be fed from a user code condition compared with 0. Fold in the
792  // redundant compare.
793 
794  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
795  // -> llvm.amdgcn.[if]cmp(a, b, pred)
796  //
797  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
798  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
799  if (match(Src1, PatternMatch::m_Zero()) &&
801  m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
802  PatternMatch::m_Value(SrcRHS))))) {
803  if (CCVal == CmpInst::ICMP_EQ)
804  SrcPred = CmpInst::getInversePredicate(SrcPred);
805 
806  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
807  ? Intrinsic::amdgcn_fcmp
808  : Intrinsic::amdgcn_icmp;
809 
810  Type *Ty = SrcLHS->getType();
811  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
812  // Promote to next legal integer type.
813  unsigned Width = CmpType->getBitWidth();
814  unsigned NewWidth = Width;
815 
816  // Don't do anything for i1 comparisons.
817  if (Width == 1)
818  break;
819 
820  if (Width <= 16)
821  NewWidth = 16;
822  else if (Width <= 32)
823  NewWidth = 32;
824  else if (Width <= 64)
825  NewWidth = 64;
826  else if (Width > 64)
827  break; // Can't handle this.
828 
829  if (Width != NewWidth) {
830  IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
831  if (CmpInst::isSigned(SrcPred)) {
832  SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
833  SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
834  } else {
835  SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
836  SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
837  }
838  }
839  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
840  break;
841 
843  II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
844  Value *Args[] = {SrcLHS, SrcRHS,
845  ConstantInt::get(CC->getType(), SrcPred)};
846  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
847  NewCall->takeName(&II);
848  return IC.replaceInstUsesWith(II, NewCall);
849  }
850 
851  break;
852  }
853  case Intrinsic::amdgcn_ballot: {
854  if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
855  if (Src->isZero()) {
856  // amdgcn.ballot(i1 0) is zero.
858  }
859 
860  if (Src->isOne()) {
861  // amdgcn.ballot(i1 1) is exec.
862  const char *RegName = "exec";
863  if (II.getType()->isIntegerTy(32))
864  RegName = "exec_lo";
865  else if (!II.getType()->isIntegerTy(64))
866  break;
867 
869  II.getModule(), Intrinsic::read_register, II.getType());
870  Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
871  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
872  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
873  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
875  NewCall->takeName(&II);
876  return IC.replaceInstUsesWith(II, NewCall);
877  }
878  }
879  break;
880  }
881  case Intrinsic::amdgcn_wqm_vote: {
882  // wqm_vote is identity when the argument is constant.
883  if (!isa<Constant>(II.getArgOperand(0)))
884  break;
885 
886  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
887  }
888  case Intrinsic::amdgcn_kill: {
889  const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
890  if (!C || !C->getZExtValue())
891  break;
892 
893  // amdgcn.kill(i1 1) is a no-op
894  return IC.eraseInstFromFunction(II);
895  }
896  case Intrinsic::amdgcn_update_dpp: {
897  Value *Old = II.getArgOperand(0);
898 
899  auto *BC = cast<ConstantInt>(II.getArgOperand(5));
900  auto *RM = cast<ConstantInt>(II.getArgOperand(3));
901  auto *BM = cast<ConstantInt>(II.getArgOperand(4));
902  if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
903  BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
904  break;
905 
906  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
907  return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
908  }
909  case Intrinsic::amdgcn_permlane16:
910  case Intrinsic::amdgcn_permlanex16: {
911  // Discard vdst_in if it's not going to be read.
912  Value *VDstIn = II.getArgOperand(0);
913  if (isa<UndefValue>(VDstIn))
914  break;
915 
916  ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
917  ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
918  if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
919  break;
920 
921  return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
922  }
923  case Intrinsic::amdgcn_permlane64:
924  // A constant value is trivially uniform.
925  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
926  return IC.replaceInstUsesWith(II, C);
927  }
928  break;
929  case Intrinsic::amdgcn_readfirstlane:
930  case Intrinsic::amdgcn_readlane: {
931  // A constant value is trivially uniform.
932  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
933  return IC.replaceInstUsesWith(II, C);
934  }
935 
936  // The rest of these may not be safe if the exec may not be the same between
937  // the def and use.
938  Value *Src = II.getArgOperand(0);
939  Instruction *SrcInst = dyn_cast<Instruction>(Src);
940  if (SrcInst && SrcInst->getParent() != II.getParent())
941  break;
942 
943  // readfirstlane (readfirstlane x) -> readfirstlane x
944  // readlane (readfirstlane x), y -> readfirstlane x
945  if (match(Src,
946  PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
947  return IC.replaceInstUsesWith(II, Src);
948  }
949 
950  if (IID == Intrinsic::amdgcn_readfirstlane) {
951  // readfirstlane (readlane x, y) -> readlane x, y
952  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
953  return IC.replaceInstUsesWith(II, Src);
954  }
955  } else {
956  // readlane (readlane x, y), y -> readlane x, y
957  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
960  return IC.replaceInstUsesWith(II, Src);
961  }
962  }
963 
964  break;
965  }
966  case Intrinsic::amdgcn_ldexp: {
967  // FIXME: This doesn't introduce new instructions and belongs in
968  // InstructionSimplify.
969  Type *Ty = II.getType();
970  Value *Op0 = II.getArgOperand(0);
971  Value *Op1 = II.getArgOperand(1);
972 
973  // Folding undef to qnan is safe regardless of the FP mode.
974  if (isa<UndefValue>(Op0)) {
975  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
976  return IC.replaceInstUsesWith(II, QNaN);
977  }
978 
979  const APFloat *C = nullptr;
981 
982  // FIXME: Should flush denorms depending on FP mode, but that's ignored
983  // everywhere else.
984  //
985  // These cases should be safe, even with strictfp.
986  // ldexp(0.0, x) -> 0.0
987  // ldexp(-0.0, x) -> -0.0
988  // ldexp(inf, x) -> inf
989  // ldexp(-inf, x) -> -inf
990  if (C && (C->isZero() || C->isInfinity())) {
991  return IC.replaceInstUsesWith(II, Op0);
992  }
993 
994  // With strictfp, be more careful about possibly needing to flush denormals
995  // or not, and snan behavior depends on ieee_mode.
996  if (II.isStrictFP())
997  break;
998 
999  if (C && C->isNaN()) {
1000  // FIXME: We just need to make the nan quiet here, but that's unavailable
1001  // on APFloat, only IEEEfloat
1002  auto *Quieted =
1004  return IC.replaceInstUsesWith(II, Quieted);
1005  }
1006 
1007  // ldexp(x, 0) -> x
1008  // ldexp(x, undef) -> x
1009  if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
1010  return IC.replaceInstUsesWith(II, Op0);
1011  }
1012 
1013  break;
1014  }
1015  case Intrinsic::amdgcn_fmul_legacy: {
1016  Value *Op0 = II.getArgOperand(0);
1017  Value *Op1 = II.getArgOperand(1);
1018 
1019  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1020  // infinity, gives +0.0.
1021  // TODO: Move to InstSimplify?
1022  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1025 
1026  // If we can prove we don't have one of the special cases then we can use a
1027  // normal fmul instruction instead.
1028  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1029  auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
1030  FMul->takeName(&II);
1031  return IC.replaceInstUsesWith(II, FMul);
1032  }
1033  break;
1034  }
1035  case Intrinsic::amdgcn_fma_legacy: {
1036  Value *Op0 = II.getArgOperand(0);
1037  Value *Op1 = II.getArgOperand(1);
1038  Value *Op2 = II.getArgOperand(2);
1039 
1040  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1041  // infinity, gives +0.0.
1042  // TODO: Move to InstSimplify?
1043  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1044  match(Op1, PatternMatch::m_AnyZeroFP())) {
1045  // It's tempting to just return Op2 here, but that would give the wrong
1046  // result if Op2 was -0.0.
1047  auto *Zero = ConstantFP::getNullValue(II.getType());
1048  auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1049  FAdd->takeName(&II);
1050  return IC.replaceInstUsesWith(II, FAdd);
1051  }
1052 
1053  // If we can prove we don't have one of the special cases then we can use a
1054  // normal fma instead.
1055  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1057  II.getModule(), Intrinsic::fma, II.getType()));
1058  return &II;
1059  }
1060  break;
1061  }
1062  case Intrinsic::amdgcn_is_shared:
1063  case Intrinsic::amdgcn_is_private: {
1064  if (isa<UndefValue>(II.getArgOperand(0)))
1065  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1066 
1067  if (isa<ConstantPointerNull>(II.getArgOperand(0)))
1068  return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
1069  break;
1070  }
1071  default: {
1072  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1074  return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1075  }
1076  }
1077  }
1078  return None;
1079 }
1080 
1081 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1082 ///
1083 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
1084 /// struct returns.
1086  IntrinsicInst &II,
1087  APInt DemandedElts,
1088  int DMaskIdx = -1) {
1089 
1090  auto *IIVTy = cast<FixedVectorType>(II.getType());
1091  unsigned VWidth = IIVTy->getNumElements();
1092  if (VWidth == 1)
1093  return nullptr;
1094 
1096  IC.Builder.SetInsertPoint(&II);
1097 
1098  // Assume the arguments are unchanged and later override them, if needed.
1100 
1101  if (DMaskIdx < 0) {
1102  // Buffer case.
1103 
1104  const unsigned ActiveBits = DemandedElts.getActiveBits();
1105  const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
1106 
1107  // Start assuming the prefix of elements is demanded, but possibly clear
1108  // some other bits if there are trailing zeros (unused components at front)
1109  // and update offset.
1110  DemandedElts = (1 << ActiveBits) - 1;
1111 
1112  if (UnusedComponentsAtFront > 0) {
1113  static const unsigned InvalidOffsetIdx = 0xf;
1114 
1115  unsigned OffsetIdx;
1116  switch (II.getIntrinsicID()) {
1117  case Intrinsic::amdgcn_raw_buffer_load:
1118  OffsetIdx = 1;
1119  break;
1120  case Intrinsic::amdgcn_s_buffer_load:
1121  // If resulting type is vec3, there is no point in trimming the
1122  // load with updated offset, as the vec3 would most likely be widened to
1123  // vec4 anyway during lowering.
1124  if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1125  OffsetIdx = InvalidOffsetIdx;
1126  else
1127  OffsetIdx = 1;
1128  break;
1129  case Intrinsic::amdgcn_struct_buffer_load:
1130  OffsetIdx = 2;
1131  break;
1132  default:
1133  // TODO: handle tbuffer* intrinsics.
1134  OffsetIdx = InvalidOffsetIdx;
1135  break;
1136  }
1137 
1138  if (OffsetIdx != InvalidOffsetIdx) {
1139  // Clear demanded bits and update the offset.
1140  DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1141  auto *Offset = II.getArgOperand(OffsetIdx);
1142  unsigned SingleComponentSizeInBits =
1144  unsigned OffsetAdd =
1145  UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1146  auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1147  Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1148  }
1149  }
1150  } else {
1151  // Image case.
1152 
1153  ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
1154  unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1155 
1156  // Mask off values that are undefined because the dmask doesn't cover them
1157  DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
1158 
1159  unsigned NewDMaskVal = 0;
1160  unsigned OrigLoadIdx = 0;
1161  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1162  const unsigned Bit = 1 << SrcIdx;
1163  if (!!(DMaskVal & Bit)) {
1164  if (!!DemandedElts[OrigLoadIdx])
1165  NewDMaskVal |= Bit;
1166  OrigLoadIdx++;
1167  }
1168  }
1169 
1170  if (DMaskVal != NewDMaskVal)
1171  Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1172  }
1173 
1174  unsigned NewNumElts = DemandedElts.countPopulation();
1175  if (!NewNumElts)
1176  return UndefValue::get(II.getType());
1177 
1178  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1179  if (DMaskIdx >= 0)
1180  II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1181  return nullptr;
1182  }
1183 
1184  // Validate function argument and return types, extracting overloaded types
1185  // along the way.
1186  SmallVector<Type *, 6> OverloadTys;
1187  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1188  return nullptr;
1189 
1190  Module *M = II.getParent()->getParent()->getParent();
1191  Type *EltTy = IIVTy->getElementType();
1192  Type *NewTy =
1193  (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1194 
1195  OverloadTys[0] = NewTy;
1196  Function *NewIntrin =
1197  Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1198 
1199  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1200  NewCall->takeName(&II);
1201  NewCall->copyMetadata(II);
1202 
1203  if (NewNumElts == 1) {
1205  NewCall,
1206  DemandedElts.countTrailingZeros());
1207  }
1208 
1209  SmallVector<int, 8> EltMask;
1210  unsigned NewLoadIdx = 0;
1211  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1212  if (!!DemandedElts[OrigLoadIdx])
1213  EltMask.push_back(NewLoadIdx++);
1214  else
1215  EltMask.push_back(NewNumElts);
1216  }
1217 
1218  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1219 
1220  return Shuffle;
1221 }
1222 
1224  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1225  APInt &UndefElts2, APInt &UndefElts3,
1226  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1227  SimplifyAndSetOp) const {
1228  switch (II.getIntrinsicID()) {
1229  case Intrinsic::amdgcn_buffer_load:
1230  case Intrinsic::amdgcn_buffer_load_format:
1231  case Intrinsic::amdgcn_raw_buffer_load:
1232  case Intrinsic::amdgcn_raw_buffer_load_format:
1233  case Intrinsic::amdgcn_raw_tbuffer_load:
1234  case Intrinsic::amdgcn_s_buffer_load:
1235  case Intrinsic::amdgcn_struct_buffer_load:
1236  case Intrinsic::amdgcn_struct_buffer_load_format:
1237  case Intrinsic::amdgcn_struct_tbuffer_load:
1238  case Intrinsic::amdgcn_tbuffer_load:
1239  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1240  default: {
1241  if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1242  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1243  }
1244  break;
1245  }
1246  }
1247  return None;
1248 }
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1216
llvm::InstCombiner::getTargetLibraryInfo
TargetLibraryInfo & getTargetLibraryInfo() const
Definition: InstCombiner.h:369
llvm::APFloat::isInfinity
bool isInfinity() const
Definition: APFloat.h:1212
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4637
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:849
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:179
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
InstCombiner.h
llvm::RecurKind::FMul
@ FMul
Product of floats.
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1419
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::APFloatBase::IEK_NaN
@ IEK_NaN
Definition: APFloat.h:231
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition: APInt.h:469
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::IRBuilderBase::CreateFCmpOEQ
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2106
llvm::PatternMatch::m_NaN
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:604
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:673
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:309
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2075
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.cpp:67
llvm::AMDGPU::ImageDimIntrinsicInfo
Definition: AMDGPUInstrInfo.h:47
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:289
llvm::APFloat::isZero
bool isZero() const
Definition: APFloat.h:1211
llvm::APFloat::divide
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:997
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1845
llvm::IRBuilderBase::CreateMaxNum
CallInst * CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maxnum intrinsic.
Definition: IRBuilder.h:896
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2310
llvm::APFloatBase::IEK_Inf
@ IEK_Inf
Definition: APFloat.h:232
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::Optional
Definition: APInt.h:33
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2288
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:841
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::IRBuilderBase::CreateAShr
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1329
llvm::APFloat::getSemantics
const fltSemantics & getSemantics() const
Definition: APFloat.h:1223
llvm::Intrinsic::getIntrinsicSignature
bool getIntrinsicSignature(Function *F, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
Definition: Function.cpp:1764
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1384
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1571
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
modifyIntrinsicCall
static Optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
Definition: AMDGPUInstCombineIntrinsic.cpp:116
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:438
llvm::GCNTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: AMDGPUInstCombineIntrinsic.cpp:351
fmed3AMDGCN
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition: AMDGPUInstCombineIntrinsic.cpp:42
llvm::IRBuilderBase::CreateFMulFMF
Value * CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1474
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:826
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1642
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::CmpInst::FIRST_FCMP_PREDICATE
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:737
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
GCNSubtarget.h
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:164
llvm::SIInstrFlags::N_INFINITY
@ N_INFINITY
Definition: SIDefines.h:140
llvm::User
Definition: User.h:44
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:2375
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::AMDGPU::getMIMGOffsetMappingInfo
const LLVM_READONLY MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:514
llvm::CmpInst::FIRST_ICMP_PREDICATE
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:750
llvm::APFloat::isNaN
bool isNaN() const
Definition: APFloat.h:1213
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1623
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::APFloat::isNegative
bool isNegative() const
Definition: APFloat.h:1215
llvm::Instruction
Definition: Instruction.h:42
llvm::CallBase::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1506
llvm::InstCombiner::eraseInstFromFunction
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:175
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1710
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::AMDGPU::getMIMGLZMappingInfo
const LLVM_READONLY MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1543
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::None
const NoneType None
Definition: None.h:24
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:511
simplifyAMDGCNMemoryIntrinsicDemanded
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition: AMDGPUInstCombineIntrinsic.cpp:1085
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
AMDGPUTargetTransformInfo.h
llvm::SIInstrFlags::Q_NAN
@ Q_NAN
Definition: SIDefines.h:139
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1860
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::APFloat::getQNaN
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:916
llvm::SIInstrFlags::N_NORMAL
@ N_NORMAL
Definition: SIDefines.h:141
llvm::APFloat
Definition: APFloat.h:701
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2420
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:531
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::IRBuilderBase::CreateFAddFMF
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1420
llvm::APFloat::isNormal
bool isNormal() const
Definition: APFloat.h:1219
llvm::SIInstrFlags::S_NAN
@ S_NAN
Definition: SIDefines.h:138
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::APFloatBase::cmpResult
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:180
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:620
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::APFloatBase::cmpUnordered
@ cmpUnordered
Definition: APFloat.h:184
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:447
I
#define I(x, y, z)
Definition: MD5.cpp:58
convertTo16Bit
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition: AMDGPUInstCombineIntrinsic.cpp:101
llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1200
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
simplifyAMDGCNImageIntrinsic
static Optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition: AMDGPUInstCombineIntrinsic.cpp:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AMDGPU::getMIMGBaseOpcodeInfo
const LLVM_READONLY MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
llvm::AMDGPU::getMIMGMIPMappingInfo
const LLVM_READONLY MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1617
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:471
llvm::scalbn
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Definition: APFloat.h:1262
llvm::APFloat::isSignaling
bool isSignaling() const
Definition: APFloat.h:1217
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
uint32_t
llvm::PatternMatch::m_FiniteNonZero
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
Definition: PatternMatch.h:646
llvm::AMDGPU::getMIMGBiasMappingInfo
const LLVM_READONLY MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:350
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:244
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1348
llvm::isKnownNeverInfinity
bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
Definition: ValueTracking.cpp:3723
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition: ValueTracking.cpp:3787
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:417
llvm::SIInstrFlags::P_SUBNORMAL
@ P_SUBNORMAL
Definition: SIDefines.h:145
llvm::SIInstrFlags::P_INFINITY
@ P_INFINITY
Definition: SIDefines.h:147
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1681
AMDGPUInstrInfo.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:194
llvm::SIInstrFlags::P_ZERO
@ P_ZERO
Definition: SIDefines.h:144
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
canSafelyConvertTo16Bit
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition: AMDGPUInstCombineIntrinsic.cpp:63
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::MetadataAsValue::get
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:102
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:926
llvm::fltSemantics
Definition: APFloat.cpp:54
llvm::CmpInst::isSigned
bool isSigned() const
Definition: InstrTypes.h:947
llvm::AMDGPU::getImageDimIntrinsicInfo
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:518
llvm::frexp
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1274
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::CallBase::setCalledOperand
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1432
llvm::PatternMatch::m_AnyZeroFP
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
Definition: PatternMatch.h:658
llvm::IRBuilderBase::CreateMinNum
CallInst * CreateMinNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minnum intrinsic.
Definition: IRBuilder.h:891
llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1864
llvm::SIInstrFlags::N_ZERO
@ N_ZERO
Definition: SIDefines.h:143
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4843
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:766
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1310
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::IRBuilderBase::CreateFCmpUNO
Value * CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2141
llvm::IRBuilderBase::CreateShl
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1289
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1435
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
llvm::SmallVectorImpl< Value * >
llvm::SIInstrFlags::N_SUBNORMAL
@ N_SUBNORMAL
Definition: SIDefines.h:142
llvm::CmpInst::LAST_FCMP_PREDICATE
@ LAST_FCMP_PREDICATE
Definition: InstrTypes.h:738
RegName
#define RegName(no)
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1471
llvm::APFloat::compare
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1170
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition: AMDGPUInstCombineIntrinsic.cpp:329
llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: AMDGPUInstCombineIntrinsic.cpp:1223
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:182
llvm::SIInstrFlags::P_NORMAL
@ P_NORMAL
Definition: SIDefines.h:146
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2229
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1332
llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler
bool Sampler
Definition: AMDGPUBaseInfo.h:305
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::CmpInst::LAST_ICMP_PREDICATE
@ LAST_ICMP_PREDICATE
Definition: InstrTypes.h:751