LLVM  16.0.0git
AMDGPUInstCombineIntrinsic.cpp
Go to the documentation of this file.
1 //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // AMDGPU target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "AMDGPUtti"
28 
29 namespace {
30 
31 struct AMDGPUImageDMaskIntrinsic {
32  unsigned Intr;
33 };
34 
35 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
36 #include "InstCombineTables.inc"
37 
38 } // end anonymous namespace
39 
40 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
41 //
42 // A single NaN input is folded to minnum, so we rely on that folding for
43 // handling NaNs.
44 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
45  const APFloat &Src2) {
46  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
47 
48  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
49  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
50  if (Cmp0 == APFloat::cmpEqual)
51  return maxnum(Src1, Src2);
52 
53  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
54  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
55  if (Cmp1 == APFloat::cmpEqual)
56  return maxnum(Src0, Src2);
57 
58  return maxnum(Src0, Src1);
59 }
60 
61 // Check if a value can be converted to a 16-bit value without losing
62 // precision.
63 // The value is expected to be either a float (IsFloat = true) or an unsigned
64 // integer (IsFloat = false).
65 static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
66  Type *VTy = V.getType();
67  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
68  // The value is already 16-bit, so we don't want to convert to 16-bit again!
69  return false;
70  }
71  if (IsFloat) {
72  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
73  // We need to check that if we cast the index down to a half, we do not
74  // lose precision.
75  APFloat FloatValue(ConstFloat->getValueAPF());
76  bool LosesInfo = true;
78  &LosesInfo);
79  return !LosesInfo;
80  }
81  } else {
82  if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
83  // We need to check that if we cast the index down to an i16, we do not
84  // lose precision.
85  APInt IntValue(ConstInt->getValue());
86  return IntValue.getActiveBits() <= 16;
87  }
88  }
89 
90  Value *CastSrc;
91  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
92  : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
93  if (IsExt) {
94  Type *CastSrcTy = CastSrc->getType();
95  if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
96  return true;
97  }
98 
99  return false;
100 }
101 
102 // Convert a value to 16-bit.
104  Type *VTy = V.getType();
105  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
106  return cast<Instruction>(&V)->getOperand(0);
107  if (VTy->isIntegerTy())
108  return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
109  if (VTy->isFloatingPointTy())
110  return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
111 
112  llvm_unreachable("Should never be called!");
113 }
114 
115 /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
116 /// modified arguments (based on OldIntr) and replaces InstToReplace with
117 /// this newly created intrinsic call.
118 static std::optional<Instruction *> modifyIntrinsicCall(
119  IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
120  InstCombiner &IC,
122  Func) {
123  SmallVector<Type *, 4> ArgTys;
124  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
125  return std::nullopt;
126 
127  SmallVector<Value *, 8> Args(OldIntr.args());
128 
129  // Modify arguments and types
130  Func(Args, ArgTys);
131 
132  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
133 
134  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
135  NewCall->takeName(&OldIntr);
136  NewCall->copyMetadata(OldIntr);
137  if (isa<FPMathOperator>(NewCall))
138  NewCall->copyFastMathFlags(&OldIntr);
139 
140  // Erase and replace uses
141  if (!InstToReplace.getType()->isVoidTy())
142  IC.replaceInstUsesWith(InstToReplace, NewCall);
143 
144  bool RemoveOldIntr = &OldIntr != &InstToReplace;
145 
146  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
147  if (RemoveOldIntr)
148  IC.eraseInstFromFunction(OldIntr);
149 
150  return RetValue;
151 }
152 
153 static std::optional<Instruction *>
155  const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
156  IntrinsicInst &II, InstCombiner &IC) {
157  // Optimize _L to _LZ when _L is zero
158  if (const auto *LZMappingInfo =
159  AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
160  if (auto *ConstantLod =
161  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
162  if (ConstantLod->isZero() || ConstantLod->isNegative()) {
163  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
165  ImageDimIntr->Dim);
166  return modifyIntrinsicCall(
167  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
168  Args.erase(Args.begin() + ImageDimIntr->LodIndex);
169  });
170  }
171  }
172  }
173 
174  // Optimize _mip away, when 'lod' is zero
175  if (const auto *MIPMappingInfo =
176  AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
177  if (auto *ConstantMip =
178  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
179  if (ConstantMip->isZero()) {
180  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
181  AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
182  ImageDimIntr->Dim);
183  return modifyIntrinsicCall(
184  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
185  Args.erase(Args.begin() + ImageDimIntr->MipIndex);
186  });
187  }
188  }
189  }
190 
191  // Optimize _bias away when 'bias' is zero
192  if (const auto *BiasMappingInfo =
193  AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
194  if (auto *ConstantBias =
195  dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
196  if (ConstantBias->isZero()) {
197  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
198  AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
199  ImageDimIntr->Dim);
200  return modifyIntrinsicCall(
201  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
202  Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203  ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
204  });
205  }
206  }
207  }
208 
209  // Optimize _offset away when 'offset' is zero
210  if (const auto *OffsetMappingInfo =
211  AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
212  if (auto *ConstantOffset =
213  dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
214  if (ConstantOffset->isZero()) {
215  const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
217  OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
218  return modifyIntrinsicCall(
219  II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
220  Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
221  });
222  }
223  }
224  }
225 
226  // Try to use D16
227  if (ST->hasD16Images()) {
228 
229  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
230  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
231 
232  if (BaseOpcode->HasD16) {
233 
234  // If the only use of image intrinsic is a fptrunc (with conversion to
235  // half) then both fptrunc and image intrinsic will be replaced with image
236  // intrinsic with D16 flag.
237  if (II.hasOneUse()) {
238  Instruction *User = II.user_back();
239 
240  if (User->getOpcode() == Instruction::FPTrunc &&
241  User->getType()->getScalarType()->isHalfTy()) {
242 
243  return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
244  [&](auto &Args, auto &ArgTys) {
245  // Change return type of image intrinsic.
246  // Set it to return type of fptrunc.
247  ArgTys[0] = User->getType();
248  });
249  }
250  }
251  }
252  }
253 
254  // Try to use A16 or G16
255  if (!ST->hasA16() && !ST->hasG16())
256  return std::nullopt;
257 
258  // Address is interpreted as float if the instruction has a sampler or as
259  // unsigned int if there is no sampler.
260  bool HasSampler =
261  AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
262  bool FloatCoord = false;
263  // true means derivatives can be converted to 16 bit, coordinates not
264  bool OnlyDerivatives = false;
265 
266  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
267  OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
268  Value *Coord = II.getOperand(OperandIndex);
269  // If the values are not derived from 16-bit values, we cannot optimize.
270  if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
271  if (OperandIndex < ImageDimIntr->CoordStart ||
272  ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
273  return std::nullopt;
274  }
275  // All gradients can be converted, so convert only them
276  OnlyDerivatives = true;
277  break;
278  }
279 
280  assert(OperandIndex == ImageDimIntr->GradientStart ||
281  FloatCoord == Coord->getType()->isFloatingPointTy());
282  FloatCoord = Coord->getType()->isFloatingPointTy();
283  }
284 
285  if (!OnlyDerivatives && !ST->hasA16())
286  OnlyDerivatives = true; // Only supports G16
287 
288  // Check if there is a bias parameter and if it can be converted to f16
289  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
290  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
291  assert(HasSampler &&
292  "Only image instructions with a sampler can have a bias");
293  if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
294  OnlyDerivatives = true;
295  }
296 
297  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
298  ImageDimIntr->CoordStart))
299  return std::nullopt;
300 
301  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
303 
304  return modifyIntrinsicCall(
305  II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
306  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
307  if (!OnlyDerivatives) {
308  ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
309 
310  // Change the bias type
311  if (ImageDimIntr->NumBiasArgs != 0)
312  ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
313  }
314 
315  unsigned EndIndex =
316  OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
317  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
318  OperandIndex < EndIndex; OperandIndex++) {
319  Args[OperandIndex] =
320  convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
321  }
322 
323  // Convert the bias
324  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
325  Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
326  Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
327  }
328  });
329 }
330 
331 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
332  InstCombiner &IC) const {
333  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
334  // infinity, gives +0.0. If we can prove we don't have one of the special
335  // cases then we can use a normal multiply instead.
336  // TODO: Create and use isKnownFiniteNonZero instead of just matching
337  // constants here.
338  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
340  // One operand is not zero or infinity or NaN.
341  return true;
342  }
343  auto *TLI = &IC.getTargetLibraryInfo();
344  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
345  isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
346  // Neither operand is infinity or NaN.
347  return true;
348  }
349  return false;
350 }
351 
352 std::optional<Instruction *>
354  Intrinsic::ID IID = II.getIntrinsicID();
355  switch (IID) {
356  case Intrinsic::amdgcn_rcp: {
357  Value *Src = II.getArgOperand(0);
358 
359  // TODO: Move to ConstantFolding/InstSimplify?
360  if (isa<UndefValue>(Src)) {
361  Type *Ty = II.getType();
362  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
363  return IC.replaceInstUsesWith(II, QNaN);
364  }
365 
366  if (II.isStrictFP())
367  break;
368 
369  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
370  const APFloat &ArgVal = C->getValueAPF();
371  APFloat Val(ArgVal.getSemantics(), 1);
373 
374  // This is more precise than the instruction may give.
375  //
376  // TODO: The instruction always flushes denormal results (except for f16),
377  // should this also?
378  return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
379  }
380 
381  break;
382  }
383  case Intrinsic::amdgcn_sqrt:
384  case Intrinsic::amdgcn_rsq: {
385  Value *Src = II.getArgOperand(0);
386 
387  // TODO: Move to ConstantFolding/InstSimplify?
388  if (isa<UndefValue>(Src)) {
389  Type *Ty = II.getType();
390  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
391  return IC.replaceInstUsesWith(II, QNaN);
392  }
393 
394  break;
395  }
396  case Intrinsic::amdgcn_frexp_mant:
397  case Intrinsic::amdgcn_frexp_exp: {
398  Value *Src = II.getArgOperand(0);
399  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
400  int Exp;
401  APFloat Significand =
402  frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
403 
404  if (IID == Intrinsic::amdgcn_frexp_mant) {
405  return IC.replaceInstUsesWith(
406  II, ConstantFP::get(II.getContext(), Significand));
407  }
408 
409  // Match instruction special case behavior.
410  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
411  Exp = 0;
412 
413  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
414  }
415 
416  if (isa<UndefValue>(Src)) {
417  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
418  }
419 
420  break;
421  }
422  case Intrinsic::amdgcn_class: {
423  Value *Src0 = II.getArgOperand(0);
424  Value *Src1 = II.getArgOperand(1);
425  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
426  if (!CMask) {
427  if (isa<UndefValue>(Src0)) {
428  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
429  }
430 
431  if (isa<UndefValue>(Src1)) {
432  return IC.replaceInstUsesWith(II,
433  ConstantInt::get(II.getType(), false));
434  }
435  break;
436  }
437 
438  uint32_t Mask = CMask->getZExtValue();
439 
440  // If all tests are made, it doesn't matter what the value is.
441  if ((Mask & fcAllFlags) == fcAllFlags) {
442  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
443  }
444 
445  if ((Mask & fcAllFlags) == 0) {
446  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
447  }
448 
449  if (Mask == fcNan && !II.isStrictFP()) {
450  // Equivalent of isnan. Replace with standard fcmp.
451  Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
452  FCmp->takeName(&II);
453  return IC.replaceInstUsesWith(II, FCmp);
454  }
455 
456  if (Mask == fcZero && !II.isStrictFP()) {
457  // Equivalent of == 0.
458  Value *FCmp =
459  IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
460 
461  FCmp->takeName(&II);
462  return IC.replaceInstUsesWith(II, FCmp);
463  }
464 
465  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
466  if ((Mask & fcNan) && isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
467  return IC.replaceOperand(
468  II, 1, ConstantInt::get(Src1->getType(), Mask & ~fcNan));
469  }
470 
471  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
472  if (!CVal) {
473  if (isa<UndefValue>(Src0)) {
474  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
475  }
476 
477  // Clamp mask to used bits
478  if ((Mask & fcAllFlags) != Mask) {
479  CallInst *NewCall = IC.Builder.CreateCall(
480  II.getCalledFunction(),
481  {Src0, ConstantInt::get(Src1->getType(), Mask & fcAllFlags)});
482 
483  NewCall->takeName(&II);
484  return IC.replaceInstUsesWith(II, NewCall);
485  }
486 
487  break;
488  }
489 
490  const APFloat &Val = CVal->getValueAPF();
491 
492  bool Result =
493  ((Mask & fcSNan) && Val.isNaN() && Val.isSignaling()) ||
494  ((Mask & fcQNan) && Val.isNaN() && !Val.isSignaling()) ||
495  ((Mask & fcNegInf) && Val.isInfinity() && Val.isNegative()) ||
496  ((Mask & fcNegNormal) && Val.isNormal() && Val.isNegative()) ||
497  ((Mask & fcNegSubnormal) && Val.isDenormal() && Val.isNegative()) ||
498  ((Mask & fcNegZero) && Val.isZero() && Val.isNegative()) ||
499  ((Mask & fcPosZero) && Val.isZero() && !Val.isNegative()) ||
500  ((Mask & fcPosSubnormal) && Val.isDenormal() && !Val.isNegative()) ||
501  ((Mask & fcPosNormal) && Val.isNormal() && !Val.isNegative()) ||
502  ((Mask & fcPosInf) && Val.isInfinity() && !Val.isNegative());
503 
504  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
505  }
506  case Intrinsic::amdgcn_cvt_pkrtz: {
507  Value *Src0 = II.getArgOperand(0);
508  Value *Src1 = II.getArgOperand(1);
509  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
510  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
511  const fltSemantics &HalfSem =
513  bool LosesInfo;
514  APFloat Val0 = C0->getValueAPF();
515  APFloat Val1 = C1->getValueAPF();
516  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
517  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
518 
519  Constant *Folded =
521  ConstantFP::get(II.getContext(), Val1)});
522  return IC.replaceInstUsesWith(II, Folded);
523  }
524  }
525 
526  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
527  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
528  }
529 
530  break;
531  }
532  case Intrinsic::amdgcn_cvt_pknorm_i16:
533  case Intrinsic::amdgcn_cvt_pknorm_u16:
534  case Intrinsic::amdgcn_cvt_pk_i16:
535  case Intrinsic::amdgcn_cvt_pk_u16: {
536  Value *Src0 = II.getArgOperand(0);
537  Value *Src1 = II.getArgOperand(1);
538 
539  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
540  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
541  }
542 
543  break;
544  }
545  case Intrinsic::amdgcn_ubfe:
546  case Intrinsic::amdgcn_sbfe: {
547  // Decompose simple cases into standard shifts.
548  Value *Src = II.getArgOperand(0);
549  if (isa<UndefValue>(Src)) {
550  return IC.replaceInstUsesWith(II, Src);
551  }
552 
553  unsigned Width;
554  Type *Ty = II.getType();
555  unsigned IntSize = Ty->getIntegerBitWidth();
556 
557  ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
558  if (CWidth) {
559  Width = CWidth->getZExtValue();
560  if ((Width & (IntSize - 1)) == 0) {
562  }
563 
564  // Hardware ignores high bits, so remove those.
565  if (Width >= IntSize) {
566  return IC.replaceOperand(
567  II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
568  }
569  }
570 
571  unsigned Offset;
572  ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
573  if (COffset) {
574  Offset = COffset->getZExtValue();
575  if (Offset >= IntSize) {
576  return IC.replaceOperand(
577  II, 1,
578  ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
579  }
580  }
581 
582  bool Signed = IID == Intrinsic::amdgcn_sbfe;
583 
584  if (!CWidth || !COffset)
585  break;
586 
587  // The case of Width == 0 is handled above, which makes this transformation
588  // safe. If Width == 0, then the ashr and lshr instructions become poison
589  // value since the shift amount would be equal to the bit size.
590  assert(Width != 0);
591 
592  // TODO: This allows folding to undef when the hardware has specific
593  // behavior?
594  if (Offset + Width < IntSize) {
595  Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
596  Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
597  : IC.Builder.CreateLShr(Shl, IntSize - Width);
598  RightShift->takeName(&II);
599  return IC.replaceInstUsesWith(II, RightShift);
600  }
601 
602  Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
603  : IC.Builder.CreateLShr(Src, Offset);
604 
605  RightShift->takeName(&II);
606  return IC.replaceInstUsesWith(II, RightShift);
607  }
608  case Intrinsic::amdgcn_exp:
609  case Intrinsic::amdgcn_exp_row:
610  case Intrinsic::amdgcn_exp_compr: {
611  ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
612  unsigned EnBits = En->getZExtValue();
613  if (EnBits == 0xf)
614  break; // All inputs enabled.
615 
616  bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
617  bool Changed = false;
618  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
619  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
620  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
621  Value *Src = II.getArgOperand(I + 2);
622  if (!isa<UndefValue>(Src)) {
623  IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
624  Changed = true;
625  }
626  }
627  }
628 
629  if (Changed) {
630  return &II;
631  }
632 
633  break;
634  }
635  case Intrinsic::amdgcn_fmed3: {
636  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
637  // for the shader.
638 
639  Value *Src0 = II.getArgOperand(0);
640  Value *Src1 = II.getArgOperand(1);
641  Value *Src2 = II.getArgOperand(2);
642 
643  // Checking for NaN before canonicalization provides better fidelity when
644  // mapping other operations onto fmed3 since the order of operands is
645  // unchanged.
646  CallInst *NewCall = nullptr;
647  if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
648  NewCall = IC.Builder.CreateMinNum(Src1, Src2);
649  } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
650  NewCall = IC.Builder.CreateMinNum(Src0, Src2);
651  } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
652  NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
653  }
654 
655  if (NewCall) {
656  NewCall->copyFastMathFlags(&II);
657  NewCall->takeName(&II);
658  return IC.replaceInstUsesWith(II, NewCall);
659  }
660 
661  bool Swap = false;
662  // Canonicalize constants to RHS operands.
663  //
664  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
665  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
666  std::swap(Src0, Src1);
667  Swap = true;
668  }
669 
670  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
671  std::swap(Src1, Src2);
672  Swap = true;
673  }
674 
675  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
676  std::swap(Src0, Src1);
677  Swap = true;
678  }
679 
680  if (Swap) {
681  II.setArgOperand(0, Src0);
682  II.setArgOperand(1, Src1);
683  II.setArgOperand(2, Src2);
684  return &II;
685  }
686 
687  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
688  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
689  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
690  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
691  C2->getValueAPF());
692  return IC.replaceInstUsesWith(
693  II, ConstantFP::get(IC.Builder.getContext(), Result));
694  }
695  }
696  }
697 
698  break;
699  }
700  case Intrinsic::amdgcn_icmp:
701  case Intrinsic::amdgcn_fcmp: {
702  const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
703  // Guard against invalid arguments.
704  int64_t CCVal = CC->getZExtValue();
705  bool IsInteger = IID == Intrinsic::amdgcn_icmp;
706  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
707  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
708  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
710  break;
711 
712  Value *Src0 = II.getArgOperand(0);
713  Value *Src1 = II.getArgOperand(1);
714 
715  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
716  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
717  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
718  if (CCmp->isNullValue()) {
719  return IC.replaceInstUsesWith(
720  II, ConstantExpr::getSExt(CCmp, II.getType()));
721  }
722 
723  // The result of V_ICMP/V_FCMP assembly instructions (which this
724  // intrinsic exposes) is one bit per thread, masked with the EXEC
725  // register (which contains the bitmask of live threads). So a
726  // comparison that always returns true is the same as a read of the
727  // EXEC register.
729  II.getModule(), Intrinsic::read_register, II.getType());
730  Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
731  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
732  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
733  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
735  NewCall->takeName(&II);
736  return IC.replaceInstUsesWith(II, NewCall);
737  }
738 
739  // Canonicalize constants to RHS.
740  CmpInst::Predicate SwapPred =
742  II.setArgOperand(0, Src1);
743  II.setArgOperand(1, Src0);
744  II.setArgOperand(
745  2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
746  return &II;
747  }
748 
749  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
750  break;
751 
752  // Canonicalize compare eq with true value to compare != 0
753  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
754  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
755  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
756  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
757  Value *ExtSrc;
758  if (CCVal == CmpInst::ICMP_EQ &&
759  ((match(Src1, PatternMatch::m_One()) &&
760  match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
761  (match(Src1, PatternMatch::m_AllOnes()) &&
762  match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
763  ExtSrc->getType()->isIntegerTy(1)) {
765  IC.replaceOperand(II, 2,
766  ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
767  return &II;
768  }
769 
770  CmpInst::Predicate SrcPred;
771  Value *SrcLHS;
772  Value *SrcRHS;
773 
774  // Fold compare eq/ne with 0 from a compare result as the predicate to the
775  // intrinsic. The typical use is a wave vote function in the library, which
776  // will be fed from a user code condition compared with 0. Fold in the
777  // redundant compare.
778 
779  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
780  // -> llvm.amdgcn.[if]cmp(a, b, pred)
781  //
782  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
783  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
784  if (match(Src1, PatternMatch::m_Zero()) &&
786  m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
787  PatternMatch::m_Value(SrcRHS))))) {
788  if (CCVal == CmpInst::ICMP_EQ)
789  SrcPred = CmpInst::getInversePredicate(SrcPred);
790 
791  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
792  ? Intrinsic::amdgcn_fcmp
793  : Intrinsic::amdgcn_icmp;
794 
795  Type *Ty = SrcLHS->getType();
796  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
797  // Promote to next legal integer type.
798  unsigned Width = CmpType->getBitWidth();
799  unsigned NewWidth = Width;
800 
801  // Don't do anything for i1 comparisons.
802  if (Width == 1)
803  break;
804 
805  if (Width <= 16)
806  NewWidth = 16;
807  else if (Width <= 32)
808  NewWidth = 32;
809  else if (Width <= 64)
810  NewWidth = 64;
811  else if (Width > 64)
812  break; // Can't handle this.
813 
814  if (Width != NewWidth) {
815  IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
816  if (CmpInst::isSigned(SrcPred)) {
817  SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
818  SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
819  } else {
820  SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
821  SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
822  }
823  }
824  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
825  break;
826 
828  II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
829  Value *Args[] = {SrcLHS, SrcRHS,
830  ConstantInt::get(CC->getType(), SrcPred)};
831  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
832  NewCall->takeName(&II);
833  return IC.replaceInstUsesWith(II, NewCall);
834  }
835 
836  break;
837  }
838  case Intrinsic::amdgcn_ballot: {
839  if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
840  if (Src->isZero()) {
841  // amdgcn.ballot(i1 0) is zero.
843  }
844 
845  if (Src->isOne()) {
846  // amdgcn.ballot(i1 1) is exec.
847  const char *RegName = "exec";
848  if (II.getType()->isIntegerTy(32))
849  RegName = "exec_lo";
850  else if (!II.getType()->isIntegerTy(64))
851  break;
852 
854  II.getModule(), Intrinsic::read_register, II.getType());
855  Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
856  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
857  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
858  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
860  NewCall->takeName(&II);
861  return IC.replaceInstUsesWith(II, NewCall);
862  }
863  }
864  break;
865  }
866  case Intrinsic::amdgcn_wqm_vote: {
867  // wqm_vote is identity when the argument is constant.
868  if (!isa<Constant>(II.getArgOperand(0)))
869  break;
870 
871  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
872  }
873  case Intrinsic::amdgcn_kill: {
874  const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
875  if (!C || !C->getZExtValue())
876  break;
877 
878  // amdgcn.kill(i1 1) is a no-op
879  return IC.eraseInstFromFunction(II);
880  }
881  case Intrinsic::amdgcn_update_dpp: {
882  Value *Old = II.getArgOperand(0);
883 
884  auto *BC = cast<ConstantInt>(II.getArgOperand(5));
885  auto *RM = cast<ConstantInt>(II.getArgOperand(3));
886  auto *BM = cast<ConstantInt>(II.getArgOperand(4));
887  if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
888  BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
889  break;
890 
891  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
892  return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
893  }
894  case Intrinsic::amdgcn_permlane16:
895  case Intrinsic::amdgcn_permlanex16: {
896  // Discard vdst_in if it's not going to be read.
897  Value *VDstIn = II.getArgOperand(0);
898  if (isa<UndefValue>(VDstIn))
899  break;
900 
901  ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
902  ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
903  if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
904  break;
905 
906  return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
907  }
908  case Intrinsic::amdgcn_permlane64:
909  // A constant value is trivially uniform.
910  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
911  return IC.replaceInstUsesWith(II, C);
912  }
913  break;
914  case Intrinsic::amdgcn_readfirstlane:
915  case Intrinsic::amdgcn_readlane: {
916  // A constant value is trivially uniform.
917  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
918  return IC.replaceInstUsesWith(II, C);
919  }
920 
921  // The rest of these may not be safe if the exec may not be the same between
922  // the def and use.
923  Value *Src = II.getArgOperand(0);
924  Instruction *SrcInst = dyn_cast<Instruction>(Src);
925  if (SrcInst && SrcInst->getParent() != II.getParent())
926  break;
927 
928  // readfirstlane (readfirstlane x) -> readfirstlane x
929  // readlane (readfirstlane x), y -> readfirstlane x
930  if (match(Src,
931  PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
932  return IC.replaceInstUsesWith(II, Src);
933  }
934 
935  if (IID == Intrinsic::amdgcn_readfirstlane) {
936  // readfirstlane (readlane x, y) -> readlane x, y
937  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
938  return IC.replaceInstUsesWith(II, Src);
939  }
940  } else {
941  // readlane (readlane x, y), y -> readlane x, y
942  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
945  return IC.replaceInstUsesWith(II, Src);
946  }
947  }
948 
949  break;
950  }
951  case Intrinsic::amdgcn_ldexp: {
952  // FIXME: This doesn't introduce new instructions and belongs in
953  // InstructionSimplify.
954  Type *Ty = II.getType();
955  Value *Op0 = II.getArgOperand(0);
956  Value *Op1 = II.getArgOperand(1);
957 
958  // Folding undef to qnan is safe regardless of the FP mode.
959  if (isa<UndefValue>(Op0)) {
960  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
961  return IC.replaceInstUsesWith(II, QNaN);
962  }
963 
964  const APFloat *C = nullptr;
966 
967  // FIXME: Should flush denorms depending on FP mode, but that's ignored
968  // everywhere else.
969  //
970  // These cases should be safe, even with strictfp.
971  // ldexp(0.0, x) -> 0.0
972  // ldexp(-0.0, x) -> -0.0
973  // ldexp(inf, x) -> inf
974  // ldexp(-inf, x) -> -inf
975  if (C && (C->isZero() || C->isInfinity())) {
976  return IC.replaceInstUsesWith(II, Op0);
977  }
978 
979  // With strictfp, be more careful about possibly needing to flush denormals
980  // or not, and snan behavior depends on ieee_mode.
981  if (II.isStrictFP())
982  break;
983 
984  if (C && C->isNaN()) {
985  // FIXME: We just need to make the nan quiet here, but that's unavailable
986  // on APFloat, only IEEEfloat
987  auto *Quieted =
989  return IC.replaceInstUsesWith(II, Quieted);
990  }
991 
992  // ldexp(x, 0) -> x
993  // ldexp(x, undef) -> x
994  if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
995  return IC.replaceInstUsesWith(II, Op0);
996  }
997 
998  break;
999  }
1000  case Intrinsic::amdgcn_fmul_legacy: {
1001  Value *Op0 = II.getArgOperand(0);
1002  Value *Op1 = II.getArgOperand(1);
1003 
1004  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1005  // infinity, gives +0.0.
1006  // TODO: Move to InstSimplify?
1007  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1010 
1011  // If we can prove we don't have one of the special cases then we can use a
1012  // normal fmul instruction instead.
1013  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1014  auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
1015  FMul->takeName(&II);
1016  return IC.replaceInstUsesWith(II, FMul);
1017  }
1018  break;
1019  }
1020  case Intrinsic::amdgcn_fma_legacy: {
1021  Value *Op0 = II.getArgOperand(0);
1022  Value *Op1 = II.getArgOperand(1);
1023  Value *Op2 = II.getArgOperand(2);
1024 
1025  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1026  // infinity, gives +0.0.
1027  // TODO: Move to InstSimplify?
1028  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1029  match(Op1, PatternMatch::m_AnyZeroFP())) {
1030  // It's tempting to just return Op2 here, but that would give the wrong
1031  // result if Op2 was -0.0.
1032  auto *Zero = ConstantFP::getNullValue(II.getType());
1033  auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1034  FAdd->takeName(&II);
1035  return IC.replaceInstUsesWith(II, FAdd);
1036  }
1037 
1038  // If we can prove we don't have one of the special cases then we can use a
1039  // normal fma instead.
1040  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1042  II.getModule(), Intrinsic::fma, II.getType()));
1043  return &II;
1044  }
1045  break;
1046  }
1047  case Intrinsic::amdgcn_is_shared:
1048  case Intrinsic::amdgcn_is_private: {
1049  if (isa<UndefValue>(II.getArgOperand(0)))
1050  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1051 
1052  if (isa<ConstantPointerNull>(II.getArgOperand(0)))
1053  return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
1054  break;
1055  }
1056  default: {
1057  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1059  return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1060  }
1061  }
1062  }
1063  return std::nullopt;
1064 }
1065 
1066 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
1067 ///
1068 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
1069 /// struct returns.
1071  IntrinsicInst &II,
1072  APInt DemandedElts,
1073  int DMaskIdx = -1) {
1074 
1075  auto *IIVTy = cast<FixedVectorType>(II.getType());
1076  unsigned VWidth = IIVTy->getNumElements();
1077  if (VWidth == 1)
1078  return nullptr;
1079 
1081  IC.Builder.SetInsertPoint(&II);
1082 
1083  // Assume the arguments are unchanged and later override them, if needed.
1085 
1086  if (DMaskIdx < 0) {
1087  // Buffer case.
1088 
1089  const unsigned ActiveBits = DemandedElts.getActiveBits();
1090  const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
1091 
1092  // Start assuming the prefix of elements is demanded, but possibly clear
1093  // some other bits if there are trailing zeros (unused components at front)
1094  // and update offset.
1095  DemandedElts = (1 << ActiveBits) - 1;
1096 
1097  if (UnusedComponentsAtFront > 0) {
1098  static const unsigned InvalidOffsetIdx = 0xf;
1099 
1100  unsigned OffsetIdx;
1101  switch (II.getIntrinsicID()) {
1102  case Intrinsic::amdgcn_raw_buffer_load:
1103  OffsetIdx = 1;
1104  break;
1105  case Intrinsic::amdgcn_s_buffer_load:
1106  // If resulting type is vec3, there is no point in trimming the
1107  // load with updated offset, as the vec3 would most likely be widened to
1108  // vec4 anyway during lowering.
1109  if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1110  OffsetIdx = InvalidOffsetIdx;
1111  else
1112  OffsetIdx = 1;
1113  break;
1114  case Intrinsic::amdgcn_struct_buffer_load:
1115  OffsetIdx = 2;
1116  break;
1117  default:
1118  // TODO: handle tbuffer* intrinsics.
1119  OffsetIdx = InvalidOffsetIdx;
1120  break;
1121  }
1122 
1123  if (OffsetIdx != InvalidOffsetIdx) {
1124  // Clear demanded bits and update the offset.
1125  DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1126  auto *Offset = II.getArgOperand(OffsetIdx);
1127  unsigned SingleComponentSizeInBits =
1129  unsigned OffsetAdd =
1130  UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1131  auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1132  Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
1133  }
1134  }
1135  } else {
1136  // Image case.
1137 
1138  ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
1139  unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1140 
1141  // Mask off values that are undefined because the dmask doesn't cover them
1142  DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
1143 
1144  unsigned NewDMaskVal = 0;
1145  unsigned OrigLoadIdx = 0;
1146  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1147  const unsigned Bit = 1 << SrcIdx;
1148  if (!!(DMaskVal & Bit)) {
1149  if (!!DemandedElts[OrigLoadIdx])
1150  NewDMaskVal |= Bit;
1151  OrigLoadIdx++;
1152  }
1153  }
1154 
1155  if (DMaskVal != NewDMaskVal)
1156  Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1157  }
1158 
1159  unsigned NewNumElts = DemandedElts.countPopulation();
1160  if (!NewNumElts)
1161  return UndefValue::get(II.getType());
1162 
1163  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1164  if (DMaskIdx >= 0)
1165  II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1166  return nullptr;
1167  }
1168 
1169  // Validate function argument and return types, extracting overloaded types
1170  // along the way.
1171  SmallVector<Type *, 6> OverloadTys;
1172  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1173  return nullptr;
1174 
1175  Module *M = II.getParent()->getParent()->getParent();
1176  Type *EltTy = IIVTy->getElementType();
1177  Type *NewTy =
1178  (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1179 
1180  OverloadTys[0] = NewTy;
1181  Function *NewIntrin =
1182  Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1183 
1184  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1185  NewCall->takeName(&II);
1186  NewCall->copyMetadata(II);
1187 
1188  if (NewNumElts == 1) {
1190  NewCall,
1191  DemandedElts.countTrailingZeros());
1192  }
1193 
1194  SmallVector<int, 8> EltMask;
1195  unsigned NewLoadIdx = 0;
1196  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1197  if (!!DemandedElts[OrigLoadIdx])
1198  EltMask.push_back(NewLoadIdx++);
1199  else
1200  EltMask.push_back(NewNumElts);
1201  }
1202 
1203  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1204 
1205  return Shuffle;
1206 }
1207 
1209  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1210  APInt &UndefElts2, APInt &UndefElts3,
1211  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1212  SimplifyAndSetOp) const {
1213  switch (II.getIntrinsicID()) {
1214  case Intrinsic::amdgcn_buffer_load:
1215  case Intrinsic::amdgcn_buffer_load_format:
1216  case Intrinsic::amdgcn_raw_buffer_load:
1217  case Intrinsic::amdgcn_raw_buffer_load_format:
1218  case Intrinsic::amdgcn_raw_tbuffer_load:
1219  case Intrinsic::amdgcn_s_buffer_load:
1220  case Intrinsic::amdgcn_struct_buffer_load:
1221  case Intrinsic::amdgcn_struct_buffer_load_format:
1222  case Intrinsic::amdgcn_struct_tbuffer_load:
1223  case Intrinsic::amdgcn_tbuffer_load:
1224  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1225  default: {
1226  if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1227  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1228  }
1229  break;
1230  }
1231  }
1232  return std::nullopt;
1233 }
FloatingPointMode.h
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1231
llvm::InstCombiner::getTargetLibraryInfo
TargetLibraryInfo & getTargetLibraryInfo() const
Definition: InstCombiner.h:369
llvm::APFloat::isInfinity
bool isInfinity() const
Definition: APFloat.h:1227
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4715
fcQNan
@ fcQNan
Definition: FloatingPointMode.h:201
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:849
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:69
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
InstCombiner.h
llvm::RecurKind::FMul
@ FMul
Product of floats.
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::APFloatBase::IEK_NaN
@ IEK_NaN
Definition: APFloat.h:241
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition: APInt.h:469
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
llvm::IRBuilderBase::CreateFCmpOEQ
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2145
llvm::PatternMatch::m_NaN
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:610
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:673
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:328
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2078
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.cpp:67
llvm::AMDGPU::ImageDimIntrinsicInfo
Definition: AMDGPUInstrInfo.h:47
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
fcPosZero
@ fcPosZero
Definition: FloatingPointMode.h:206
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:295
llvm::APFloat::isZero
bool isZero() const
Definition: APFloat.h:1226
llvm::APFloat::divide
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1012
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1845
llvm::IRBuilderBase::CreateMaxNum
CallInst * CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maxnum intrinsic.
Definition: IRBuilder.h:933
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2358
llvm::APFloatBase::IEK_Inf
@ IEK_Inf
Definition: APFloat.h:242
fcNan
@ fcNan
Definition: FloatingPointMode.h:211
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2336
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:874
fcPosNormal
@ fcPosNormal
Definition: FloatingPointMode.h:208
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
llvm::IRBuilderBase::CreateAShr
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1368
llvm::APFloat::getSemantics
const fltSemantics & getSemantics() const
Definition: APFloat.h:1238
fcPosInf
@ fcPosInf
Definition: FloatingPointMode.h:209
llvm::Intrinsic::getIntrinsicSignature
bool getIntrinsicSignature(Function *F, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
Definition: Function.cpp:1825
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1591
fcNegZero
@ fcNegZero
Definition: FloatingPointMode.h:205
llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: AMDGPUInstCombineIntrinsic.cpp:1208
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:438
fcAllFlags
@ fcAllFlags
Definition: FloatingPointMode.h:219
fmed3AMDGCN
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition: AMDGPUInstCombineIntrinsic.cpp:44
llvm::IRBuilderBase::CreateFMulFMF
Value * CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1513
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:826
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1648
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::CmpInst::FIRST_FCMP_PREDICATE
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:737
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
GCNSubtarget.h
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:195
llvm::User
Definition: User.h:44
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:2586
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::AMDGPU::getMIMGOffsetMappingInfo
const LLVM_READONLY MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:525
llvm::CmpInst::FIRST_ICMP_PREDICATE
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:750
llvm::APFloat::isNaN
bool isNaN() const
Definition: APFloat.h:1228
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::APFloat::isNegative
bool isNegative() const
Definition: APFloat.h:1230
llvm::Instruction
Definition: Instruction.h:42
llvm::CallBase::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1506
llvm::InstCombiner::eraseInstFromFunction
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:176
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::AMDGPU::getMIMGLZMappingInfo
const LLVM_READONLY MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1563
fcNegSubnormal
@ fcNegSubnormal
Definition: FloatingPointMode.h:204
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::GCNTTIImpl::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: AMDGPUInstCombineIntrinsic.cpp:353
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:517
simplifyAMDGCNMemoryIntrinsicDemanded
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition: AMDGPUInstCombineIntrinsic.cpp:1070
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1322
AMDGPUTargetTransformInfo.h
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1899
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::APFloat::getQNaN
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:931
llvm::APFloat
Definition: APFloat.h:716
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2392
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:501
llvm::IRBuilderBase::CreateFAddFMF
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1459
llvm::APFloat::isNormal
bool isNormal() const
Definition: APFloat.h:1234
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
llvm::APFloatBase::cmpResult
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:190
fcZero
@ fcZero
Definition: FloatingPointMode.h:215
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::APFloatBase::cmpUnordered
@ cmpUnordered
Definition: APFloat.h:194
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:453
I
#define I(x, y, z)
Definition: MD5.cpp:58
fcPosSubnormal
@ fcPosSubnormal
Definition: FloatingPointMode.h:207
convertTo16Bit
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition: AMDGPUInstCombineIntrinsic.cpp:103
llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1239
fcSNan
@ fcSNan
Definition: FloatingPointMode.h:200
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:498
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AMDGPU::getMIMGBaseOpcodeInfo
const LLVM_READONLY MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
llvm::AMDGPU::getMIMGMIPMappingInfo
const LLVM_READONLY MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1623
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:493
llvm::scalbn
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Definition: APFloat.h:1277
llvm::APFloat::isSignaling
bool isSignaling() const
Definition: APFloat.h:1232
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
uint32_t
llvm::PatternMatch::m_FiniteNonZero
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
Definition: PatternMatch.h:652
llvm::AMDGPU::getMIMGBiasMappingInfo
const LLVM_READONLY MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:361
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:281
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1348
llvm::isKnownNeverInfinity
bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
Definition: ValueTracking.cpp:3752
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition: ValueTracking.cpp:3833
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1346
llvm::ComplexDeinterleavingOperation::Shuffle
@ Shuffle
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1481
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:417
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1687
AMDGPUInstrInfo.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:204
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
canSafelyConvertTo16Bit
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition: AMDGPUInstCombineIntrinsic.cpp:65
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::MetadataAsValue::get
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:926
llvm::fltSemantics
Definition: APFloat.cpp:71
llvm::CmpInst::isSigned
bool isSigned() const
Definition: InstrTypes.h:947
llvm::AMDGPU::getImageDimIntrinsicInfo
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::frexp
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1289
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:436
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
llvm::CallBase::setCalledOperand
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1432
fcNegNormal
@ fcNegNormal
Definition: FloatingPointMode.h:203
llvm::PatternMatch::m_AnyZeroFP
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
Definition: PatternMatch.h:664
llvm::IRBuilderBase::CreateMinNum
CallInst * CreateMinNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minnum intrinsic.
Definition: IRBuilder.h:928
llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1903
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:200
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
fcNegInf
@ fcNegInf
Definition: FloatingPointMode.h:202
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5083
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match only if the value is the specific value given.
Definition: PatternMatch.h:772
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2276
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1349
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::IRBuilderBase::CreateFCmpUNO
Value * CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2180
simplifyAMDGCNImageIntrinsic
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition: AMDGPUInstCombineIntrinsic.cpp:154
llvm::IRBuilderBase::CreateShl
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1328
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1455
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
llvm::SmallVectorImpl< Value * >
llvm::CmpInst::LAST_FCMP_PREDICATE
@ LAST_FCMP_PREDICATE
Definition: InstrTypes.h:738
RegName
#define RegName(no)
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:238
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
modifyIntrinsicCall
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
Definition: AMDGPUInstCombineIntrinsic.cpp:118
llvm::APFloat::compare
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1185
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition: AMDGPUInstCombineIntrinsic.cpp:331
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:192
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition: PatternMatch.h:89
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1332
llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler
bool Sampler
Definition: AMDGPUBaseInfo.h:322
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::CmpInst::LAST_ICMP_PREDICATE
@ LAST_ICMP_PREDICATE
Definition: InstrTypes.h:751