LLVM  14.0.0git
AMDGPUInstCombineIntrinsic.cpp
Go to the documentation of this file.
1 //===- AMDGPInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file implements a TargetTransformInfo analysis pass specific to the
11 // AMDGPU target machine. It uses the target's detailed information to provide
12 // more precise answers to certain TTI queries, while letting the target
13 // independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "AMDGPUInstrInfo.h"
19 #include "GCNSubtarget.h"
20 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "AMDGPUtti"
26 
27 namespace {
28 
29 struct AMDGPUImageDMaskIntrinsic {
30  unsigned Intr;
31 };
32 
33 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
34 #include "InstCombineTables.inc"
35 
36 } // end anonymous namespace
37 
38 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
39 //
40 // A single NaN input is folded to minnum, so we rely on that folding for
41 // handling NaNs.
42 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
43  const APFloat &Src2) {
44  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
45 
46  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
47  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
48  if (Cmp0 == APFloat::cmpEqual)
49  return maxnum(Src1, Src2);
50 
51  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
52  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
53  if (Cmp1 == APFloat::cmpEqual)
54  return maxnum(Src0, Src2);
55 
56  return maxnum(Src0, Src1);
57 }
58 
59 // Check if a value can be converted to a 16-bit value without losing
60 // precision.
61 static bool canSafelyConvertTo16Bit(Value &V) {
62  Type *VTy = V.getType();
63  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
64  // The value is already 16-bit, so we don't want to convert to 16-bit again!
65  return false;
66  }
67  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
68  // We need to check that if we cast the index down to a half, we do not lose
69  // precision.
70  APFloat FloatValue(ConstFloat->getValueAPF());
71  bool LosesInfo = true;
72  FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
73  return !LosesInfo;
74  }
75  Value *CastSrc;
76  if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
77  match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
78  match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
79  Type *CastSrcTy = CastSrc->getType();
80  if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
81  return true;
82  }
83 
84  return false;
85 }
86 
87 // Convert a value to 16-bit.
89  Type *VTy = V.getType();
90  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
91  return cast<Instruction>(&V)->getOperand(0);
92  if (VTy->isIntegerTy())
93  return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
94  if (VTy->isFloatingPointTy())
95  return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
96 
97  llvm_unreachable("Should never be called!");
98 }
99 
102  const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
103  IntrinsicInst &II, InstCombiner &IC) {
104  if (!ST->hasA16() && !ST->hasG16())
105  return None;
106 
107  bool FloatCoord = false;
108  // true means derivatives can be converted to 16 bit, coordinates not
109  bool OnlyDerivatives = false;
110 
111  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
112  OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
113  Value *Coord = II.getOperand(OperandIndex);
114  // If the values are not derived from 16-bit values, we cannot optimize.
115  if (!canSafelyConvertTo16Bit(*Coord)) {
116  if (OperandIndex < ImageDimIntr->CoordStart ||
117  ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
118  return None;
119  }
120  // All gradients can be converted, so convert only them
121  OnlyDerivatives = true;
122  break;
123  }
124 
125  assert(OperandIndex == ImageDimIntr->GradientStart ||
126  FloatCoord == Coord->getType()->isFloatingPointTy());
127  FloatCoord = Coord->getType()->isFloatingPointTy();
128  }
129 
130  if (OnlyDerivatives) {
131  if (!ST->hasG16())
132  return None;
133  } else {
134  if (!ST->hasA16())
135  OnlyDerivatives = true; // Only supports G16
136  }
137 
138  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
140 
141  SmallVector<Type *, 4> ArgTys;
143  return None;
144 
145  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
146  if (!OnlyDerivatives)
147  ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
148  Function *I =
150 
152 
153  unsigned EndIndex =
154  OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
155  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
156  OperandIndex < EndIndex; OperandIndex++) {
157  Args[OperandIndex] =
158  convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
159  }
160 
161  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
162  NewCall->takeName(&II);
163  NewCall->copyMetadata(II);
164  if (isa<FPMathOperator>(NewCall))
165  NewCall->copyFastMathFlags(&II);
166  return IC.replaceInstUsesWith(II, NewCall);
167 }
168 
170  InstCombiner &IC) const {
171  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
172  // infinity, gives +0.0. If we can prove we don't have one of the special
173  // cases then we can use a normal multiply instead.
174  // TODO: Create and use isKnownFiniteNonZero instead of just matching
175  // constants here.
176  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
178  // One operand is not zero or infinity or NaN.
179  return true;
180  }
181  auto *TLI = &IC.getTargetLibraryInfo();
182  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
183  isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
184  // Neither operand is infinity or NaN.
185  return true;
186  }
187  return false;
188 }
189 
192  Intrinsic::ID IID = II.getIntrinsicID();
193  switch (IID) {
194  case Intrinsic::amdgcn_rcp: {
195  Value *Src = II.getArgOperand(0);
196 
197  // TODO: Move to ConstantFolding/InstSimplify?
198  if (isa<UndefValue>(Src)) {
199  Type *Ty = II.getType();
200  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
201  return IC.replaceInstUsesWith(II, QNaN);
202  }
203 
204  if (II.isStrictFP())
205  break;
206 
207  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
208  const APFloat &ArgVal = C->getValueAPF();
209  APFloat Val(ArgVal.getSemantics(), 1);
211 
212  // This is more precise than the instruction may give.
213  //
214  // TODO: The instruction always flushes denormal results (except for f16),
215  // should this also?
216  return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
217  }
218 
219  break;
220  }
221  case Intrinsic::amdgcn_rsq: {
222  Value *Src = II.getArgOperand(0);
223 
224  // TODO: Move to ConstantFolding/InstSimplify?
225  if (isa<UndefValue>(Src)) {
226  Type *Ty = II.getType();
227  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
228  return IC.replaceInstUsesWith(II, QNaN);
229  }
230 
231  break;
232  }
233  case Intrinsic::amdgcn_frexp_mant:
234  case Intrinsic::amdgcn_frexp_exp: {
235  Value *Src = II.getArgOperand(0);
236  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
237  int Exp;
238  APFloat Significand =
239  frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
240 
241  if (IID == Intrinsic::amdgcn_frexp_mant) {
242  return IC.replaceInstUsesWith(
243  II, ConstantFP::get(II.getContext(), Significand));
244  }
245 
246  // Match instruction special case behavior.
247  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
248  Exp = 0;
249 
250  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
251  }
252 
253  if (isa<UndefValue>(Src)) {
254  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
255  }
256 
257  break;
258  }
259  case Intrinsic::amdgcn_class: {
260  enum {
261  S_NAN = 1 << 0, // Signaling NaN
262  Q_NAN = 1 << 1, // Quiet NaN
263  N_INFINITY = 1 << 2, // Negative infinity
264  N_NORMAL = 1 << 3, // Negative normal
265  N_SUBNORMAL = 1 << 4, // Negative subnormal
266  N_ZERO = 1 << 5, // Negative zero
267  P_ZERO = 1 << 6, // Positive zero
268  P_SUBNORMAL = 1 << 7, // Positive subnormal
269  P_NORMAL = 1 << 8, // Positive normal
270  P_INFINITY = 1 << 9 // Positive infinity
271  };
272 
273  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
276 
277  Value *Src0 = II.getArgOperand(0);
278  Value *Src1 = II.getArgOperand(1);
279  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
280  if (!CMask) {
281  if (isa<UndefValue>(Src0)) {
282  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
283  }
284 
285  if (isa<UndefValue>(Src1)) {
286  return IC.replaceInstUsesWith(II,
287  ConstantInt::get(II.getType(), false));
288  }
289  break;
290  }
291 
292  uint32_t Mask = CMask->getZExtValue();
293 
294  // If all tests are made, it doesn't matter what the value is.
295  if ((Mask & FullMask) == FullMask) {
296  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
297  }
298 
299  if ((Mask & FullMask) == 0) {
300  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
301  }
302 
303  if (Mask == (S_NAN | Q_NAN)) {
304  // Equivalent of isnan. Replace with standard fcmp.
305  Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
306  FCmp->takeName(&II);
307  return IC.replaceInstUsesWith(II, FCmp);
308  }
309 
310  if (Mask == (N_ZERO | P_ZERO)) {
311  // Equivalent of == 0.
312  Value *FCmp =
313  IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
314 
315  FCmp->takeName(&II);
316  return IC.replaceInstUsesWith(II, FCmp);
317  }
318 
319  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
320  if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
321  isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
322  return IC.replaceOperand(
323  II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
324  }
325 
326  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
327  if (!CVal) {
328  if (isa<UndefValue>(Src0)) {
329  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
330  }
331 
332  // Clamp mask to used bits
333  if ((Mask & FullMask) != Mask) {
334  CallInst *NewCall = IC.Builder.CreateCall(
335  II.getCalledFunction(),
336  {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
337 
338  NewCall->takeName(&II);
339  return IC.replaceInstUsesWith(II, NewCall);
340  }
341 
342  break;
343  }
344 
345  const APFloat &Val = CVal->getValueAPF();
346 
347  bool Result =
348  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
349  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
350  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
351  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
352  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
353  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
354  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
355  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
356  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
357  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
358 
359  return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
360  }
361  case Intrinsic::amdgcn_cvt_pkrtz: {
362  Value *Src0 = II.getArgOperand(0);
363  Value *Src1 = II.getArgOperand(1);
364  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
365  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
366  const fltSemantics &HalfSem =
368  bool LosesInfo;
369  APFloat Val0 = C0->getValueAPF();
370  APFloat Val1 = C1->getValueAPF();
371  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
372  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
373 
374  Constant *Folded =
376  ConstantFP::get(II.getContext(), Val1)});
377  return IC.replaceInstUsesWith(II, Folded);
378  }
379  }
380 
381  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
382  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
383  }
384 
385  break;
386  }
387  case Intrinsic::amdgcn_cvt_pknorm_i16:
388  case Intrinsic::amdgcn_cvt_pknorm_u16:
389  case Intrinsic::amdgcn_cvt_pk_i16:
390  case Intrinsic::amdgcn_cvt_pk_u16: {
391  Value *Src0 = II.getArgOperand(0);
392  Value *Src1 = II.getArgOperand(1);
393 
394  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
395  return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
396  }
397 
398  break;
399  }
400  case Intrinsic::amdgcn_ubfe:
401  case Intrinsic::amdgcn_sbfe: {
402  // Decompose simple cases into standard shifts.
403  Value *Src = II.getArgOperand(0);
404  if (isa<UndefValue>(Src)) {
405  return IC.replaceInstUsesWith(II, Src);
406  }
407 
408  unsigned Width;
409  Type *Ty = II.getType();
410  unsigned IntSize = Ty->getIntegerBitWidth();
411 
412  ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
413  if (CWidth) {
414  Width = CWidth->getZExtValue();
415  if ((Width & (IntSize - 1)) == 0) {
417  }
418 
419  // Hardware ignores high bits, so remove those.
420  if (Width >= IntSize) {
421  return IC.replaceOperand(
422  II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
423  }
424  }
425 
426  unsigned Offset;
427  ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
428  if (COffset) {
429  Offset = COffset->getZExtValue();
430  if (Offset >= IntSize) {
431  return IC.replaceOperand(
432  II, 1,
433  ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
434  }
435  }
436 
437  bool Signed = IID == Intrinsic::amdgcn_sbfe;
438 
439  if (!CWidth || !COffset)
440  break;
441 
442  // The case of Width == 0 is handled above, which makes this tranformation
443  // safe. If Width == 0, then the ashr and lshr instructions become poison
444  // value since the shift amount would be equal to the bit size.
445  assert(Width != 0);
446 
447  // TODO: This allows folding to undef when the hardware has specific
448  // behavior?
449  if (Offset + Width < IntSize) {
450  Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
451  Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
452  : IC.Builder.CreateLShr(Shl, IntSize - Width);
453  RightShift->takeName(&II);
454  return IC.replaceInstUsesWith(II, RightShift);
455  }
456 
457  Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
458  : IC.Builder.CreateLShr(Src, Offset);
459 
460  RightShift->takeName(&II);
461  return IC.replaceInstUsesWith(II, RightShift);
462  }
463  case Intrinsic::amdgcn_exp:
464  case Intrinsic::amdgcn_exp_compr: {
465  ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
466  unsigned EnBits = En->getZExtValue();
467  if (EnBits == 0xf)
468  break; // All inputs enabled.
469 
470  bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
471  bool Changed = false;
472  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
473  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
474  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
475  Value *Src = II.getArgOperand(I + 2);
476  if (!isa<UndefValue>(Src)) {
477  IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
478  Changed = true;
479  }
480  }
481  }
482 
483  if (Changed) {
484  return &II;
485  }
486 
487  break;
488  }
489  case Intrinsic::amdgcn_fmed3: {
490  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
491  // for the shader.
492 
493  Value *Src0 = II.getArgOperand(0);
494  Value *Src1 = II.getArgOperand(1);
495  Value *Src2 = II.getArgOperand(2);
496 
497  // Checking for NaN before canonicalization provides better fidelity when
498  // mapping other operations onto fmed3 since the order of operands is
499  // unchanged.
500  CallInst *NewCall = nullptr;
501  if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
502  NewCall = IC.Builder.CreateMinNum(Src1, Src2);
503  } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
504  NewCall = IC.Builder.CreateMinNum(Src0, Src2);
505  } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
506  NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
507  }
508 
509  if (NewCall) {
510  NewCall->copyFastMathFlags(&II);
511  NewCall->takeName(&II);
512  return IC.replaceInstUsesWith(II, NewCall);
513  }
514 
515  bool Swap = false;
516  // Canonicalize constants to RHS operands.
517  //
518  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
519  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
520  std::swap(Src0, Src1);
521  Swap = true;
522  }
523 
524  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
525  std::swap(Src1, Src2);
526  Swap = true;
527  }
528 
529  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
530  std::swap(Src0, Src1);
531  Swap = true;
532  }
533 
534  if (Swap) {
535  II.setArgOperand(0, Src0);
536  II.setArgOperand(1, Src1);
537  II.setArgOperand(2, Src2);
538  return &II;
539  }
540 
541  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
542  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
543  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
544  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
545  C2->getValueAPF());
546  return IC.replaceInstUsesWith(
547  II, ConstantFP::get(IC.Builder.getContext(), Result));
548  }
549  }
550  }
551 
552  break;
553  }
554  case Intrinsic::amdgcn_icmp:
555  case Intrinsic::amdgcn_fcmp: {
556  const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
557  // Guard against invalid arguments.
558  int64_t CCVal = CC->getZExtValue();
559  bool IsInteger = IID == Intrinsic::amdgcn_icmp;
560  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
561  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
562  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
564  break;
565 
566  Value *Src0 = II.getArgOperand(0);
567  Value *Src1 = II.getArgOperand(1);
568 
569  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
570  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
571  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
572  if (CCmp->isNullValue()) {
573  return IC.replaceInstUsesWith(
574  II, ConstantExpr::getSExt(CCmp, II.getType()));
575  }
576 
577  // The result of V_ICMP/V_FCMP assembly instructions (which this
578  // intrinsic exposes) is one bit per thread, masked with the EXEC
579  // register (which contains the bitmask of live threads). So a
580  // comparison that always returns true is the same as a read of the
581  // EXEC register.
583  II.getModule(), Intrinsic::read_register, II.getType());
584  Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
585  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
586  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
587  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
589  NewCall->takeName(&II);
590  return IC.replaceInstUsesWith(II, NewCall);
591  }
592 
593  // Canonicalize constants to RHS.
594  CmpInst::Predicate SwapPred =
596  II.setArgOperand(0, Src1);
597  II.setArgOperand(1, Src0);
598  II.setArgOperand(
599  2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
600  return &II;
601  }
602 
603  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
604  break;
605 
606  // Canonicalize compare eq with true value to compare != 0
607  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
608  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
609  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
610  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
611  Value *ExtSrc;
612  if (CCVal == CmpInst::ICMP_EQ &&
613  ((match(Src1, PatternMatch::m_One()) &&
614  match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
615  (match(Src1, PatternMatch::m_AllOnes()) &&
616  match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
617  ExtSrc->getType()->isIntegerTy(1)) {
619  IC.replaceOperand(II, 2,
621  return &II;
622  }
623 
624  CmpInst::Predicate SrcPred;
625  Value *SrcLHS;
626  Value *SrcRHS;
627 
628  // Fold compare eq/ne with 0 from a compare result as the predicate to the
629  // intrinsic. The typical use is a wave vote function in the library, which
630  // will be fed from a user code condition compared with 0. Fold in the
631  // redundant compare.
632 
633  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
634  // -> llvm.amdgcn.[if]cmp(a, b, pred)
635  //
636  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
637  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
638  if (match(Src1, PatternMatch::m_Zero()) &&
640  m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
641  PatternMatch::m_Value(SrcRHS))))) {
642  if (CCVal == CmpInst::ICMP_EQ)
643  SrcPred = CmpInst::getInversePredicate(SrcPred);
644 
645  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
646  ? Intrinsic::amdgcn_fcmp
647  : Intrinsic::amdgcn_icmp;
648 
649  Type *Ty = SrcLHS->getType();
650  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
651  // Promote to next legal integer type.
652  unsigned Width = CmpType->getBitWidth();
653  unsigned NewWidth = Width;
654 
655  // Don't do anything for i1 comparisons.
656  if (Width == 1)
657  break;
658 
659  if (Width <= 16)
660  NewWidth = 16;
661  else if (Width <= 32)
662  NewWidth = 32;
663  else if (Width <= 64)
664  NewWidth = 64;
665  else if (Width > 64)
666  break; // Can't handle this.
667 
668  if (Width != NewWidth) {
669  IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
670  if (CmpInst::isSigned(SrcPred)) {
671  SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
672  SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
673  } else {
674  SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
675  SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
676  }
677  }
678  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
679  break;
680 
682  II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
683  Value *Args[] = {SrcLHS, SrcRHS,
684  ConstantInt::get(CC->getType(), SrcPred)};
685  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
686  NewCall->takeName(&II);
687  return IC.replaceInstUsesWith(II, NewCall);
688  }
689 
690  break;
691  }
692  case Intrinsic::amdgcn_ballot: {
693  if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
694  if (Src->isZero()) {
695  // amdgcn.ballot(i1 0) is zero.
697  }
698 
699  if (Src->isOne()) {
700  // amdgcn.ballot(i1 1) is exec.
701  const char *RegName = "exec";
702  if (II.getType()->isIntegerTy(32))
703  RegName = "exec_lo";
704  else if (!II.getType()->isIntegerTy(64))
705  break;
706 
708  II.getModule(), Intrinsic::read_register, II.getType());
709  Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
710  MDNode *MD = MDNode::get(II.getContext(), MDArgs);
711  Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
712  CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
714  NewCall->takeName(&II);
715  return IC.replaceInstUsesWith(II, NewCall);
716  }
717  }
718  break;
719  }
720  case Intrinsic::amdgcn_wqm_vote: {
721  // wqm_vote is identity when the argument is constant.
722  if (!isa<Constant>(II.getArgOperand(0)))
723  break;
724 
725  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
726  }
727  case Intrinsic::amdgcn_kill: {
728  const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
729  if (!C || !C->getZExtValue())
730  break;
731 
732  // amdgcn.kill(i1 1) is a no-op
733  return IC.eraseInstFromFunction(II);
734  }
735  case Intrinsic::amdgcn_update_dpp: {
736  Value *Old = II.getArgOperand(0);
737 
738  auto *BC = cast<ConstantInt>(II.getArgOperand(5));
739  auto *RM = cast<ConstantInt>(II.getArgOperand(3));
740  auto *BM = cast<ConstantInt>(II.getArgOperand(4));
741  if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
742  BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
743  break;
744 
745  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
746  return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
747  }
748  case Intrinsic::amdgcn_permlane16:
749  case Intrinsic::amdgcn_permlanex16: {
750  // Discard vdst_in if it's not going to be read.
751  Value *VDstIn = II.getArgOperand(0);
752  if (isa<UndefValue>(VDstIn))
753  break;
754 
755  ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
756  ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
757  if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
758  break;
759 
760  return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
761  }
762  case Intrinsic::amdgcn_readfirstlane:
763  case Intrinsic::amdgcn_readlane: {
764  // A constant value is trivially uniform.
765  if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
766  return IC.replaceInstUsesWith(II, C);
767  }
768 
769  // The rest of these may not be safe if the exec may not be the same between
770  // the def and use.
771  Value *Src = II.getArgOperand(0);
772  Instruction *SrcInst = dyn_cast<Instruction>(Src);
773  if (SrcInst && SrcInst->getParent() != II.getParent())
774  break;
775 
776  // readfirstlane (readfirstlane x) -> readfirstlane x
777  // readlane (readfirstlane x), y -> readfirstlane x
778  if (match(Src,
779  PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
780  return IC.replaceInstUsesWith(II, Src);
781  }
782 
783  if (IID == Intrinsic::amdgcn_readfirstlane) {
784  // readfirstlane (readlane x, y) -> readlane x, y
785  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
786  return IC.replaceInstUsesWith(II, Src);
787  }
788  } else {
789  // readlane (readlane x, y), y -> readlane x, y
790  if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
793  return IC.replaceInstUsesWith(II, Src);
794  }
795  }
796 
797  break;
798  }
799  case Intrinsic::amdgcn_ldexp: {
800  // FIXME: This doesn't introduce new instructions and belongs in
801  // InstructionSimplify.
802  Type *Ty = II.getType();
803  Value *Op0 = II.getArgOperand(0);
804  Value *Op1 = II.getArgOperand(1);
805 
806  // Folding undef to qnan is safe regardless of the FP mode.
807  if (isa<UndefValue>(Op0)) {
808  auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
809  return IC.replaceInstUsesWith(II, QNaN);
810  }
811 
812  const APFloat *C = nullptr;
814 
815  // FIXME: Should flush denorms depending on FP mode, but that's ignored
816  // everywhere else.
817  //
818  // These cases should be safe, even with strictfp.
819  // ldexp(0.0, x) -> 0.0
820  // ldexp(-0.0, x) -> -0.0
821  // ldexp(inf, x) -> inf
822  // ldexp(-inf, x) -> -inf
823  if (C && (C->isZero() || C->isInfinity())) {
824  return IC.replaceInstUsesWith(II, Op0);
825  }
826 
827  // With strictfp, be more careful about possibly needing to flush denormals
828  // or not, and snan behavior depends on ieee_mode.
829  if (II.isStrictFP())
830  break;
831 
832  if (C && C->isNaN()) {
833  // FIXME: We just need to make the nan quiet here, but that's unavailable
834  // on APFloat, only IEEEfloat
835  auto *Quieted =
837  return IC.replaceInstUsesWith(II, Quieted);
838  }
839 
840  // ldexp(x, 0) -> x
841  // ldexp(x, undef) -> x
842  if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
843  return IC.replaceInstUsesWith(II, Op0);
844  }
845 
846  break;
847  }
848  case Intrinsic::amdgcn_fmul_legacy: {
849  Value *Op0 = II.getArgOperand(0);
850  Value *Op1 = II.getArgOperand(1);
851 
852  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
853  // infinity, gives +0.0.
854  // TODO: Move to InstSimplify?
855  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
858 
859  // If we can prove we don't have one of the special cases then we can use a
860  // normal fmul instruction instead.
861  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
862  auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
863  FMul->takeName(&II);
864  return IC.replaceInstUsesWith(II, FMul);
865  }
866  break;
867  }
868  case Intrinsic::amdgcn_fma_legacy: {
869  Value *Op0 = II.getArgOperand(0);
870  Value *Op1 = II.getArgOperand(1);
871  Value *Op2 = II.getArgOperand(2);
872 
873  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
874  // infinity, gives +0.0.
875  // TODO: Move to InstSimplify?
876  if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
878  // It's tempting to just return Op2 here, but that would give the wrong
879  // result if Op2 was -0.0.
880  auto *Zero = ConstantFP::getNullValue(II.getType());
881  auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
882  FAdd->takeName(&II);
883  return IC.replaceInstUsesWith(II, FAdd);
884  }
885 
886  // If we can prove we don't have one of the special cases then we can use a
887  // normal fma instead.
888  if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
890  II.getModule(), Intrinsic::fma, II.getType()));
891  return &II;
892  }
893  break;
894  }
895  default: {
896  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
898  return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
899  }
900  }
901  }
902  return None;
903 }
904 
905 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
906 ///
907 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
908 /// struct returns.
910  IntrinsicInst &II,
911  APInt DemandedElts,
912  int DMaskIdx = -1) {
913 
914  auto *IIVTy = cast<FixedVectorType>(II.getType());
915  unsigned VWidth = IIVTy->getNumElements();
916  if (VWidth == 1)
917  return nullptr;
918 
920  IC.Builder.SetInsertPoint(&II);
921 
922  // Assume the arguments are unchanged and later override them, if needed.
924 
925  if (DMaskIdx < 0) {
926  // Buffer case.
927 
928  const unsigned ActiveBits = DemandedElts.getActiveBits();
929  const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
930 
931  // Start assuming the prefix of elements is demanded, but possibly clear
932  // some other bits if there are trailing zeros (unused components at front)
933  // and update offset.
934  DemandedElts = (1 << ActiveBits) - 1;
935 
936  if (UnusedComponentsAtFront > 0) {
937  static const unsigned InvalidOffsetIdx = 0xf;
938 
939  unsigned OffsetIdx;
940  switch (II.getIntrinsicID()) {
941  case Intrinsic::amdgcn_raw_buffer_load:
942  OffsetIdx = 1;
943  break;
944  case Intrinsic::amdgcn_s_buffer_load:
945  // If resulting type is vec3, there is no point in trimming the
946  // load with updated offset, as the vec3 would most likely be widened to
947  // vec4 anyway during lowering.
948  if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
949  OffsetIdx = InvalidOffsetIdx;
950  else
951  OffsetIdx = 1;
952  break;
953  case Intrinsic::amdgcn_struct_buffer_load:
954  OffsetIdx = 2;
955  break;
956  default:
957  // TODO: handle tbuffer* intrinsics.
958  OffsetIdx = InvalidOffsetIdx;
959  break;
960  }
961 
962  if (OffsetIdx != InvalidOffsetIdx) {
963  // Clear demanded bits and update the offset.
964  DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
965  auto *Offset = II.getArgOperand(OffsetIdx);
966  unsigned SingleComponentSizeInBits =
968  unsigned OffsetAdd =
969  UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
970  auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
971  Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
972  }
973  }
974  } else {
975  // Image case.
976 
977  ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
978  unsigned DMaskVal = DMask->getZExtValue() & 0xf;
979 
980  // Mask off values that are undefined because the dmask doesn't cover them
981  DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
982 
983  unsigned NewDMaskVal = 0;
984  unsigned OrigLoadIdx = 0;
985  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
986  const unsigned Bit = 1 << SrcIdx;
987  if (!!(DMaskVal & Bit)) {
988  if (!!DemandedElts[OrigLoadIdx])
989  NewDMaskVal |= Bit;
990  OrigLoadIdx++;
991  }
992  }
993 
994  if (DMaskVal != NewDMaskVal)
995  Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
996  }
997 
998  unsigned NewNumElts = DemandedElts.countPopulation();
999  if (!NewNumElts)
1000  return UndefValue::get(II.getType());
1001 
1002  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1003  if (DMaskIdx >= 0)
1004  II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1005  return nullptr;
1006  }
1007 
1008  // Validate function argument and return types, extracting overloaded types
1009  // along the way.
1010  SmallVector<Type *, 6> OverloadTys;
1011  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
1012  return nullptr;
1013 
1014  Module *M = II.getParent()->getParent()->getParent();
1015  Type *EltTy = IIVTy->getElementType();
1016  Type *NewTy =
1017  (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
1018 
1019  OverloadTys[0] = NewTy;
1020  Function *NewIntrin =
1021  Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
1022 
1023  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
1024  NewCall->takeName(&II);
1025  NewCall->copyMetadata(II);
1026 
1027  if (NewNumElts == 1) {
1029  NewCall,
1030  DemandedElts.countTrailingZeros());
1031  }
1032 
1033  SmallVector<int, 8> EltMask;
1034  unsigned NewLoadIdx = 0;
1035  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1036  if (!!DemandedElts[OrigLoadIdx])
1037  EltMask.push_back(NewLoadIdx++);
1038  else
1039  EltMask.push_back(NewNumElts);
1040  }
1041 
1042  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
1043 
1044  return Shuffle;
1045 }
1046 
1048  InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1049  APInt &UndefElts2, APInt &UndefElts3,
1050  std::function<void(Instruction *, unsigned, APInt, APInt &)>
1051  SimplifyAndSetOp) const {
1052  switch (II.getIntrinsicID()) {
1053  case Intrinsic::amdgcn_buffer_load:
1054  case Intrinsic::amdgcn_buffer_load_format:
1055  case Intrinsic::amdgcn_raw_buffer_load:
1056  case Intrinsic::amdgcn_raw_buffer_load_format:
1057  case Intrinsic::amdgcn_raw_tbuffer_load:
1058  case Intrinsic::amdgcn_s_buffer_load:
1059  case Intrinsic::amdgcn_struct_buffer_load:
1060  case Intrinsic::amdgcn_struct_buffer_load_format:
1061  case Intrinsic::amdgcn_struct_tbuffer_load:
1062  case Intrinsic::amdgcn_tbuffer_load:
1063  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
1064  default: {
1065  if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
1066  return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
1067  }
1068  break;
1069  }
1070  }
1071  return None;
1072 }
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1218
llvm::InstCombiner::getTargetLibraryInfo
TargetLibraryInfo & getTargetLibraryInfo() const
Definition: InstCombiner.h:367
llvm::APFloat::isInfinity
bool isInfinity() const
Definition: APFloat.h:1214
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4636
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:836
llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:184
llvm
----------------------- PointerInfo ------------------------------------
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
InstCombiner.h
llvm::RecurKind::FMul
@ FMul
Product of floats.
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::APFloatBase::IEK_NaN
@ IEK_NaN
Definition: APFloat.h:231
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::AMDGPU::ImageDimIntrinsicInfo::CoordTyArg
uint8_t CoordTyArg
Definition: AMDGPUInstrInfo.h:75
llvm::APInt::isMask
bool isMask(unsigned numBits) const
Definition: APInt.h:462
llvm::Function
Definition: Function.h:61
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::IRBuilderBase::CreateFCmpOEQ
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2271
llvm::PatternMatch::m_NaN
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:638
llvm::DataLayout::getTypeSizeInBits
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:655
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:319
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2084
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:56
llvm::AMDGPU::ImageDimIntrinsicInfo
Definition: AMDGPUInstrInfo.h:50
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:820
llvm::PatternMatch::m_APFloat
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
Definition: PatternMatch.h:287
llvm::APFloat::isZero
bool isZero() const
Definition: APFloat.h:1213
llvm::APFloat::divide
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:999
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1805
llvm::IRBuilderBase::CreateMaxNum
CallInst * CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maxnum intrinsic.
Definition: IRBuilder.h:901
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::IRBuilderBase::CreateShuffleVector
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2469
llvm::APFloatBase::IEK_Inf
@ IEK_Inf
Definition: APFloat.h:232
llvm::AMDGPU::ImageDimIntrinsicInfo::GradientTyArg
uint8_t GradientTyArg
Definition: AMDGPUInstrInfo.h:74
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::Optional
Definition: APInt.h:33
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:829
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::IRBuilderBase::CreateAShr
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1342
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::APFloat::getSemantics
const fltSemantics & getSemantics() const
Definition: APFloat.h:1225
llvm::Intrinsic::getIntrinsicSignature
bool getIntrinsicSignature(Function *F, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
Definition: Function.cpp:1680
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1203
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1555
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::InstCombiner::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:436
llvm::GCNTTIImpl::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: AMDGPUInstCombineIntrinsic.cpp:191
fmed3AMDGCN
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition: AMDGPUInstCombineIntrinsic.cpp:42
llvm::IRBuilderBase::CreateFMulFMF
Value * CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1490
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:813
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1658
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::CmpInst::FIRST_FCMP_PREDICATE
@ FIRST_FCMP_PREDICATE
Definition: InstrTypes.h:738
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
GCNSubtarget.h
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:164
llvm::SIInstrFlags::N_INFINITY
@ N_INFINITY
Definition: SIDefines.h:130
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:1987
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::AMDGPU::ImageDimIntrinsicInfo::CoordStart
uint8_t CoordStart
Definition: AMDGPUInstrInfo.h:64
llvm::IRBuilderBase::getIntNTy
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:531
llvm::CmpInst::FIRST_ICMP_PREDICATE
@ FIRST_ICMP_PREDICATE
Definition: InstrTypes.h:751
llvm::APFloat::isNaN
bool isNaN() const
Definition: APFloat.h:1215
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1639
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.h:169
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::APFloat::isNegative
bool isNegative() const
Definition: APFloat.h:1217
llvm::Instruction
Definition: Instruction.h:45
llvm::CallBase::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1500
llvm::InstCombiner::eraseInstFromFunction
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:180
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1527
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:648
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::None
const NoneType None
Definition: None.h:23
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
simplifyAMDGCNMemoryIntrinsicDemanded
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition: AMDGPUInstCombineIntrinsic.cpp:909
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1309
AMDGPUTargetTransformInfo.h
llvm::SIInstrFlags::Q_NAN
@ Q_NAN
Definition: SIDefines.h:129
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2025
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::APFloat::getQNaN
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:916
llvm::SIInstrFlags::N_NORMAL
@ N_NORMAL
Definition: SIDefines.h:131
llvm::APFloat
Definition: APFloat.h:701
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2373
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::IRBuilderBase::CreateFAddFMF
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1440
llvm::APFloat::isNormal
bool isNormal() const
Definition: APFloat.h:1221
llvm::SIInstrFlags::S_NAN
@ S_NAN
Definition: SIDefines.h:128
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:369
llvm::APFloatBase::cmpResult
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:180
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::APFloatBase::cmpUnordered
@ cmpUnordered
Definition: APFloat.h:184
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:445
I
#define I(x, y, z)
Definition: MD5.cpp:59
convertTo16Bit
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition: AMDGPUInstCombineIntrinsic.cpp:88
llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1212
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:141
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
canSafelyConvertTo16Bit
static bool canSafelyConvertTo16Bit(Value &V)
Definition: AMDGPUInstCombineIntrinsic.cpp:61
simplifyAMDGCNImageIntrinsic
static Optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition: AMDGPUInstCombineIntrinsic.cpp:101
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::AMDGPU::ImageDimIntrinsicInfo::VAddrEnd
uint8_t VAddrEnd
Definition: AMDGPUInstrInfo.h:67
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1633
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:472
llvm::scalbn
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Definition: APFloat.h:1264
llvm::APFloat::isSignaling
bool isSignaling() const
Definition: APFloat.h:1219
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
uint32_t
llvm::PatternMatch::m_FiniteNonZero
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
Definition: PatternMatch.h:684
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:367
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:235
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1369
llvm::isKnownNeverInfinity
bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
Definition: ValueTracking.cpp:3653
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition: ValueTracking.cpp:3717
llvm::CallBase::setArgOperand
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1343
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InstCombiner::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:415
llvm::SIInstrFlags::P_SUBNORMAL
@ P_SUBNORMAL
Definition: SIDefines.h:135
llvm::SIInstrFlags::P_INFINITY
@ P_INFINITY
Definition: SIDefines.h:137
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1697
AMDGPUInstrInfo.h
llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:194
llvm::SIInstrFlags::P_ZERO
@ P_ZERO
Definition: SIDefines.h:134
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:188
llvm::MetadataAsValue::get
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:947
llvm::fltSemantics
Definition: APFloat.cpp:54
llvm::CmpInst::isSigned
bool isSigned() const
Definition: InstrTypes.h:934
llvm::AMDGPU::getImageDimIntrinsicInfo
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::frexp
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1276
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:410
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::CallBase::setCalledOperand
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1426
llvm::PatternMatch::m_AnyZeroFP
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
Definition: PatternMatch.h:696
llvm::IRBuilderBase::CreateMinNum
CallInst * CreateMinNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minnum intrinsic.
Definition: IRBuilder.h:896
llvm::IRBuilderBase::CreateSExt
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2029
llvm::SIInstrFlags::N_ZERO
@ N_ZERO
Definition: SIDefines.h:133
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4842
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
llvm::IRBuilderBase::CreateLShr
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1322
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::IRBuilderBase::CreateFCmpUNO
Value * CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2306
llvm::IRBuilderBase::CreateShl
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1301
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1427
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
llvm::SIInstrFlags::N_SUBNORMAL
@ N_SUBNORMAL
Definition: SIDefines.h:132
llvm::CmpInst::LAST_FCMP_PREDICATE
@ LAST_FCMP_PREDICATE
Definition: InstrTypes.h:739
RegName
#define RegName(no)
llvm::Type::getInt16Ty
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:202
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::APFloat::compare
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1172
llvm::AMDGPU::ImageDimIntrinsicInfo::GradientStart
uint8_t GradientStart
Definition: AMDGPUInstrInfo.h:63
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:370
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::GCNTTIImpl::canSimplifyLegacyMulToMul
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition: AMDGPUInstCombineIntrinsic.cpp:169
llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: AMDGPUInstCombineIntrinsic.cpp:1047
llvm::CallBase::arg_operands
iterator_range< User::op_iterator > arg_operands()
Definition: InstrTypes.h:1330
llvm::APFloatBase::cmpEqual
@ cmpEqual
Definition: APFloat.h:182
llvm::SIInstrFlags::P_NORMAL
@ P_NORMAL
Definition: SIDefines.h:136
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
llvm::CallBase::args
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1319
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::CmpInst::LAST_ICMP_PREDICATE
@ LAST_ICMP_PREDICATE
Definition: InstrTypes.h:752