//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "AMDGPUtti"

namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace

// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
//
// A single NaN input is folded to minnum, so we rely on that folding for
// handling NaNs.
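//
// Illustrative example (not from the source): fmed3(1.0, 3.0, 2.0) computes
// Max3 = 3.0, which equals Src1, so the result is maxnum(Src0, Src2) =
// maxnum(1.0, 2.0) = 2.0, the median of the three inputs.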
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}

// Check if a value can be converted to a 16-bit value without losing
// precision.
// The value is expected to be either a float (IsFloat = true) or an unsigned
// integer (IsFloat = false).
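// For example (illustrative): the f32 constant 0.5 converts to half exactly,
// so it is safe; 1.0e10f overflows half's range and reports LosesInfo, so it
// is not. A value produced by 'fpext half %x to float' is likewise safe.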
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
  Type *VTy = V.getType();
  if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
    // The value is already 16-bit, so we don't want to convert to 16-bit again!
    return false;
  }
  if (IsFloat) {
    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
      // We need to check that if we cast the index down to a half, we do not
      // lose precision.
      APFloat FloatValue(ConstFloat->getValueAPF());
      bool LosesInfo = true;
      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
                         &LosesInfo);
      return !LosesInfo;
    }
  } else {
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
      // We need to check that if we cast the index down to an i16, we do not
      // lose precision.
      APInt IntValue(ConstInt->getValue());
      return IntValue.getActiveBits() <= 16;
    }
  }

  Value *CastSrc;
  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
                       : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
  if (IsExt) {
    Type *CastSrcTy = CastSrc->getType();
    if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
      return true;
  }

  return false;
}

// Convert a value to 16-bit.
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}

/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
/// modified arguments (based on OldIntr) and replaces InstToReplace with
/// this newly created intrinsic call.
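/// For example, the _L -> _LZ rewrite below calls this with a lambda that
/// simply erases the LOD argument from Args, leaving the remaining operands
/// untouched.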
static std::optional<Instruction *> modifyIntrinsicCall(
    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
    InstCombiner &IC,
    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
        Func) {
  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
    return std::nullopt;

  SmallVector<Value *, 8> Args(OldIntr.args());

  // Modify arguments and types
  Func(Args, ArgTys);

  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);

  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
  NewCall->takeName(&OldIntr);
  NewCall->copyMetadata(OldIntr);
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&OldIntr);

  // Erase and replace uses
  if (!InstToReplace.getType()->isVoidTy())
    IC.replaceInstUsesWith(InstToReplace, NewCall);

  bool RemoveOldIntr = &OldIntr != &InstToReplace;

  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
  if (RemoveOldIntr)
    IC.eraseInstFromFunction(OldIntr);

  return RetValue;
}

static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Optimize _L to _LZ when _L is zero
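  // (Illustrative) e.g. llvm.amdgcn.image.sample.l.2d(..., float 0.0, ...)
  //   -> llvm.amdgcn.image.sample.lz.2d(...) with the lod operand removed.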
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }

  // Optimize _mip away, when 'lod' is zero
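  // (Illustrative) e.g. llvm.amdgcn.image.load.mip.2d(..., i32 0, ...)
  //   -> llvm.amdgcn.image.load.2d(...) with the mip operand removed.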
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }

  // Optimize _bias away when 'bias' is zero
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }

  // Optimize _offset away when 'offset' is zero
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }

  // Try to use D16
  if (ST->hasD16Images()) {

    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);

    if (BaseOpcode->HasD16) {

      // If the only use of image intrinsic is a fptrunc (with conversion to
      // half) then both fptrunc and image intrinsic will be replaced with image
      // intrinsic with D16 flag.
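      // (Illustrative IR sketch, not from the source:)
      //   %v = call <4 x float> @llvm.amdgcn.image.sample.2d...(...)
      //   %h = fptrunc <4 x float> %v to <4 x half>
      // becomes a single D16 sample returning <4 x half>.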
      if (II.hasOneUse()) {
        Instruction *User = II.user_back();

        if (User->getOpcode() == Instruction::FPTrunc &&
            User->getType()->getScalarType()->isHalfTy()) {

          return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                                     [&](auto &Args, auto &ArgTys) {
                                       // Change return type of image intrinsic.
                                       // Set it to return type of fptrunc.
                                       ArgTys[0] = User->getType();
                                     });
        }
      }
    }
  }

  // Try to use A16 or G16
  if (!ST->hasA16() && !ST->hasG16())
    return std::nullopt;

  // Address is interpreted as float if the instruction has a sampler or as
  // unsigned int if there is no sampler.
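  // (Illustrative) e.g. coordinates that are all 'fpext half %c to float'
  // values can be passed as the original half values in an A16 variant of the
  // instruction; gradients are handled analogously with G16.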
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
  // true means derivatives can be converted to 16 bit, coordinates not
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
        return std::nullopt;
      }
      // All gradients can be converted, so convert only them
      OnlyDerivatives = true;
      break;
    }

    assert(OperandIndex == ImageDimIntr->GradientStart ||
           FloatCoord == Coord->getType()->isFloatingPointTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (!OnlyDerivatives && !ST->hasA16())
    OnlyDerivatives = true; // Only supports G16

  // Check if there is a bias parameter and if it can be converted to f16
  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
    assert(HasSampler &&
           "Only image instructions with a sampler can have a bias");
    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
      OnlyDerivatives = true;
  }

  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
    return std::nullopt;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  return modifyIntrinsicCall(
      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
        if (!OnlyDerivatives) {
          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

          // Change the bias type
          if (ImageDimIntr->NumBiasArgs != 0)
            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
        }

        unsigned EndIndex =
            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
             OperandIndex < EndIndex; OperandIndex++) {
          Args[OperandIndex] =
              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
        }

        // Convert the bias
        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
        }
      });
}

bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I,
                                           const Value *Op0, const Value *Op1,
                                           InstCombiner &IC) const {
  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
  // infinity, gives +0.0. If we can prove we don't have one of the special
  // cases then we can use a normal multiply instead.
  // TODO: Create and use isKnownFiniteNonZero instead of just matching
  // constants here.
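  // For example (per the legacy semantics above), v_mul_legacy_f32(0.0, NaN)
  // is +0.0, whereas a plain fmul would produce NaN; the checks below prove
  // such cases cannot occur before allowing the rewrite to fmul.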
  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
      match(Op1, PatternMatch::m_FiniteNonZero())) {
    // One operand is not zero or infinity or NaN.
    return true;
  }

  SimplifyQuery SQ = IC.getSimplifyQuery().getWithInstruction(&I);
  if (isKnownNeverInfOrNaN(Op0, /*Depth=*/0, SQ) &&
      isKnownNeverInfOrNaN(Op1, /*Depth=*/0, SQ)) {
    // Neither operand is infinity or NaN.
    return true;
  }
  return false;
}

/// Match an fpext from half to float, or a constant we can convert.
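/// For example (illustrative): matches '%e = fpext half %x to float' and sets
/// FPExtSrc = %x, or rewrites an exactly-representable f32 constant such as
/// 1.0 as the equivalent half constant.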
static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
  if (match(Arg, m_OneUse(m_FPExt(m_Value(FPExtSrc)))))
    return FPExtSrc->getType()->isHalfTy();

  ConstantFP *CFP;
  if (match(Arg, m_ConstantFP(CFP))) {
    bool LosesInfo;
    APFloat Val(CFP->getValueAPF());
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    if (LosesInfo)
      return false;

    FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val);
    return true;
  }

  return false;
}

// Trim all zero components from the end of the vector \p UseV and return
// an appropriate bitset with known elements.
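// For example (illustrative): for a <4 x float> value <x, y, 0.0, 0.0> this
// returns a demanded-elements mask of 0b0011, dropping the two trailing zero
// components.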
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
                                       Instruction *I) {
  auto *VTy = cast<FixedVectorType>(UseV->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);

  for (int i = VWidth - 1; i > 0; --i) {
    auto *Elt = findScalarElement(UseV, i);
    if (!Elt)
      break;

    if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
      if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
        break;
    } else {
      break;
    }

    DemandedElts.clearBit(i);
  }

  return DemandedElts;
}

// Trim elements of the end of the vector \p V, if they are
// equal to the first element of the vector.
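// For example (illustrative): for <x, y, x, x> the last two components match
// the first, so the returned demanded-elements mask is 0b0011.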
static APInt defaultComponentBroadcast(Value *V) {
  auto *VTy = cast<FixedVectorType>(V->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  Value *FirstComponent = findScalarElement(V, 0);

  SmallVector<int> ShuffleMask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
    SVI->getShuffleMask(ShuffleMask);

  for (int I = VWidth - 1; I > 0; --I) {
    if (ShuffleMask.empty()) {
      auto *Elt = findScalarElement(V, I);
      if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
        break;
    } else {
      // Detect identical elements in the shufflevector result, even though
      // findScalarElement cannot tell us what that element is.
      if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
        break;
    }
    DemandedElts.clearBit(I);
  }

  return DemandedElts;
}

static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx = -1,
                                                    bool IsLoad = true);

/// Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
  return (SqrtOp->getType()->isFloatTy() &&
          (SqrtOp->hasApproxFunc() || SqrtOp->getFPAccuracy() >= 1.0f)) ||
         SqrtOp->getType()->isHalfTy();
}

std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    if (II.isStrictFP())
      break;

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);

      // This is more precise than the instruction may give.
      //
      // TODO: The instruction always flushes denormal results (except for f16),
      // should this also?
      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    }

    FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags();
    if (!FMF.allowContract())
      break;
    auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
    if (!SrcCI)
      break;

    auto IID = SrcCI->getIntrinsicID();
    // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable
    //
    // llvm.amdgcn.rcp(llvm.sqrt(x)) -> llvm.amdgcn.rsq(x) if contractable and
    // relaxed.
    if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
      const FPMathOperator *SqrtOp = cast<FPMathOperator>(SrcCI);
      FastMathFlags InnerFMF = SqrtOp->getFastMathFlags();
      if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
        break;

      if (IID == Intrinsic::sqrt && !canContractSqrtToRsq(SqrtOp))
        break;

      Function *NewDecl = Intrinsic::getDeclaration(
          SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});

      InnerFMF |= FMF;
      II.setFastMathFlags(InnerFMF);

      II.setCalledFunction(NewDecl);
      return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
    }

    break;
  }
  case Intrinsic::amdgcn_sqrt:
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    // f16 amdgcn.sqrt is identical to regular sqrt.
    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
      Function *NewDecl = Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::sqrt, {II.getType()});
      II.setCalledFunction(NewDecl);
      return &II;
    }

    break;
  }
  case Intrinsic::amdgcn_log:
  case Intrinsic::amdgcn_exp2: {
    const bool IsLog = IID == Intrinsic::amdgcn_log;
    const bool IsExp = IID == Intrinsic::amdgcn_exp2;
    Value *Src = II.getArgOperand(0);
    Type *Ty = II.getType();

    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);

    if (IC.getSimplifyQuery().isUndefValue(Src))
      return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));

    if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      if (C->isInfinity()) {
        // exp2(+inf) -> +inf
        // log2(+inf) -> +inf
        if (!C->isNegative())
          return IC.replaceInstUsesWith(II, C);

        // exp2(-inf) -> 0
        if (IsExp && C->isNegative())
          return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
      }

      if (II.isStrictFP())
        break;

      if (C->isNaN()) {
        Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
        return IC.replaceInstUsesWith(II, Quieted);
      }

      // f32 instruction doesn't handle denormals, f16 does.
      if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
        Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
                                      : ConstantFP::get(Ty, 1.0);
        return IC.replaceInstUsesWith(II, FoldedValue);
      }

      if (IsLog && C->isNegative())
        return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));

      // TODO: Full constant folding matching hardware behavior.
    }

    break;
  }
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }

      // Match instruction special case behavior.
      if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
        Exp = 0;

      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
    }

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_class: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (CMask) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

      // Clamp any excess bits, as they're illegal for the generic intrinsic.
      II.setArgOperand(1, ConstantInt::get(Src1->getType(),
                                           CMask->getZExtValue() & fcAllFlags));
      return &II;
    }

    // Propagate poison.
    if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    // llvm.amdgcn.class(_, undef) -> false
    if (IC.getSimplifyQuery().isUndefValue(Src1))
      return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));

    // llvm.amdgcn.class(undef, mask) -> mask != 0
    if (IC.getSimplifyQuery().isUndefValue(Src0)) {
      Value *CmpMask = IC.Builder.CreateICmpNE(
          Src1, ConstantInt::getNullValue(Src1->getType()));
      return IC.replaceInstUsesWith(II, CmpMask);
    }
    break;
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        const fltSemantics &HalfSem =
            II.getType()->getScalarType()->getFltSemantics();
        bool LosesInfo;
        APFloat Val0 = C0->getValueAPF();
        APFloat Val1 = C1->getValueAPF();
        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);

        Constant *Folded =
            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
                                 ConstantFP::get(II.getContext(), Val1)});
        return IC.replaceInstUsesWith(II, Folded);
      }
    }

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }

    break;
  }
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
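    // (Illustrative) e.g. for i32, ubfe(%x, 4, 8) extracts bits [4..11] and
    // becomes lshr(shl(%x, 20), 24); sbfe uses ashr to sign-extend instead.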
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }

      // Hardware ignores high bits, so remove those.
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // The case of Width == 0 is handled above, which makes this transformation
    // safe. If Width == 0, then the ashr and lshr instructions become poison
    // value since the shift amount would be equal to the bit size.
    assert(Width != 0);

    // TODO: This allows folding to undef when the hardware has specific
    // behavior?
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);

    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_row:
  case Intrinsic::amdgcn_exp_compr: {
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    if (EnBits == 0xf)
      break; // All inputs enabled.

    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<UndefValue>(Src)) {
          IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
          Changed = true;
        }
      }
    }

    if (Changed) {
      return &II;
    }

    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
    // for the shader.

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    // Checking for NaN before canonicalization provides better fidelity when
    // mapping other operations onto fmed3 since the order of operands is
    // unchanged.
    Value *V = nullptr;
    if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
      V = IC.Builder.CreateMinNum(Src1, Src2);
    } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
      V = IC.Builder.CreateMinNum(Src0, Src2);
    } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
      V = IC.Builder.CreateMaxNum(Src0, Src1);
    }

    if (V) {
      if (auto *CI = dyn_cast<CallInst>(V)) {
        CI->copyFastMathFlags(&II);
        CI->takeName(&II);
      }
      return IC.replaceInstUsesWith(II, V);
    }

    bool Swap = false;
    // Canonicalize constants to RHS operands.
    //
    // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }

    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }

    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }

    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(IC.Builder.getContext(), Result));
        }
      }
    }

    if (!ST->hasMed3_16())
      break;

    Value *X, *Y, *Z;

    // Repeat floating-point width reduction done for minnum/maxnum.
    // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
    if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
        matchFPExtFromF16(Src2, Z)) {
      Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
                                                  {X, Y, Z}, &II, II.getName());
      return new FPExtInst(NewCall, II.getType());
    }

    break;
  }
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                       CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
        (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
                        CCVal > CmpInst::LAST_FCMP_PREDICATE)))
      break;

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
        if (CCmp->isNullValue()) {
          return IC.replaceInstUsesWith(
              II, IC.Builder.CreateSExt(CCmp, II.getType()));
        }

        // The result of V_ICMP/V_FCMP assembly instructions (which this
        // intrinsic exposes) is one bit per thread, masked with the EXEC
        // register (which contains the bitmask of live threads). So a
        // comparison that always returns true is the same as a read of the
        // EXEC register.
        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }

    if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
      break;

    // Canonicalize compare eq with true value to compare != 0
    // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
    //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
    // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
    //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
    Value *ExtSrc;
    if (CCVal == CmpInst::ICMP_EQ &&
        ((match(Src1, PatternMatch::m_One()) &&
          match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
         (match(Src1, PatternMatch::m_AllOnes()) &&
          match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
        ExtSrc->getType()->isIntegerTy(1)) {
      IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
      IC.replaceOperand(II, 2,
                        ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
      return &II;
    }

    CmpInst::Predicate SrcPred;
    Value *SrcLHS;
    Value *SrcRHS;

    // Fold compare eq/ne with 0 from a compare result as the predicate to the
    // intrinsic. The typical use is a wave vote function in the library, which
    // will be fed from a user code condition compared with 0. Fold in the
    // redundant compare.

    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
    //   -> llvm.amdgcn.[if]cmp(a, b, pred)
    //
    // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
    //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
    if (match(Src1, PatternMatch::m_Zero()) &&
        match(Src0, PatternMatch::m_ZExtOrSExt(
                        m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
                              PatternMatch::m_Value(SrcRHS))))) {
      if (CCVal == CmpInst::ICMP_EQ)
        SrcPred = CmpInst::getInversePredicate(SrcPred);

      Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                                 ? Intrinsic::amdgcn_fcmp
                                 : Intrinsic::amdgcn_icmp;

      Type *Ty = SrcLHS->getType();
      if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
        // Promote to next legal integer type.
        unsigned Width = CmpType->getBitWidth();
        unsigned NewWidth = Width;

        // Don't do anything for i1 comparisons.
        if (Width == 1)
          break;

        if (Width <= 16)
          NewWidth = 16;
        else if (Width <= 32)
          NewWidth = 32;
        else if (Width <= 64)
          NewWidth = 64;
        else
          break; // Can't handle this.

        if (Width != NewWidth) {
          IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
          if (CmpInst::isSigned(SrcPred)) {
            SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
          } else {
            SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
            SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
          }
        }
      } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
        break;

      Function *NewF = Intrinsic::getDeclaration(
          II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
      Value *Args[] = {SrcLHS, SrcRHS,
                       ConstantInt::get(CC->getType(), SrcPred)};
      CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    break;
  }
  case Intrinsic::amdgcn_mbcnt_hi: {
    // exec_hi is all 0, so this is just a copy.
    if (ST->isWave32())
      return IC.replaceInstUsesWith(II, II.getArgOperand(1));
    break;
  }
  case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
      }
    }
    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
      // %b64 = call i64 ballot.i64(...)
      // =>
      // %b32 = call i32 ballot.i32(...)
      // %b64 = zext i32 %b32 to i64
      Value *Call = IC.Builder.CreateZExt(
          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
                                     {IC.Builder.getInt32Ty()},
                                     {II.getArgOperand(0)}),
          II.getType());
      Call->takeName(&II);
      return IC.replaceInstUsesWith(II, Call);
    }
    break;
  }
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;

    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;

    // amdgcn.kill(i1 1) is a no-op
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
      break;

    // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
    return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlane16_var:
  case Intrinsic::amdgcn_permlanex16:
  case Intrinsic::amdgcn_permlanex16_var: {
    // Discard vdst_in if it's not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<UndefValue>(VDstIn))
      break;

    // FetchInvalid operand idx.
    unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
                          IID == Intrinsic::amdgcn_permlanex16)
                             ? 4 /* for permlane16 and permlanex16 */
                             : 3; /* for permlane16_var and permlanex16_var */

    // BoundCtrl operand idx.
    // For permlane16 and permlanex16 it should be 5
    // For Permlane16_var and permlanex16_var it should be 4
    unsigned int BcIdx = FiIdx + 1;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_permlane64:
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }
    break;
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane: {
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }

    // The rest of these may not be safe if the exec may not be the same between
    // the def and use.
    Value *Src = II.getArgOperand(0);
    Instruction *SrcInst = dyn_cast<Instruction>(Src);
    if (SrcInst && SrcInst->getParent() != II.getParent())
      break;

    // readfirstlane (readfirstlane x) -> readfirstlane x
    // readlane (readfirstlane x), y -> readfirstlane x
    if (match(Src,
              PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
      return IC.replaceInstUsesWith(II, Src);
    }

    if (IID == Intrinsic::amdgcn_readfirstlane) {
      // readfirstlane (readlane x, y) -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
        return IC.replaceInstUsesWith(II, Src);
      }
    } else {
      // readlane (readlane x, y), y -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
                         PatternMatch::m_Value(),
                         PatternMatch::m_Specific(II.getArgOperand(1))))) {
        return IC.replaceInstUsesWith(II, Src);
      }
    }

    break;
  }
  case Intrinsic::amdgcn_fmul_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);

    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    // infinity, gives +0.0.
    // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));

    // If we can prove we don't have one of the special cases then we can use a
    // normal fmul instruction instead.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
      FMul->takeName(&II);
      return IC.replaceInstUsesWith(II, FMul);
    }
    break;
  }
  case Intrinsic::amdgcn_fma_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Op2 = II.getArgOperand(2);

    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    // infinity, gives +0.0.
    // TODO: Move to InstSimplify?
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP())) {
      // It's tempting to just return Op2 here, but that would give the wrong
      // result if Op2 was -0.0.
      auto *Zero = ConstantFP::getZero(II.getType());
      auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
      FAdd->takeName(&II);
      return IC.replaceInstUsesWith(II, FAdd);
    }

    // If we can prove we don't have one of the special cases then we can use a
    // normal fma instead.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    if (isa<UndefValue>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));

    if (isa<ConstantPointerNull>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
    break;
  }
  case Intrinsic::amdgcn_buffer_store_format:
  case Intrinsic::amdgcn_raw_buffer_store_format:
  case Intrinsic::amdgcn_struct_buffer_store_format:
  case Intrinsic::amdgcn_raw_tbuffer_store:
  case Intrinsic::amdgcn_struct_tbuffer_store:
  case Intrinsic::amdgcn_tbuffer_store:
  case Intrinsic::amdgcn_image_store_1d:
  case Intrinsic::amdgcn_image_store_1darray:
  case Intrinsic::amdgcn_image_store_2d:
  case Intrinsic::amdgcn_image_store_2darray:
  case Intrinsic::amdgcn_image_store_2darraymsaa:
  case Intrinsic::amdgcn_image_store_2dmsaa:
  case Intrinsic::amdgcn_image_store_3d:
  case Intrinsic::amdgcn_image_store_cube:
  case Intrinsic::amdgcn_image_store_mip_1d:
  case Intrinsic::amdgcn_image_store_mip_1darray:
  case Intrinsic::amdgcn_image_store_mip_2d:
  case Intrinsic::amdgcn_image_store_mip_2darray:
  case Intrinsic::amdgcn_image_store_mip_3d:
  case Intrinsic::amdgcn_image_store_mip_cube: {
    if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
      break;

    APInt DemandedElts;
    if (ST->hasDefaultComponentBroadcast())
      DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
    else if (ST->hasDefaultComponentZero())
      DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
    else
      break;

    int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
    if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
                                              false)) {
      return IC.eraseInstFromFunction(II);
    }

    break;
  }
  }
  if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
          AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
    return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
  }
  return std::nullopt;
}

/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
///
/// The result of simplifying amdgcn image and buffer store intrinsics is updating
/// definitions of the intrinsics vector argument, not Uses of the result like
/// image and buffer loads.
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
/// struct returns.
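/// For example (illustrative): a raw buffer load of <4 x float> of which only
/// the first two elements are demanded is rewritten as a <2 x float> load
/// followed by a shufflevector that recreates the original vector width.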
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx, bool IsLoad) {

  auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
                                             : II.getOperand(0)->getType());
  unsigned VWidth = IIVTy->getNumElements();
  if (VWidth == 1)
    return nullptr;
  Type *EltTy = IIVTy->getElementType();

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case.

    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();

    // Start assuming the prefix of elements is demanded, but possibly clear
    // some other bits if there are trailing zeros (unused components at front)
    // and update offset.
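    // (Illustrative) e.g. DemandedElts = 0b1100 for a <4 x float> raw buffer
    // load: ActiveBits = 4 and UnusedComponentsAtFront = 2, so the load is
    // shrunk to the two demanded components and the byte offset is increased
    // by 2 * sizeof(float) = 8.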
    DemandedElts = (1 << ActiveBits) - 1;

    if (UnusedComponentsAtFront > 0) {
      static const unsigned InvalidOffsetIdx = 0xf;

      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
      case Intrinsic::amdgcn_raw_ptr_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // If resulting type is vec3, there is no point in trimming the
        // load with updated offset, as the vec3 would most likely be widened to
        // vec4 anyway during lowering.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
      case Intrinsic::amdgcn_struct_ptr_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        // TODO: handle tbuffer* intrinsics.
        OffsetIdx = InvalidOffsetIdx;
        break;
      }

      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear demanded bits and update the offset.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = Args[OffsetIdx];
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(EltTy);
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case.

    ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // dmask 0 has special semantics, do not simplify.
    if (DMaskVal == 0)
      return nullptr;

    // Mask off values that are undefined because the dmask doesn't cover them
    DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLdStIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLdStIdx])
          NewDMaskVal |= Bit;
        OrigLdStIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  unsigned NewNumElts = DemandedElts.popcount();
  if (!NewNumElts)
    return PoisonValue::get(IIVTy);

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }

  // Validate function argument and return types, extracting overloaded types
  // along the way.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;

  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
  OverloadTys[0] = NewTy;

  if (!IsLoad) {
    SmallVector<int, 8> EltMask;
    for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
      if (DemandedElts[OrigStoreIdx])
        EltMask.push_back(OrigStoreIdx);

    if (NewNumElts == 1)
      Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
    else
      Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
  }

  Function *NewIntrin = Intrinsic::getDeclaration(
      II.getModule(), II.getIntrinsicID(), OverloadTys);
  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);

  if (IsLoad) {
    if (NewNumElts == 1) {
      return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
                                            DemandedElts.countr_zero());
    }

    SmallVector<int, 8> EltMask;
    unsigned NewLoadIdx = 0;
    for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
      if (!!DemandedElts[OrigLoadIdx])
        EltMask.push_back(NewLoadIdx++);
      else
        EltMask.push_back(NewNumElts);
    }

    auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);

    return Shuffle;
  }

  return NewCall;
}

std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load:
  case Intrinsic::amdgcn_buffer_load_format:
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_ptr_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_ptr_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
  case Intrinsic::amdgcn_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  return std::nullopt;
}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
unsigned Intr
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast(Value *V)
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc)
Match an fpext from half to float, or a constant we can convert.
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(VerifyEach)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition: APFloat.h:988
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1069
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
cmpResult compare(const APFloat &RHS) const
Definition: APFloat.h:1250
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isMask(unsigned numBits) const
Definition: APInt.h:466
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
Definition: InstrTypes.h:1851
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:2228
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1692
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1678
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1778
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1781
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
bool isSigned() const
Definition: InstrTypes.h:1265
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
bool isFPPredicate() const
Definition: InstrTypes.h:1122
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2328
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
static Constant * getInfinity(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1083
static Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1037
static Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
Definition: Constants.cpp:1004
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:201
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition: Operator.h:319
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition: Operator.h:314
float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
bool allowContract() const
Definition: FMF.h:70
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:844
bool hasMed3_16() const
Definition: GCNSubtarget.h:409
bool isWave32() const
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:846
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Value * CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1547
Value * CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maxnum intrinsic.
Definition: IRBuilder.h:1002
Value * CreateFPCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2217
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1601
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:539
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateMinNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minnum intrinsic.
Definition: IRBuilder.h:992
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2245
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
The core instruction combiner logic.
Definition: InstCombiner.h:47
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
BuilderTy & Builder
Definition: InstCombiner.h:60
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:342
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:82
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an operator which supports these flags.
const BasicBlock * getParent() const
Definition: Instruction.h:152
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
Definition: Instruction.h:149
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
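A small sketch of transferring instruction state with the two helpers above; the names are hypothetical, and both instructions are assumed to be floating-point operators that support fast-math flags:

// Hypothetical: propagate FMF and metadata from Old onto its replacement.
static void transferIRState(llvm::Instruction *New,
                            const llvm::Instruction &Old) {
  New->copyFastMathFlags(Old.getFastMathFlags()); // FP operators only
  New->copyMetadata(Old);                         // all metadata kinds
}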
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Metadata node.
Definition: Metadata.h:1067
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
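For illustration, a sketch threading a string through the metadata factories above so it can appear where a Value operand is required; the helper name is hypothetical:

// Hypothetical: wrap Str as the tuple !{!"Str"} and expose it as a Value.
static llvm::Value *wrapStringAsValue(llvm::LLVMContext &Ctx,
                                      llvm::StringRef Str) {
  llvm::Metadata *S = llvm::MDString::get(Ctx, Str);
  llvm::MDNode *N = llvm::MDTuple::get(Ctx, {S});
  return llvm::MetadataAsValue::get(Ctx, N);
}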
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
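A minimal sketch of the SmallVector operations listed above; the element type and values are illustrative, with llvm/ADT/SmallVector.h assumed:

// Hypothetical: the first 4 elements are stored inline; later pushes
// spill to the heap transparently.
static bool demoSmallVector() {
  llvm::SmallVector<unsigned, 4> Elts;
  Elts.push_back(42);
  return !Elts.empty(); // true after the push_back
}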
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
unsigned getIntegerBitWidth() const
const fltSemantics & getFltSemantics() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
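For illustration, a predicate built from the Type queries above; the helper name is hypothetical:

// Hypothetical: true for half and for vectors of half, since
// getScalarType looks through vector types.
static bool isHalfOrHalfVector(llvm::Type *Ty) {
  return Ty->getScalarType()->isHalfTy();
}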
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
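A one-function sketch of the UndefValue and PoisonValue factories; the helper and its flag are hypothetical:

// Hypothetical: pick poison for a "no defined value" placeholder unless
// undef is specifically required for compatibility.
static llvm::Constant *makeDontCare(llvm::Type *Ty, bool UsePoison) {
  if (UsePoison)
    return llvm::PoisonValue::get(Ty);
  return llvm::UndefValue::get(Ty);
}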
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
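A sketch of the Value helpers above in a common replacement pattern; the helper is hypothetical:

// Hypothetical: only rewrite single-use values, and move the old name to
// the replacement so the IR stays readable.
static bool replaceSingleUse(llvm::Value *Old, llvm::Value *New) {
  if (!Old->hasOneUse())
    return false;
  New->takeName(Old);
  Old->replaceAllUsesWith(New);
  return true;
}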
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the .td file.
Definition: Function.cpp:1749
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
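For illustration, declaring and calling an overloaded intrinsic with the helper above; the intrinsic ID and names are only examples, with llvm/IR/IRBuilder.h and llvm/IR/Intrinsics.h assumed:

// Hypothetical: materialize a declaration of llvm.fabs.f32 in M and emit
// a call to it through the builder.
static llvm::CallInst *emitFabsF32(llvm::Module *M, llvm::IRBuilder<> &B,
                                   llvm::Value *X) {
  llvm::Function *Decl = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::fabs, {B.getFloatTy()});
  return B.CreateCall(Decl, {X});
}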
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:483
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
Definition: PatternMatch.h:719
specificval_ty m_Specific(const Value *V)
Match only the specified value V.
Definition: PatternMatch.h:830
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:547
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
Definition: PatternMatch.h:173
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition: PatternMatch.h:105
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
Definition: PatternMatch.h:707
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:665
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:567
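For illustration, a sketch composing several of these matchers; the helper and the captured name X are hypothetical, with llvm/IR/PatternMatch.h assumed:

// Hypothetical: recognize (zext X) + 1 and capture X on success.
static bool matchZExtPlusOne(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  return match(V, m_Add(m_ZExt(m_Value(X)), m_One()));
}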
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
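A one-liner showing the bit-counting helper above (llvm/ADT/bit.h assumed; the variable is illustrative):

int Bits = llvm::popcount(0b1011u); // 0b1011 has three set bits, so Bits == 3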
bool isKnownNeverInfOrNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point value can never contain a NaN or infinity.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of the C standard library function frexp.
Definition: APFloat.h:1373
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2019 maximumNumber semantics.
Definition: APFloat.h:1410
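For illustration, the two APFloat helpers above on concrete values; the function is hypothetical, with llvm/ADT/APFloat.h assumed:

// Hypothetical: maxnum picks the larger value; frexp splits it into a
// fraction in [0.5, 1) and a power-of-two exponent.
static void demoAPFloat() {
  llvm::APFloat A(1.5f), B(3.0f);
  llvm::APFloat M = llvm::maxnum(A, B); // M == 3.0
  int Exp = 0;
  llvm::APFloat Frac =
      llvm::frexp(M, Exp, llvm::APFloat::rmNearestTiesToEven);
  // Frac == 0.75 and Exp == 2, since 3.0 == 0.75 * 2^2.
}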
constexpr int PoisonMaskElem
@ FMul
Product of floats.
@ FAdd
Sum of floats.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register, for example if it were inserted then extracted from the vector.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:220
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96
bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.
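Finally, a sketch tying the SimplifyQuery helpers to the floating-point query listed earlier; the names I, Op, and SQ are hypothetical stand-ins for what a combine routine receives:

// Hypothetical: refine the query to the context instruction, then ask
// whether Op is known to be neither NaN nor infinity.
static bool opIsFinite(const llvm::SimplifyQuery &SQ,
                       const llvm::Instruction &I, const llvm::Value *Op) {
  const llvm::SimplifyQuery Q = SQ.getWithInstruction(&I);
  return llvm::isKnownNeverInfOrNaN(Op, /*Depth=*/0, Q);
}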