ConstantFolding.cpp
1//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines routines for folding instructions into constants.
10//
11// Also, to supplement the basic IR ConstantExpr simplifications,
12// this file defines some additional folding routines that can make use of
13// DataLayout information. These functions cannot go in IR due to library
14// dependency issues.
15//
16//===----------------------------------------------------------------------===//
17
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/APSInt.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/StringRef.h"
31#include "llvm/Config/config.h"
32#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/IntrinsicsAMDGPU.h"
47#include "llvm/IR/IntrinsicsARM.h"
48#include "llvm/IR/IntrinsicsNVPTX.h"
49#include "llvm/IR/IntrinsicsWebAssembly.h"
50#include "llvm/IR/IntrinsicsX86.h"
52#include "llvm/IR/Operator.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
59#include <cassert>
60#include <cerrno>
61#include <cfenv>
62#include <cmath>
63#include <cstdint>
64
65using namespace llvm;
66
68 "disable-fp-call-folding",
69 cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
70 cl::init(false), cl::Hidden);
71
72namespace {
73
74//===----------------------------------------------------------------------===//
75// Constant Folding internal helper functions
76//===----------------------------------------------------------------------===//
77
78static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
79 Constant *C, Type *SrcEltTy,
80 unsigned NumSrcElts,
81 const DataLayout &DL) {
82 // Now that we know that the input value is a vector of integers, just shift
83 // and insert them into our result.
84 unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
85 for (unsigned i = 0; i != NumSrcElts; ++i) {
86 Constant *Element;
87 if (DL.isLittleEndian())
88 Element = C->getAggregateElement(NumSrcElts - i - 1);
89 else
90 Element = C->getAggregateElement(i);
91
92 if (isa_and_nonnull<UndefValue>(Element)) {
93 Result <<= BitShift;
94 continue;
95 }
96
97 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
98 if (!ElementCI)
99 return ConstantExpr::getBitCast(C, DestTy);
100
101 Result <<= BitShift;
102 Result |= ElementCI->getValue().zext(Result.getBitWidth());
103 }
104
105 return nullptr;
106}
107
108/// Constant fold bitcast, symbolically evaluating it with DataLayout.
109/// This always returns a non-null constant, but it may be a
110/// ConstantExpr if unfoldable.
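/// For example, on a little-endian target the constant
///   bitcast (<4 x i8> <i8 1, i8 2, i8 3, i8 4> to i32)
/// folds to i32 0x04030201: the first vector element lands in the
/// lowest-order byte of the integer result.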
111Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
112 assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
113 "Invalid constantexpr bitcast!");
114
115 // Catch the obvious splat cases.
116 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
117 return Res;
118
119 if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
120 // Handle a vector->scalar integer/fp cast.
121 if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
122 unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
123 Type *SrcEltTy = VTy->getElementType();
124
125 // If the vector is a vector of floating point, convert it to vector of int
126 // to simplify things.
127 if (SrcEltTy->isFloatingPointTy()) {
128 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
129 auto *SrcIVTy = FixedVectorType::get(
130 IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
131 // Ask IR to do the conversion now that #elts line up.
132 C = ConstantExpr::getBitCast(C, SrcIVTy);
133 }
134
135 APInt Result(DL.getTypeSizeInBits(DestTy), 0);
136 if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
137 SrcEltTy, NumSrcElts, DL))
138 return CE;
139
140 if (isa<IntegerType>(DestTy))
141 return ConstantInt::get(DestTy, Result);
142
143 APFloat FP(DestTy->getFltSemantics(), Result);
144 return ConstantFP::get(DestTy->getContext(), FP);
145 }
146 }
147
148 // The code below only handles casts to vectors currently.
149 auto *DestVTy = dyn_cast<VectorType>(DestTy);
150 if (!DestVTy)
151 return ConstantExpr::getBitCast(C, DestTy);
152
153 // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
154 // vector so the code below can handle it uniformly.
155 if (!isa<VectorType>(C->getType()) &&
156 (isa<ConstantFP>(C) || isa<ConstantInt>(C))) {
157 Constant *Ops = C; // don't take the address of C!
158 return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
159 }
160
161 // Some of what follows may extend to cover scalable vectors but the current
162 // implementation is fixed length specific.
163 if (!isa<FixedVectorType>(C->getType()))
164 return ConstantExpr::getBitCast(C, DestTy);
165
166 // If this is a bitcast from constant vector -> vector, fold it.
167 if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C) &&
168 !isa<ConstantInt>(C) && !isa<ConstantFP>(C))
169 return ConstantExpr::getBitCast(C, DestTy);
170
171 // If the element types match, IR can fold it.
172 unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
173 unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
174 if (NumDstElt == NumSrcElt)
175 return ConstantExpr::getBitCast(C, DestTy);
176
177 Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
178 Type *DstEltTy = DestVTy->getElementType();
179
180 // Otherwise, we're changing the number of elements in a vector, which
181 // requires endianness information to do the right thing. For example,
182 // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
183 // folds to (little endian):
184 // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
185 // and to (big endian):
186 // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
187
188 // First things first. We only want to think about integers here, so if
189 // we have something in FP form, recast it as integer.
190 if (DstEltTy->isFloatingPointTy()) {
191 // Fold to a vector of integers with the same size as our FP type.
192 unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
193 auto *DestIVTy = FixedVectorType::get(
194 IntegerType::get(C->getContext(), FPWidth), NumDstElt);
195 // Recursively handle this integer conversion, if possible.
196 C = FoldBitCast(C, DestIVTy, DL);
197
198 // Finally, IR can handle this now that #elts line up.
199 return ConstantExpr::getBitCast(C, DestTy);
200 }
201
202 // Okay, we know the destination is integer, if the input is FP, convert
203 // it to integer first.
204 if (SrcEltTy->isFloatingPointTy()) {
205 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
206 auto *SrcIVTy = FixedVectorType::get(
207 IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
208 // Ask IR to do the conversion now that #elts line up.
209 C = ConstantExpr::getBitCast(C, SrcIVTy);
210 assert((isa<ConstantVector>(C) || // FIXME: Remove ConstantVector.
211 isa<ConstantDataVector>(C) || isa<ConstantInt>(C)) &&
212 "Constant folding cannot fail for plain fp->int bitcast!");
213 }
214
215 // Now we know that the input and output vectors are both integer vectors
216 // of the same size, and that their #elements is not the same. Do the
217 // conversion here, which depends on whether the input or output has
218 // more elements.
219 bool isLittleEndian = DL.isLittleEndian();
220
221 SmallVector<Constant*, 32> Result;
222 if (NumDstElt < NumSrcElt) {
223 // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
224 Constant *Zero = Constant::getNullValue(DstEltTy);
225 unsigned Ratio = NumSrcElt/NumDstElt;
226 unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
227 unsigned SrcElt = 0;
228 for (unsigned i = 0; i != NumDstElt; ++i) {
229 // Build each element of the result.
230 Constant *Elt = Zero;
231 unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
232 for (unsigned j = 0; j != Ratio; ++j) {
233 Constant *Src = C->getAggregateElement(SrcElt++);
234 if (Src && isa<UndefValue>(Src))
235 Src = Constant::getNullValue(
236 cast<VectorType>(C->getType())->getElementType());
237 else
238 Src = dyn_cast_or_null<ConstantInt>(Src);
239 if (!Src) // Reject constantexpr elements.
240 return ConstantExpr::getBitCast(C, DestTy);
241
242 // Zero extend the element to the right size.
243 Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
244 DL);
245 assert(Src && "Constant folding cannot fail on plain integers");
246
247 // Shift it to the right place, depending on endianness.
248 Src = ConstantFoldBinaryOpOperands(
249 Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
250 DL);
251 assert(Src && "Constant folding cannot fail on plain integers");
252
253 ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
254
255 // Mix it in.
256 Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
257 assert(Elt && "Constant folding cannot fail on plain integers");
258 }
259 Result.push_back(Elt);
260 }
261 return ConstantVector::get(Result);
262 }
263
264 // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
265 unsigned Ratio = NumDstElt/NumSrcElt;
266 unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
267
268 // Loop over each source value, expanding into multiple results.
269 for (unsigned i = 0; i != NumSrcElt; ++i) {
270 auto *Element = C->getAggregateElement(i);
271
272 if (!Element) // Reject constantexpr elements.
273 return ConstantExpr::getBitCast(C, DestTy);
274
275 if (isa<UndefValue>(Element)) {
276 // Correctly Propagate undef values.
277 Result.append(Ratio, UndefValue::get(DstEltTy));
278 continue;
279 }
280
281 auto *Src = dyn_cast<ConstantInt>(Element);
282 if (!Src)
283 return ConstantExpr::getBitCast(C, DestTy);
284
285 unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
286 for (unsigned j = 0; j != Ratio; ++j) {
287 // Shift the piece of the value into the right place, depending on
288 // endianness.
289 APInt Elt = Src->getValue().lshr(ShiftAmt);
290 ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
291
292 // Truncate and remember this piece.
293 Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
294 }
295 }
296
297 return ConstantVector::get(Result);
298}
299
300} // end anonymous namespace
301
302/// If this constant is a constant offset from a global, return the global and
303/// the constant. Because of constantexprs, this function is recursive.
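/// For example, given the constant expression
///   ptrtoint (getelementptr inbounds ([5 x i32], ptr @a, i32 0, i32 3))
/// this returns true with GV set to @a and Offset set to 12 (three i32
/// elements at 4 bytes each).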
304 bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
305 APInt &Offset, const DataLayout &DL,
306 DSOLocalEquivalent **DSOEquiv) {
307 if (DSOEquiv)
308 *DSOEquiv = nullptr;
309
310 // Trivial case, constant is the global.
311 if ((GV = dyn_cast<GlobalValue>(C))) {
312 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
313 Offset = APInt(BitWidth, 0);
314 return true;
315 }
316
317 if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
318 if (DSOEquiv)
319 *DSOEquiv = FoundDSOEquiv;
320 GV = FoundDSOEquiv->getGlobalValue();
321 unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
322 Offset = APInt(BitWidth, 0);
323 return true;
324 }
325
326 // Otherwise, if this isn't a constant expr, bail out.
327 auto *CE = dyn_cast<ConstantExpr>(C);
328 if (!CE) return false;
329
330 // Look through ptr->int and ptr->ptr casts.
331 if (CE->getOpcode() == Instruction::PtrToInt ||
332 CE->getOpcode() == Instruction::PtrToAddr ||
333 CE->getOpcode() == Instruction::BitCast)
334 return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
335 DSOEquiv);
336
337 // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
338 auto *GEP = dyn_cast<GEPOperator>(CE);
339 if (!GEP)
340 return false;
341
342 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
343 APInt TmpOffset(BitWidth, 0);
344
345 // If the base isn't a global+constant, we aren't either.
346 if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
347 DSOEquiv))
348 return false;
349
350 // Otherwise, add any offset that our operands provide.
351 if (!GEP->accumulateConstantOffset(DL, TmpOffset))
352 return false;
353
354 Offset = TmpOffset;
355 return true;
356}
357
358 Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
359 const DataLayout &DL) {
360 do {
361 Type *SrcTy = C->getType();
362 if (SrcTy == DestTy)
363 return C;
364
365 TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
366 TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
367 if (!TypeSize::isKnownGE(SrcSize, DestSize))
368 return nullptr;
369
370 // Catch the obvious splat cases (since all-zeros can coerce non-integral
371 // pointers legally).
372 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
373 return Res;
374
375 // If the type sizes are the same and a cast is legal, just directly
376 // cast the constant.
377 // But be careful not to coerce non-integral pointers illegally.
378 if (SrcSize == DestSize &&
379 DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
380 DL.isNonIntegralPointerType(DestTy->getScalarType())) {
381 Instruction::CastOps Cast = Instruction::BitCast;
382 // If we are going from a pointer to int or vice versa, we spell the cast
383 // differently.
384 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
385 Cast = Instruction::IntToPtr;
386 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
387 Cast = Instruction::PtrToInt;
388
389 if (CastInst::castIsValid(Cast, C, DestTy))
390 return ConstantFoldCastOperand(Cast, C, DestTy, DL);
391 }
392
393 // If this isn't an aggregate type, there is nothing we can do to drill down
394 // and find a bitcastable constant.
395 if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
396 return nullptr;
397
398 // We're simulating a load through a pointer that was bitcast to point to
399 // a different type, so we can try to walk down through the initial
400 // elements of an aggregate to see if some part of the aggregate is
401 // castable to implement the "load" semantic model.
402 if (SrcTy->isStructTy()) {
403 // Struct types might have leading zero-length elements like [0 x i32],
404 // which are certainly not what we are looking for, so skip them.
405 unsigned Elem = 0;
406 Constant *ElemC;
407 do {
408 ElemC = C->getAggregateElement(Elem++);
409 } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
410 C = ElemC;
411 } else {
412 // For non-byte-sized vector elements, the first element is not
413 // necessarily located at the vector base address.
414 if (auto *VT = dyn_cast<VectorType>(SrcTy))
415 if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
416 return nullptr;
417
418 C = C->getAggregateElement(0u);
419 }
420 } while (C);
421
422 return nullptr;
423}
424
425namespace {
426
427/// Recursive helper to read bits out of global. C is the constant being copied
428/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
429/// results into and BytesLeft is the number of bytes left in
430/// the CurPtr buffer. DL is the DataLayout.
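/// For example, an i32 constant holding 0x04030201 is copied into CurPtr as
/// the bytes {0x01, 0x02, 0x03, 0x04} on a little-endian target, and in the
/// reverse order on a big-endian target.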
431bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
432 unsigned BytesLeft, const DataLayout &DL) {
433 assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
434 "Out of range access");
435
436 // Reading type padding, return zero.
437 if (ByteOffset >= DL.getTypeStoreSize(C->getType()))
438 return true;
439
440 // If this element is zero or undefined, we can just return since *CurPtr is
441 // zero initialized.
442 if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
443 return true;
444
445 if (auto *CI = dyn_cast<ConstantInt>(C)) {
446 if ((CI->getBitWidth() & 7) != 0)
447 return false;
448 const APInt &Val = CI->getValue();
449 unsigned IntBytes = unsigned(CI->getBitWidth()/8);
450
451 for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
452 unsigned n = ByteOffset;
453 if (!DL.isLittleEndian())
454 n = IntBytes - n - 1;
455 CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
456 ++ByteOffset;
457 }
458 return true;
459 }
460
461 if (auto *CFP = dyn_cast<ConstantFP>(C)) {
462 if (CFP->getType()->isDoubleTy()) {
463 C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
464 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
465 }
466 if (CFP->getType()->isFloatTy()){
467 C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
468 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
469 }
470 if (CFP->getType()->isHalfTy()){
471 C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
472 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
473 }
474 return false;
475 }
476
477 if (auto *CS = dyn_cast<ConstantStruct>(C)) {
478 const StructLayout *SL = DL.getStructLayout(CS->getType());
479 unsigned Index = SL->getElementContainingOffset(ByteOffset);
480 uint64_t CurEltOffset = SL->getElementOffset(Index);
481 ByteOffset -= CurEltOffset;
482
483 while (true) {
484 // If the element access is to the element itself and not to tail padding,
485 // read the bytes from the element.
486 uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
487
488 if (ByteOffset < EltSize &&
489 !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
490 BytesLeft, DL))
491 return false;
492
493 ++Index;
494
495 // Check to see if we read from the last struct element, if so we're done.
496 if (Index == CS->getType()->getNumElements())
497 return true;
498
499 // If we read all of the bytes we needed from this element we're done.
500 uint64_t NextEltOffset = SL->getElementOffset(Index);
501
502 if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
503 return true;
504
505 // Move to the next element of the struct.
506 CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
507 BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
508 ByteOffset = 0;
509 CurEltOffset = NextEltOffset;
510 }
511 // not reached.
512 }
513
514 if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
515 isa<ConstantDataSequential>(C)) {
516 uint64_t NumElts, EltSize;
517 Type *EltTy;
518 if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
519 NumElts = AT->getNumElements();
520 EltTy = AT->getElementType();
521 EltSize = DL.getTypeAllocSize(EltTy);
522 } else {
523 NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
524 EltTy = cast<FixedVectorType>(C->getType())->getElementType();
525 // TODO: For non-byte-sized vectors, current implementation assumes there is
526 // padding to the next byte boundary between elements.
527 if (!DL.typeSizeEqualsStoreSize(EltTy))
528 return false;
529
530 EltSize = DL.getTypeStoreSize(EltTy);
531 }
532 uint64_t Index = ByteOffset / EltSize;
533 uint64_t Offset = ByteOffset - Index * EltSize;
534
535 for (; Index != NumElts; ++Index) {
536 if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
537 BytesLeft, DL))
538 return false;
539
540 uint64_t BytesWritten = EltSize - Offset;
541 assert(BytesWritten <= EltSize && "Not indexing into this element?");
542 if (BytesWritten >= BytesLeft)
543 return true;
544
545 Offset = 0;
546 BytesLeft -= BytesWritten;
547 CurPtr += BytesWritten;
548 }
549 return true;
550 }
551
552 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
553 if (CE->getOpcode() == Instruction::IntToPtr &&
554 CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
555 return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
556 BytesLeft, DL);
557 }
558 }
559
560 // Otherwise, unknown initializer type.
561 return false;
562}
563
564Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
565 int64_t Offset, const DataLayout &DL) {
566 // Bail out early. We do not expect to load from a scalable global variable.
567 if (isa<ScalableVectorType>(LoadTy))
568 return nullptr;
569
570 auto *IntType = dyn_cast<IntegerType>(LoadTy);
571
572 // If this isn't an integer load we can't fold it directly.
573 if (!IntType) {
574 // If this is a non-integer load, we can try folding it as an int load and
575 // then bitcast the result. This can be useful for union cases. Note
576 // that address spaces don't matter here since we're not going to result in
577 // an actual new load.
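// For example, a load of type float from an i32 global initializer is folded
// by reading the value as an i32 and bitcasting the result back to float.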
578 if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
579 !LoadTy->isVectorTy())
580 return nullptr;
581
582 Type *MapTy = Type::getIntNTy(C->getContext(),
583 DL.getTypeSizeInBits(LoadTy).getFixedValue());
584 if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
585 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
586 // Materializing a zero can be done trivially without a bitcast
587 return Constant::getNullValue(LoadTy);
588 Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
589 Res = FoldBitCast(Res, CastTy, DL);
590 if (LoadTy->isPtrOrPtrVectorTy()) {
591 // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr
592 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
593 return Constant::getNullValue(LoadTy);
594 if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
595 // Be careful not to replace a load of an addrspace value with an inttoptr here
596 return nullptr;
597 Res = ConstantExpr::getIntToPtr(Res, LoadTy);
598 }
599 return Res;
600 }
601 return nullptr;
602 }
603
604 unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
605 if (BytesLoaded > 32 || BytesLoaded == 0)
606 return nullptr;
607
608 // If we're not accessing anything in this constant, the result is undefined.
609 if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
610 return PoisonValue::get(IntType);
611
612 // TODO: We should be able to support scalable types.
613 TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
614 if (InitializerSize.isScalable())
615 return nullptr;
616
617 // If we're not accessing anything in this constant, the result is undefined.
618 if (Offset >= (int64_t)InitializerSize.getFixedValue())
619 return PoisonValue::get(IntType);
620
621 unsigned char RawBytes[32] = {0};
622 unsigned char *CurPtr = RawBytes;
623 unsigned BytesLeft = BytesLoaded;
624
625 // If we're loading off the beginning of the global, some bytes may be valid.
626 if (Offset < 0) {
627 CurPtr += -Offset;
628 BytesLeft += Offset;
629 Offset = 0;
630 }
631
632 if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
633 return nullptr;
634
635 APInt ResultVal = APInt(IntType->getBitWidth(), 0);
636 if (DL.isLittleEndian()) {
637 ResultVal = RawBytes[BytesLoaded - 1];
638 for (unsigned i = 1; i != BytesLoaded; ++i) {
639 ResultVal <<= 8;
640 ResultVal |= RawBytes[BytesLoaded - 1 - i];
641 }
642 } else {
643 ResultVal = RawBytes[0];
644 for (unsigned i = 1; i != BytesLoaded; ++i) {
645 ResultVal <<= 8;
646 ResultVal |= RawBytes[i];
647 }
648 }
649
650 return ConstantInt::get(IntType->getContext(), ResultVal);
651}
652
653} // anonymous namespace
654
655// If GV is a constant with an initializer read its representation starting
656// at Offset and return it as a constant array of unsigned char. Otherwise
657// return null.
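// For example, a global initialized to i32 0x04030201 yields, on a
// little-endian target, the byte array c"\01\02\03\04" when read from Offset 0.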
658 Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
659 uint64_t Offset) {
660 if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
661 return nullptr;
662
663 const DataLayout &DL = GV->getDataLayout();
664 Constant *Init = const_cast<Constant *>(GV->getInitializer());
665 TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
666 if (InitSize < Offset)
667 return nullptr;
668
669 uint64_t NBytes = InitSize - Offset;
670 if (NBytes > UINT16_MAX)
671 // Bail for large initializers in excess of 64K to avoid allocating
672 // too much memory.
673 // Offset is assumed to be less than or equal than InitSize (this
674 // is enforced in ReadDataFromGlobal).
675 return nullptr;
676
677 SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
678 unsigned char *CurPtr = RawBytes.data();
679
680 if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
681 return nullptr;
682
683 return ConstantDataArray::get(GV->getContext(), RawBytes);
684}
685
686/// If this Offset points exactly to the start of an aggregate element, return
687/// that element, otherwise return nullptr.
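/// For example, with a Base of type [4 x i32] and an Offset of 8, this returns
/// the aggregate element at index 2; an Offset of 6 returns nullptr because it
/// does not land on an element boundary.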
688 Constant *getConstantAtOffset(Constant *Base, APInt Offset,
689 const DataLayout &DL) {
690 if (Offset.isZero())
691 return Base;
692
693 if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
694 return nullptr;
695
696 Type *ElemTy = Base->getType();
697 SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
698 if (!Offset.isZero() || !Indices[0].isZero())
699 return nullptr;
700
701 Constant *C = Base;
702 for (const APInt &Index : drop_begin(Indices)) {
703 if (Index.isNegative() || Index.getActiveBits() >= 32)
704 return nullptr;
705
706 C = C->getAggregateElement(Index.getZExtValue());
707 if (!C)
708 return nullptr;
709 }
710
711 return C;
712}
713
714 Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
715 const APInt &Offset,
716 const DataLayout &DL) {
717 if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
718 if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
719 return Result;
720
721 // Explicitly check for out-of-bounds access, so we return poison even if the
722 // constant is a uniform value.
723 TypeSize Size = DL.getTypeAllocSize(C->getType());
724 if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
725 return PoisonValue::get(Ty);
726
727 // Try an offset-independent fold of a uniform value.
728 if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
729 return Result;
730
731 // Try hard to fold loads from bitcasted strange and non-type-safe things.
732 if (Offset.getSignificantBits() <= 64)
733 if (Constant *Result =
734 FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
735 return Result;
736
737 return nullptr;
738}
739
744
745 Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
746 APInt Offset,
747 const DataLayout &DL) {
748 // We can only fold loads from constant globals with a definitive initializer.
749 // Check this upfront, to skip expensive offset calculations.
750 auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
751 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
752 return nullptr;
753
754 C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
755 DL, Offset, /* AllowNonInbounds */ true));
756
757 if (C == GV)
758 if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
759 Offset, DL))
760 return Result;
761
762 // If this load comes from anywhere in a uniform constant global, the value
763 // is always the same, regardless of the loaded offset.
764 return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);
765}
766
767 Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
768 const DataLayout &DL) {
769 APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
770 return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);
771}
772
773 Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
774 const DataLayout &DL) {
775 if (isa<PoisonValue>(C))
776 return PoisonValue::get(Ty);
777 if (isa<UndefValue>(C))
778 return UndefValue::get(Ty);
779 // If padding is needed when storing C to memory, then it isn't considered as
780 // uniform.
781 if (!DL.typeSizeEqualsStoreSize(C->getType()))
782 return nullptr;
783 if (C->isNullValue() && !Ty->isX86_AMXTy())
784 return Constant::getNullValue(Ty);
785 if (C->isAllOnesValue() &&
786 (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
787 return Constant::getAllOnesValue(Ty);
788 return nullptr;
789}
790
791namespace {
792
793/// One of Op0/Op1 is a constant expression.
794/// Attempt to symbolically evaluate the result of a binary operator merging
795/// these together. If target data info is available, it is provided as DL,
796/// otherwise DL is null.
797Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
798 const DataLayout &DL) {
799 // SROA
800
801 // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
802 // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
803 // bits.
804
805 if (Opc == Instruction::And) {
806 KnownBits Known0 = computeKnownBits(Op0, DL);
807 KnownBits Known1 = computeKnownBits(Op1, DL);
808 if ((Known1.One | Known0.Zero).isAllOnes()) {
809 // All the bits of Op0 that the 'and' could be masking are already zero.
810 return Op0;
811 }
812 if ((Known0.One | Known1.Zero).isAllOnes()) {
813 // All the bits of Op1 that the 'and' could be masking are already zero.
814 return Op1;
815 }
816
817 Known0 &= Known1;
818 if (Known0.isConstant())
819 return ConstantInt::get(Op0->getType(), Known0.getConstant());
820 }
821
822 // If the constant expr is something like &A[123] - &A[4].f, fold this into a
823 // constant. This happens frequently when iterating over a global array.
824 if (Opc == Instruction::Sub) {
825 GlobalValue *GV1, *GV2;
826 APInt Offs1, Offs2;
827
828 if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
829 if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
830 unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
831
832 // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
833 // PtrToInt may change the bitwidth so we have convert to the right size
834 // first.
835 return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
836 Offs2.zextOrTrunc(OpSize));
837 }
838 }
839
840 return nullptr;
841}
842
843/// If array indices are not pointer-sized integers, explicitly cast them so
844/// that they aren't implicitly casted by the getelementptr.
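/// For example, on a target whose pointer index type is i64, the i32 index in
///   getelementptr i32, ptr @g, i32 1
/// is sign-extended to an i64 index before the GEP is folded.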
845Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
846 Type *ResultTy, GEPNoWrapFlags NW,
847 std::optional<ConstantRange> InRange,
848 const DataLayout &DL, const TargetLibraryInfo *TLI) {
849 Type *IntIdxTy = DL.getIndexType(ResultTy);
850 Type *IntIdxScalarTy = IntIdxTy->getScalarType();
851
852 bool Any = false;
853 SmallVector<Constant *, 32> NewIdxs;
854 for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
855 if ((i == 1 ||
856 !isa<StructType>(GetElementPtrInst::getIndexedType(
857 SrcElemTy, Ops.slice(1, i - 1)))) &&
858 Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
859 Any = true;
860 Type *NewType =
861 Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
862 Constant *NewIdx = ConstantFoldCastOperand(
863 CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,
864 DL);
865 if (!NewIdx)
866 return nullptr;
867 NewIdxs.push_back(NewIdx);
868 } else
869 NewIdxs.push_back(Ops[i]);
870 }
871
872 if (!Any)
873 return nullptr;
874
875 Constant *C =
876 ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);
877 return ConstantFoldConstant(C, DL, TLI);
878}
879
880/// If we can symbolically evaluate the GEP constant expression, do so.
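/// For example, a GEP of a GEP over the same global is collapsed into a single
/// byte-offset GEP (an i8 "ptradd") whenever all indices are constant integers.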
881 Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
882 ArrayRef<Constant *> Ops,
883 const DataLayout &DL,
884 const TargetLibraryInfo *TLI) {
885 Type *SrcElemTy = GEP->getSourceElementType();
886 Type *ResTy = GEP->getType();
887 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
888 return nullptr;
889
890 if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),
891 GEP->getInRange(), DL, TLI))
892 return C;
893
894 Constant *Ptr = Ops[0];
895 if (!Ptr->getType()->isPointerTy())
896 return nullptr;
897
898 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
899
900 for (unsigned i = 1, e = Ops.size(); i != e; ++i)
901 if (!isa<ConstantInt>(Ops[i]) || !Ops[i]->getType()->isIntegerTy())
902 return nullptr;
903
904 unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
905 APInt Offset = APInt(
906 BitWidth,
907 DL.getIndexedOffsetInType(
908 SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
909 /*isSigned=*/true, /*implicitTrunc=*/true);
910
911 std::optional<ConstantRange> InRange = GEP->getInRange();
912 if (InRange)
913 InRange = InRange->sextOrTrunc(BitWidth);
914
915 // If this is a GEP of a GEP, fold it all into a single GEP.
916 GEPNoWrapFlags NW = GEP->getNoWrapFlags();
917 bool Overflow = false;
918 while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
919 NW &= GEP->getNoWrapFlags();
920
921 SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
922
923 // Do not try the incorporate the sub-GEP if some index is not a number.
924 bool AllConstantInt = true;
925 for (Value *NestedOp : NestedOps)
926 if (!isa<ConstantInt>(NestedOp)) {
927 AllConstantInt = false;
928 break;
929 }
930 if (!AllConstantInt)
931 break;
932
933 // Adjust inrange offset and intersect inrange attributes
934 if (auto GEPRange = GEP->getInRange()) {
935 auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
936 InRange =
937 InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
938 }
939
940 Ptr = cast<Constant>(GEP->getOperand(0));
941 SrcElemTy = GEP->getSourceElementType();
942 Offset = Offset.sadd_ov(
943 APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps),
944 /*isSigned=*/true, /*implicitTrunc=*/true),
945 Overflow);
946 }
947
948 // Preserving nusw (without inbounds) also requires that the offset
949 // additions did not overflow.
950 if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
951 NW = NW.withoutNoUnsignedSignedWrap();
952
953 // If the base value for this address is a literal integer value, fold the
954 // getelementptr to the resulting integer value casted to the pointer type.
955 APInt BaseIntVal(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
956 if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
957 if (CE->getOpcode() == Instruction::IntToPtr) {
958 if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
959 BaseIntVal = Base->getValue().zextOrTrunc(BaseIntVal.getBitWidth());
960 }
961 }
962
963 if ((Ptr->isNullValue() || BaseIntVal != 0) &&
964 !DL.mustNotIntroduceIntToPtr(Ptr->getType())) {
965
966 // If the index size is smaller than the pointer size, add to the low
967 // bits only.
968 BaseIntVal.insertBits(BaseIntVal.trunc(BitWidth) + Offset, 0);
969 Constant *C = ConstantInt::get(Ptr->getContext(), BaseIntVal);
970 return ConstantExpr::getIntToPtr(C, ResTy);
971 }
972
973 // Try to infer inbounds for GEPs of globals.
974 if (!NW.isInBounds() && Offset.isNonNegative()) {
975 bool CanBeNull, CanBeFreed;
976 uint64_t DerefBytes =
977 Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
978 if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))
979 NW |= GEPNoWrapFlags::inBounds();
980 }
981
982 // nusw + nneg -> nuw
983 if (NW.hasNoUnsignedSignedWrap() && Offset.isNonNegative())
984 NW |= GEPNoWrapFlags::noUnsignedWrap();
985
986 // Otherwise canonicalize this to a single ptradd.
987 LLVMContext &Ctx = Ptr->getContext();
988 return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,
989 ConstantInt::get(Ctx, Offset), NW,
990 InRange);
991}
992
993/// Attempt to constant fold an instruction with the
994/// specified opcode and operands. If successful, the constant result is
995/// returned, if not, null is returned. Note that this function can fail when
996/// attempting to fold instructions like loads and stores, which have no
997/// constant expression form.
998 Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
999 ArrayRef<Constant *> Ops,
1000 const DataLayout &DL,
1001 const TargetLibraryInfo *TLI,
1002 bool AllowNonDeterministic) {
1003 Type *DestTy = InstOrCE->getType();
1004
1005 if (Instruction::isUnaryOp(Opcode))
1006 return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
1007
1008 if (Instruction::isBinaryOp(Opcode)) {
1009 switch (Opcode) {
1010 default:
1011 break;
1012 case Instruction::FAdd:
1013 case Instruction::FSub:
1014 case Instruction::FMul:
1015 case Instruction::FDiv:
1016 case Instruction::FRem:
1017 // Handle floating point instructions separately to account for denormals
1018 // TODO: If a constant expression is being folded rather than an
1019 // instruction, denormals will not be flushed/treated as zero
1020 if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
1021 return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
1022 AllowNonDeterministic);
1023 }
1024 }
1025 return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
1026 }
1027
1028 if (Instruction::isCast(Opcode))
1029 return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
1030
1031 if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
1032 Type *SrcElemTy = GEP->getSourceElementType();
1033 if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
1034 return nullptr;
1035
1036 if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
1037 return C;
1038
1039 return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
1040 GEP->getNoWrapFlags(),
1041 GEP->getInRange());
1042 }
1043
1044 if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
1045 return CE->getWithOperands(Ops);
1046
1047 switch (Opcode) {
1048 default: return nullptr;
1049 case Instruction::ICmp:
1050 case Instruction::FCmp: {
1051 auto *C = cast<CmpInst>(InstOrCE);
1052 return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
1053 DL, TLI, C);
1054 }
1055 case Instruction::Freeze:
1056 return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
1057 case Instruction::Call:
1058 if (auto *F = dyn_cast<Function>(Ops.back())) {
1059 const auto *Call = cast<CallBase>(InstOrCE);
1060 if (canConstantFoldCallTo(Call, F))
1061 return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
1062 AllowNonDeterministic);
1063 }
1064 return nullptr;
1065 case Instruction::Select:
1066 return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
1067 case Instruction::ExtractElement:
1068 return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
1069 case Instruction::ExtractValue:
1070 return ConstantFoldExtractValueInstruction(
1071 Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
1072 case Instruction::InsertElement:
1073 return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
1074 case Instruction::InsertValue:
1075 return ConstantFoldInsertValueInstruction(
1076 Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
1077 case Instruction::ShuffleVector:
1078 return ConstantExpr::getShuffleVector(
1079 Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
1080 case Instruction::Load: {
1081 const auto *LI = dyn_cast<LoadInst>(InstOrCE);
1082 if (LI->isVolatile())
1083 return nullptr;
1084 return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
1085 }
1086 }
1087}
1088
1089} // end anonymous namespace
1090
1091//===----------------------------------------------------------------------===//
1092// Constant Folding public APIs
1093//===----------------------------------------------------------------------===//
1094
1095namespace {
1096
1097Constant *
1098ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
1099 const TargetLibraryInfo *TLI,
1100 SmallDenseMap<Constant *, Constant *> &FoldedOps) {
1101 if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
1102 return const_cast<Constant *>(C);
1103
1104 SmallVector<Constant *, 8> Ops;
1105 for (const Use &OldU : C->operands()) {
1106 Constant *OldC = cast<Constant>(&OldU);
1107 Constant *NewC = OldC;
1108 // Recursively fold the ConstantExpr's operands. If we have already folded
1109 // a ConstantExpr, we don't have to process it again.
1110 if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
1111 auto It = FoldedOps.find(OldC);
1112 if (It == FoldedOps.end()) {
1113 NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
1114 FoldedOps.insert({OldC, NewC});
1115 } else {
1116 NewC = It->second;
1117 }
1118 }
1119 Ops.push_back(NewC);
1120 }
1121
1122 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1123 if (Constant *Res = ConstantFoldInstOperandsImpl(
1124 CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
1125 return Res;
1126 return const_cast<Constant *>(C);
1127 }
1128
1129 assert(isa<ConstantVector>(C));
1130 return ConstantVector::get(Ops);
1131}
1132
1133} // end anonymous namespace
1134
1135 Constant *llvm::ConstantFoldInstruction(const Instruction *I,
1136 const DataLayout &DL,
1137 const TargetLibraryInfo *TLI) {
1138 // Handle PHI nodes quickly here...
1139 if (auto *PN = dyn_cast<PHINode>(I)) {
1140 Constant *CommonValue = nullptr;
1141
1142 SmallDenseMap<Constant *, Constant *> FoldedOps;
1143 for (Value *Incoming : PN->incoming_values()) {
1144 // If the incoming value is undef then skip it. Note that while we could
1145 // skip the value if it is equal to the phi node itself we choose not to
1146 // because that would break the rule that constant folding only applies if
1147 // all operands are constants.
1148 if (isa<UndefValue>(Incoming))
1149 continue;
1150 // If the incoming value is not a constant, then give up.
1151 auto *C = dyn_cast<Constant>(Incoming);
1152 if (!C)
1153 return nullptr;
1154 // Fold the PHI's operands.
1155 C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1156 // If the incoming value is a different constant to
1157 // the one we saw previously, then give up.
1158 if (CommonValue && C != CommonValue)
1159 return nullptr;
1160 CommonValue = C;
1161 }
1162
1163 // If we reach here, all incoming values are the same constant or undef.
1164 return CommonValue ? CommonValue : UndefValue::get(PN->getType());
1165 }
1166
1167 // Scan the operand list, checking to see if they are all constants, if so,
1168 // hand off to ConstantFoldInstOperandsImpl.
1169 if (!all_of(I->operands(), [](const Use &U) { return isa<Constant>(U); }))
1170 return nullptr;
1171
1172 SmallDenseMap<Constant *, Constant *> FoldedOps;
1173 SmallVector<Constant *, 8> Ops;
1174 for (const Use &OpU : I->operands()) {
1175 auto *Op = cast<Constant>(&OpU);
1176 // Fold the Instruction's operands.
1177 Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
1178 Ops.push_back(Op);
1179 }
1180
1181 return ConstantFoldInstOperands(I, Ops, DL, TLI);
1182}
1183
1184 Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
1185 const TargetLibraryInfo *TLI) {
1186 SmallDenseMap<Constant *, Constant *> FoldedOps;
1187 return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1188}
1189
1190 Constant *llvm::ConstantFoldInstOperands(Instruction *I,
1191 ArrayRef<Constant *> Ops,
1192 const DataLayout &DL,
1193 const TargetLibraryInfo *TLI,
1194 bool AllowNonDeterministic) {
1195 return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
1196 AllowNonDeterministic);
1197}
1198
1199 Constant *llvm::ConstantFoldCompareInstOperands(
1200 unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
1201 const TargetLibraryInfo *TLI, const Instruction *I) {
1202 CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
1203 // fold: icmp (inttoptr x), null -> icmp x, 0
1204 // fold: icmp null, (inttoptr x) -> icmp 0, x
1205 // fold: icmp (ptrtoint x), 0 -> icmp x, null
1206 // fold: icmp 0, (ptrtoint x) -> icmp null, x
1207 // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
1208 // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
1209 //
1210 // FIXME: The following comment is out of date and the DataLayout is here now.
1211 // ConstantExpr::getCompare cannot do this, because it doesn't have DL
1212 // around to know if bit truncation is happening.
1213 if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
1214 if (Ops1->isNullValue()) {
1215 if (CE0->getOpcode() == Instruction::IntToPtr) {
1216 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1217 // Convert the integer value to the right size to ensure we get the
1218 // proper extension or truncation.
1219 if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1220 /*IsSigned*/ false, DL)) {
1221 Constant *Null = Constant::getNullValue(C->getType());
1222 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1223 }
1224 }
1225
1226 // Only do this transformation if the int is intptrty in size, otherwise
1227 // there is a truncation or extension that we aren't modeling.
1228 if (CE0->getOpcode() == Instruction::PtrToInt) {
1229 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1230 if (CE0->getType() == IntPtrTy) {
1231 Constant *C = CE0->getOperand(0);
1232 Constant *Null = Constant::getNullValue(C->getType());
1233 return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
1234 }
1235 }
1236 }
1237
1238 if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
1239 if (CE0->getOpcode() == CE1->getOpcode()) {
1240 if (CE0->getOpcode() == Instruction::IntToPtr) {
1241 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1242
1243 // Convert the integer value to the right size to ensure we get the
1244 // proper extension or truncation.
1245 Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,
1246 /*IsSigned*/ false, DL);
1247 Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,
1248 /*IsSigned*/ false, DL);
1249 if (C0 && C1)
1250 return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
1251 }
1252
1253 // Only do this transformation if the int is intptrty in size, otherwise
1254 // there is a truncation or extension that we aren't modeling.
1255 if (CE0->getOpcode() == Instruction::PtrToInt) {
1256 Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
1257 if (CE0->getType() == IntPtrTy &&
1258 CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
1259 return ConstantFoldCompareInstOperands(
1260 Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
1261 }
1262 }
1263 }
1264 }
1265
1266 // Convert pointer comparison (base+offset1) pred (base+offset2) into
1267 // offset1 pred offset2, for the case where the offset is inbounds. This
1268 // only works for equality and unsigned comparison, as inbounds permits
1269 // crossing the sign boundary. However, the offset comparison itself is
1270 // signed.
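// For example, an unsigned compare of
//   getelementptr inbounds (i8, ptr @g, i64 4)
// against
//   getelementptr inbounds (i8, ptr @g, i64 8)
// strips both operands down to @g, so the compare folds to the result of
// comparing the offsets 4 and 8.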
1271 if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
1272 unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
1273 APInt Offset0(IndexWidth, 0);
1274 bool IsEqPred = ICmpInst::isEquality(Predicate);
1275 Value *Stripped0 = Ops0->stripAndAccumulateConstantOffsets(
1276 DL, Offset0, /*AllowNonInbounds=*/IsEqPred,
1277 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1278 /*LookThroughIntToPtr=*/IsEqPred);
1279 APInt Offset1(IndexWidth, 0);
1280 Value *Stripped1 = Ops1->stripAndAccumulateConstantOffsets(
1281 DL, Offset1, /*AllowNonInbounds=*/IsEqPred,
1282 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1283 /*LookThroughIntToPtr=*/IsEqPred);
1284 if (Stripped0 == Stripped1)
1285 return ConstantInt::getBool(
1286 Ops0->getContext(),
1287 ICmpInst::compare(Offset0, Offset1,
1288 ICmpInst::getSignedPredicate(Predicate)));
1289 }
1290 } else if (isa<ConstantExpr>(Ops1)) {
1291 // If RHS is a constant expression, but the left side isn't, swap the
1292 // operands and try again.
1293 Predicate = ICmpInst::getSwappedPredicate(Predicate);
1294 return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
1295 }
1296
1297 if (CmpInst::isFPPredicate(Predicate)) {
1298 // Flush any denormal constant float input according to denormal handling
1299 // mode.
1300 Ops0 = FlushFPConstant(Ops0, I, /*IsOutput=*/false);
1301 if (!Ops0)
1302 return nullptr;
1303 Ops1 = FlushFPConstant(Ops1, I, /*IsOutput=*/false);
1304 if (!Ops1)
1305 return nullptr;
1306 }
1307
1308 return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
1309}
1310
1311 Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
1312 const DataLayout &DL) {
1313 assert(Instruction::isUnaryOp(Opcode));
1314
1315 return ConstantFoldUnaryInstruction(Opcode, Op);
1316}
1317
1318 Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
1319 Constant *RHS,
1320 const DataLayout &DL) {
1321 assert(Instruction::isBinaryOp(Opcode));
1322 if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
1323 if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
1324 return C;
1325
1327 return ConstantExpr::get(Opcode, LHS, RHS);
1328 return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
1329}
1330
1331 static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
1332 DenormalMode::DenormalModeKind Mode) {
1333 switch (Mode) {
1334 case DenormalMode::Dynamic:
1335 return nullptr;
1336 case DenormalMode::IEEE:
1337 return ConstantFP::get(Ty->getContext(), APF);
1338 case DenormalMode::PreserveSign:
1339 return ConstantFP::get(
1340 Ty->getContext(),
1341 APFloat::getZero(APF.getSemantics(), APF.isNegative()));
1342 case DenormalMode::PositiveZero:
1343 return ConstantFP::get(Ty->getContext(),
1344 APFloat::getZero(APF.getSemantics(), false));
1345 default:
1346 break;
1347 }
1348
1349 llvm_unreachable("unknown denormal mode");
1350}
1351
1352/// Return the denormal mode that can be assumed when executing a floating point
1353/// operation at \p CtxI.
1354 static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
1355 if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
1356 return DenormalMode::getDynamic();
1357 return CtxI->getFunction()->getDenormalMode(Ty->getFltSemantics());
1358}
1359
1360 static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
1361 const Instruction *Inst,
1362 bool IsOutput) {
1363 const APFloat &APF = CFP->getValueAPF();
1364 if (!APF.isDenormal())
1365 return CFP;
1366
1367 DenormalMode Mode = getInstrDenormalMode(Inst, CFP->getType());
1368 return flushDenormalConstant(CFP->getType(), APF,
1369 IsOutput ? Mode.Output : Mode.Input);
1370}
1371
1372 Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
1373 bool IsOutput) {
1374 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
1375 return flushDenormalConstantFP(CFP, Inst, IsOutput);
1376
1377 if (isa<ConstantAggregateZero, UndefValue>(Operand))
1378 return Operand;
1379
1380 Type *Ty = Operand->getType();
1381 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1382 if (VecTy) {
1383 if (auto *Splat = dyn_cast_or_null<ConstantFP>(Operand->getSplatValue())) {
1384 ConstantFP *Folded = flushDenormalConstantFP(Splat, Inst, IsOutput);
1385 if (!Folded)
1386 return nullptr;
1387 return ConstantVector::getSplat(VecTy->getElementCount(), Folded);
1388 }
1389
1390 Ty = VecTy->getElementType();
1391 }
1392
1393 if (isa<ConstantExpr>(Operand))
1394 return Operand;
1395
1396 if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
1397 SmallVector<Constant *, 16> NewElts;
1398 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1399 Constant *Element = CV->getAggregateElement(i);
1400 if (isa<UndefValue>(Element)) {
1401 NewElts.push_back(Element);
1402 continue;
1403 }
1404
1405 ConstantFP *CFP = dyn_cast<ConstantFP>(Element);
1406 if (!CFP)
1407 return nullptr;
1408
1409 ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
1410 if (!Folded)
1411 return nullptr;
1412 NewElts.push_back(Folded);
1413 }
1414
1415 return ConstantVector::get(NewElts);
1416 }
1417
1418 if (const auto *CDV = dyn_cast<ConstantDataVector>(Operand)) {
1419 SmallVector<Constant *, 16> NewElts;
1420 for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
1421 const APFloat &Elt = CDV->getElementAsAPFloat(I);
1422 if (!Elt.isDenormal()) {
1423 NewElts.push_back(ConstantFP::get(Ty, Elt));
1424 } else {
1425 DenormalMode Mode = getInstrDenormalMode(Inst, Ty);
1426 ConstantFP *Folded =
1427 flushDenormalConstant(Ty, Elt, IsOutput ? Mode.Output : Mode.Input);
1428 if (!Folded)
1429 return nullptr;
1430 NewElts.push_back(Folded);
1431 }
1432 }
1433
1434 return ConstantVector::get(NewElts);
1435 }
1436
1437 return nullptr;
1438}
1439
1440 Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
1441 Constant *RHS, const DataLayout &DL,
1442 const Instruction *I,
1443 bool AllowNonDeterministic) {
1444 if (Instruction::isBinaryOp(Opcode)) {
1445 // Flush denormal inputs if needed.
1446 Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
1447 if (!Op0)
1448 return nullptr;
1449 Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
1450 if (!Op1)
1451 return nullptr;
1452
1453 // If nsz or an algebraic FMF flag is set, the result of the FP operation
1454 // may change due to future optimization. Don't constant fold them if
1455 // non-deterministic results are not allowed.
1456 if (!AllowNonDeterministic)
1457 if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
1458 if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
1459 FP->hasAllowContract() || FP->hasAllowReciprocal())
1460 return nullptr;
1461
1462 // Calculate constant result.
1463 Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
1464 if (!C)
1465 return nullptr;
1466
1467 // Flush denormal output if needed.
1468 C = FlushFPConstant(C, I, /* IsOutput */ true);
1469 if (!C)
1470 return nullptr;
1471
1472 // The precise NaN value is non-deterministic.
1473 if (!AllowNonDeterministic && C->isNaN())
1474 return nullptr;
1475
1476 return C;
1477 }
1478 // If instruction lacks a parent/function and the denormal mode cannot be
1479 // determined, use the default (IEEE).
1480 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
1481}
1482
1483 Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
1484 Type *DestTy, const DataLayout &DL) {
1485 assert(Instruction::isCast(Opcode));
1486
1487 if (auto *CE = dyn_cast<ConstantExpr>(C))
1488 if (CE->isCast())
1489 if (unsigned NewOp = CastInst::isEliminableCastPair(
1490 Instruction::CastOps(CE->getOpcode()),
1491 Instruction::CastOps(Opcode), CE->getOperand(0)->getType(),
1492 C->getType(), DestTy, &DL))
1493 return ConstantFoldCastOperand(NewOp, CE->getOperand(0), DestTy, DL);
1494
1495 switch (Opcode) {
1496 default:
1497 llvm_unreachable("Missing case");
1498 case Instruction::PtrToAddr:
1499 case Instruction::PtrToInt:
1500 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1501 Constant *FoldedValue = nullptr;
1502 // If the input is an inttoptr, eliminate the pair. This requires knowing
1503 // the width of a pointer, so it can't be done in ConstantExpr::getCast.
1504 if (CE->getOpcode() == Instruction::IntToPtr) {
1505 // zext/trunc the inttoptr to pointer/address size.
1506 Type *MidTy = Opcode == Instruction::PtrToInt
1507 ? DL.getAddressType(CE->getType())
1508 : DL.getIntPtrType(CE->getType());
1509 FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0), MidTy,
1510 /*IsSigned=*/false, DL);
1511 } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
1512 // If we have GEP, we can perform the following folds:
1513 // (ptrtoint/ptrtoaddr (gep null, x)) -> x
1514 // (ptrtoint/ptrtoaddr (gep (gep null, x), y) -> x + y, etc.
1515 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
1516 APInt BaseOffset(BitWidth, 0);
1517 auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
1518 DL, BaseOffset, /*AllowNonInbounds=*/true));
1519 if (Base->isNullValue()) {
1520 FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
1521 } else {
1522 // ptrtoint/ptrtoaddr (gep i8, Ptr, (sub 0, V))
1523 // -> sub (ptrtoint/ptrtoaddr Ptr), V
1524 if (GEP->getNumIndices() == 1 &&
1525 GEP->getSourceElementType()->isIntegerTy(8)) {
1526 auto *Ptr = cast<Constant>(GEP->getPointerOperand());
1527 auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
1528 Type *IntIdxTy = DL.getIndexType(Ptr->getType());
1529 if (Sub && Sub->getType() == IntIdxTy &&
1530 Sub->getOpcode() == Instruction::Sub &&
1531 Sub->getOperand(0)->isNullValue())
1532 FoldedValue = ConstantExpr::getSub(
1533 ConstantExpr::getCast(Opcode, Ptr, IntIdxTy),
1534 Sub->getOperand(1));
1535 }
1536 }
1537 }
1538 if (FoldedValue) {
1539 // Do a zext or trunc to get to the ptrtoint/ptrtoaddr dest size.
1540 return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,
1541 DL);
1542 }
1543 }
1544 break;
1545 case Instruction::IntToPtr:
1546 // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
1547 // the int size is >= the ptr size and the address spaces are the same.
1548 // This requires knowing the width of a pointer, so it can't be done in
1549 // ConstantExpr::getCast.
1550 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
1551 if (CE->getOpcode() == Instruction::PtrToInt) {
1552 Constant *SrcPtr = CE->getOperand(0);
1553 unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
1554 unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
1555
1556 if (MidIntSize >= SrcPtrSize) {
1557 unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
1558 if (SrcAS == DestTy->getPointerAddressSpace())
1559 return FoldBitCast(CE->getOperand(0), DestTy, DL);
1560 }
1561 }
1562 }
1563 break;
1564 case Instruction::Trunc:
1565 case Instruction::ZExt:
1566 case Instruction::SExt:
1567 case Instruction::FPTrunc:
1568 case Instruction::FPExt:
1569 case Instruction::UIToFP:
1570 case Instruction::SIToFP:
1571 case Instruction::FPToUI:
1572 case Instruction::FPToSI:
1573 case Instruction::AddrSpaceCast:
1574 break;
1575 case Instruction::BitCast:
1576 return FoldBitCast(C, DestTy, DL);
1577 }
1578
1579 if (ConstantExpr::isDesirableCastOp(Opcode))
1580 return ConstantExpr::getCast(Opcode, C, DestTy);
1581 return ConstantFoldCastInstruction(Opcode, C, DestTy);
1582}
1583
1584 Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
1585 bool IsSigned, const DataLayout &DL) {
1586 Type *SrcTy = C->getType();
1587 if (SrcTy == DestTy)
1588 return C;
1589 if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
1590 return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);
1591 if (IsSigned)
1592 return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);
1593 return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);
1594}
1595
1596//===----------------------------------------------------------------------===//
1597// Constant Folding for Calls
1598//
1599
1600 bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1601 if (Call->isNoBuiltin())
1602 return false;
1603 if (Call->getFunctionType() != F->getFunctionType())
1604 return false;
1605
1606 // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
1607 // This can be useful for GPU targets or in cross-compilation scenarios
1608 // when the exact target FP behaviour is required, and the host compiler's
1609 // behaviour may be slightly different from the device's run-time behaviour.
1610 if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
1611 any_of(F->args(), [](const Argument &Arg) {
1612 return Arg.getType()->isFloatingPointTy();
1613 })))
1614 return false;
1615
1616 switch (F->getIntrinsicID()) {
1617 // Operations that do not operate on floating-point numbers and do not depend on
1618 // FP environment can be folded even in strictfp functions.
1619 case Intrinsic::bswap:
1620 case Intrinsic::ctpop:
1621 case Intrinsic::ctlz:
1622 case Intrinsic::cttz:
1623 case Intrinsic::fshl:
1624 case Intrinsic::fshr:
1625 case Intrinsic::launder_invariant_group:
1626 case Intrinsic::strip_invariant_group:
1627 case Intrinsic::masked_load:
1628 case Intrinsic::get_active_lane_mask:
1629 case Intrinsic::abs:
1630 case Intrinsic::smax:
1631 case Intrinsic::smin:
1632 case Intrinsic::umax:
1633 case Intrinsic::umin:
1634 case Intrinsic::scmp:
1635 case Intrinsic::ucmp:
1636 case Intrinsic::sadd_with_overflow:
1637 case Intrinsic::uadd_with_overflow:
1638 case Intrinsic::ssub_with_overflow:
1639 case Intrinsic::usub_with_overflow:
1640 case Intrinsic::smul_with_overflow:
1641 case Intrinsic::umul_with_overflow:
1642 case Intrinsic::sadd_sat:
1643 case Intrinsic::uadd_sat:
1644 case Intrinsic::ssub_sat:
1645 case Intrinsic::usub_sat:
1646 case Intrinsic::smul_fix:
1647 case Intrinsic::smul_fix_sat:
1648 case Intrinsic::bitreverse:
1649 case Intrinsic::is_constant:
1650 case Intrinsic::vector_reduce_add:
1651 case Intrinsic::vector_reduce_mul:
1652 case Intrinsic::vector_reduce_and:
1653 case Intrinsic::vector_reduce_or:
1654 case Intrinsic::vector_reduce_xor:
1655 case Intrinsic::vector_reduce_smin:
1656 case Intrinsic::vector_reduce_smax:
1657 case Intrinsic::vector_reduce_umin:
1658 case Intrinsic::vector_reduce_umax:
1659 case Intrinsic::vector_extract:
1660 case Intrinsic::vector_insert:
1661 case Intrinsic::vector_interleave2:
1662 case Intrinsic::vector_deinterleave2:
1663 // Target intrinsics
1664 case Intrinsic::amdgcn_perm:
1665 case Intrinsic::amdgcn_wave_reduce_umin:
1666 case Intrinsic::amdgcn_wave_reduce_umax:
1667 case Intrinsic::amdgcn_wave_reduce_max:
1668 case Intrinsic::amdgcn_wave_reduce_min:
1669 case Intrinsic::amdgcn_wave_reduce_add:
1670 case Intrinsic::amdgcn_wave_reduce_sub:
1671 case Intrinsic::amdgcn_wave_reduce_and:
1672 case Intrinsic::amdgcn_wave_reduce_or:
1673 case Intrinsic::amdgcn_wave_reduce_xor:
1674 case Intrinsic::amdgcn_s_wqm:
1675 case Intrinsic::amdgcn_s_quadmask:
1676 case Intrinsic::amdgcn_s_bitreplicate:
1677 case Intrinsic::arm_mve_vctp8:
1678 case Intrinsic::arm_mve_vctp16:
1679 case Intrinsic::arm_mve_vctp32:
1680 case Intrinsic::arm_mve_vctp64:
1681 case Intrinsic::aarch64_sve_convert_from_svbool:
1682 case Intrinsic::wasm_alltrue:
1683 case Intrinsic::wasm_anytrue:
1684 case Intrinsic::wasm_dot:
1685 // WebAssembly float semantics are always known
1686 case Intrinsic::wasm_trunc_signed:
1687 case Intrinsic::wasm_trunc_unsigned:
1688 return true;
1689
1690  // Floating-point operations cannot be folded in strictfp functions in the
1691  // general case, but can be folded if the FP environment is known to the compiler.
1692 case Intrinsic::minnum:
1693 case Intrinsic::maxnum:
1694 case Intrinsic::minimum:
1695 case Intrinsic::maximum:
1696 case Intrinsic::minimumnum:
1697 case Intrinsic::maximumnum:
1698 case Intrinsic::log:
1699 case Intrinsic::log2:
1700 case Intrinsic::log10:
1701 case Intrinsic::exp:
1702 case Intrinsic::exp2:
1703 case Intrinsic::exp10:
1704 case Intrinsic::sqrt:
1705 case Intrinsic::sin:
1706 case Intrinsic::cos:
1707 case Intrinsic::sincos:
1708 case Intrinsic::sinh:
1709 case Intrinsic::cosh:
1710 case Intrinsic::atan:
1711 case Intrinsic::pow:
1712 case Intrinsic::powi:
1713 case Intrinsic::ldexp:
1714 case Intrinsic::fma:
1715 case Intrinsic::fmuladd:
1716 case Intrinsic::frexp:
1717 case Intrinsic::fptoui_sat:
1718 case Intrinsic::fptosi_sat:
1719 case Intrinsic::convert_from_fp16:
1720 case Intrinsic::convert_to_fp16:
1721 case Intrinsic::amdgcn_cos:
1722 case Intrinsic::amdgcn_cubeid:
1723 case Intrinsic::amdgcn_cubema:
1724 case Intrinsic::amdgcn_cubesc:
1725 case Intrinsic::amdgcn_cubetc:
1726 case Intrinsic::amdgcn_fmul_legacy:
1727 case Intrinsic::amdgcn_fma_legacy:
1728 case Intrinsic::amdgcn_fract:
1729 case Intrinsic::amdgcn_sin:
1730 // The intrinsics below depend on rounding mode in MXCSR.
1731 case Intrinsic::x86_sse_cvtss2si:
1732 case Intrinsic::x86_sse_cvtss2si64:
1733 case Intrinsic::x86_sse_cvttss2si:
1734 case Intrinsic::x86_sse_cvttss2si64:
1735 case Intrinsic::x86_sse2_cvtsd2si:
1736 case Intrinsic::x86_sse2_cvtsd2si64:
1737 case Intrinsic::x86_sse2_cvttsd2si:
1738 case Intrinsic::x86_sse2_cvttsd2si64:
1739 case Intrinsic::x86_avx512_vcvtss2si32:
1740 case Intrinsic::x86_avx512_vcvtss2si64:
1741 case Intrinsic::x86_avx512_cvttss2si:
1742 case Intrinsic::x86_avx512_cvttss2si64:
1743 case Intrinsic::x86_avx512_vcvtsd2si32:
1744 case Intrinsic::x86_avx512_vcvtsd2si64:
1745 case Intrinsic::x86_avx512_cvttsd2si:
1746 case Intrinsic::x86_avx512_cvttsd2si64:
1747 case Intrinsic::x86_avx512_vcvtss2usi32:
1748 case Intrinsic::x86_avx512_vcvtss2usi64:
1749 case Intrinsic::x86_avx512_cvttss2usi:
1750 case Intrinsic::x86_avx512_cvttss2usi64:
1751 case Intrinsic::x86_avx512_vcvtsd2usi32:
1752 case Intrinsic::x86_avx512_vcvtsd2usi64:
1753 case Intrinsic::x86_avx512_cvttsd2usi:
1754 case Intrinsic::x86_avx512_cvttsd2usi64:
1755
1756 // NVVM FMax intrinsics
1757 case Intrinsic::nvvm_fmax_d:
1758 case Intrinsic::nvvm_fmax_f:
1759 case Intrinsic::nvvm_fmax_ftz_f:
1760 case Intrinsic::nvvm_fmax_ftz_nan_f:
1761 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
1762 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
1763 case Intrinsic::nvvm_fmax_nan_f:
1764 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
1765 case Intrinsic::nvvm_fmax_xorsign_abs_f:
1766
1767 // NVVM FMin intrinsics
1768 case Intrinsic::nvvm_fmin_d:
1769 case Intrinsic::nvvm_fmin_f:
1770 case Intrinsic::nvvm_fmin_ftz_f:
1771 case Intrinsic::nvvm_fmin_ftz_nan_f:
1772 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
1773 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
1774 case Intrinsic::nvvm_fmin_nan_f:
1775 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
1776 case Intrinsic::nvvm_fmin_xorsign_abs_f:
1777
1778 // NVVM float/double to int32/uint32 conversion intrinsics
1779 case Intrinsic::nvvm_f2i_rm:
1780 case Intrinsic::nvvm_f2i_rn:
1781 case Intrinsic::nvvm_f2i_rp:
1782 case Intrinsic::nvvm_f2i_rz:
1783 case Intrinsic::nvvm_f2i_rm_ftz:
1784 case Intrinsic::nvvm_f2i_rn_ftz:
1785 case Intrinsic::nvvm_f2i_rp_ftz:
1786 case Intrinsic::nvvm_f2i_rz_ftz:
1787 case Intrinsic::nvvm_f2ui_rm:
1788 case Intrinsic::nvvm_f2ui_rn:
1789 case Intrinsic::nvvm_f2ui_rp:
1790 case Intrinsic::nvvm_f2ui_rz:
1791 case Intrinsic::nvvm_f2ui_rm_ftz:
1792 case Intrinsic::nvvm_f2ui_rn_ftz:
1793 case Intrinsic::nvvm_f2ui_rp_ftz:
1794 case Intrinsic::nvvm_f2ui_rz_ftz:
1795 case Intrinsic::nvvm_d2i_rm:
1796 case Intrinsic::nvvm_d2i_rn:
1797 case Intrinsic::nvvm_d2i_rp:
1798 case Intrinsic::nvvm_d2i_rz:
1799 case Intrinsic::nvvm_d2ui_rm:
1800 case Intrinsic::nvvm_d2ui_rn:
1801 case Intrinsic::nvvm_d2ui_rp:
1802 case Intrinsic::nvvm_d2ui_rz:
1803
1804 // NVVM float/double to int64/uint64 conversion intrinsics
1805 case Intrinsic::nvvm_f2ll_rm:
1806 case Intrinsic::nvvm_f2ll_rn:
1807 case Intrinsic::nvvm_f2ll_rp:
1808 case Intrinsic::nvvm_f2ll_rz:
1809 case Intrinsic::nvvm_f2ll_rm_ftz:
1810 case Intrinsic::nvvm_f2ll_rn_ftz:
1811 case Intrinsic::nvvm_f2ll_rp_ftz:
1812 case Intrinsic::nvvm_f2ll_rz_ftz:
1813 case Intrinsic::nvvm_f2ull_rm:
1814 case Intrinsic::nvvm_f2ull_rn:
1815 case Intrinsic::nvvm_f2ull_rp:
1816 case Intrinsic::nvvm_f2ull_rz:
1817 case Intrinsic::nvvm_f2ull_rm_ftz:
1818 case Intrinsic::nvvm_f2ull_rn_ftz:
1819 case Intrinsic::nvvm_f2ull_rp_ftz:
1820 case Intrinsic::nvvm_f2ull_rz_ftz:
1821 case Intrinsic::nvvm_d2ll_rm:
1822 case Intrinsic::nvvm_d2ll_rn:
1823 case Intrinsic::nvvm_d2ll_rp:
1824 case Intrinsic::nvvm_d2ll_rz:
1825 case Intrinsic::nvvm_d2ull_rm:
1826 case Intrinsic::nvvm_d2ull_rn:
1827 case Intrinsic::nvvm_d2ull_rp:
1828 case Intrinsic::nvvm_d2ull_rz:
1829
1830 // NVVM math intrinsics:
1831 case Intrinsic::nvvm_ceil_d:
1832 case Intrinsic::nvvm_ceil_f:
1833 case Intrinsic::nvvm_ceil_ftz_f:
1834
1835 case Intrinsic::nvvm_fabs:
1836 case Intrinsic::nvvm_fabs_ftz:
1837
1838 case Intrinsic::nvvm_floor_d:
1839 case Intrinsic::nvvm_floor_f:
1840 case Intrinsic::nvvm_floor_ftz_f:
1841
1842 case Intrinsic::nvvm_rcp_rm_d:
1843 case Intrinsic::nvvm_rcp_rm_f:
1844 case Intrinsic::nvvm_rcp_rm_ftz_f:
1845 case Intrinsic::nvvm_rcp_rn_d:
1846 case Intrinsic::nvvm_rcp_rn_f:
1847 case Intrinsic::nvvm_rcp_rn_ftz_f:
1848 case Intrinsic::nvvm_rcp_rp_d:
1849 case Intrinsic::nvvm_rcp_rp_f:
1850 case Intrinsic::nvvm_rcp_rp_ftz_f:
1851 case Intrinsic::nvvm_rcp_rz_d:
1852 case Intrinsic::nvvm_rcp_rz_f:
1853 case Intrinsic::nvvm_rcp_rz_ftz_f:
1854
1855 case Intrinsic::nvvm_round_d:
1856 case Intrinsic::nvvm_round_f:
1857 case Intrinsic::nvvm_round_ftz_f:
1858
1859 case Intrinsic::nvvm_saturate_d:
1860 case Intrinsic::nvvm_saturate_f:
1861 case Intrinsic::nvvm_saturate_ftz_f:
1862
1863 case Intrinsic::nvvm_sqrt_f:
1864 case Intrinsic::nvvm_sqrt_rn_d:
1865 case Intrinsic::nvvm_sqrt_rn_f:
1866 case Intrinsic::nvvm_sqrt_rn_ftz_f:
1867 return !Call->isStrictFP();
1868
1869 // NVVM add intrinsics with explicit rounding modes
1870 case Intrinsic::nvvm_add_rm_d:
1871 case Intrinsic::nvvm_add_rn_d:
1872 case Intrinsic::nvvm_add_rp_d:
1873 case Intrinsic::nvvm_add_rz_d:
1874 case Intrinsic::nvvm_add_rm_f:
1875 case Intrinsic::nvvm_add_rn_f:
1876 case Intrinsic::nvvm_add_rp_f:
1877 case Intrinsic::nvvm_add_rz_f:
1878 case Intrinsic::nvvm_add_rm_ftz_f:
1879 case Intrinsic::nvvm_add_rn_ftz_f:
1880 case Intrinsic::nvvm_add_rp_ftz_f:
1881 case Intrinsic::nvvm_add_rz_ftz_f:
1882
1883 // NVVM div intrinsics with explicit rounding modes
1884 case Intrinsic::nvvm_div_rm_d:
1885 case Intrinsic::nvvm_div_rn_d:
1886 case Intrinsic::nvvm_div_rp_d:
1887 case Intrinsic::nvvm_div_rz_d:
1888 case Intrinsic::nvvm_div_rm_f:
1889 case Intrinsic::nvvm_div_rn_f:
1890 case Intrinsic::nvvm_div_rp_f:
1891 case Intrinsic::nvvm_div_rz_f:
1892 case Intrinsic::nvvm_div_rm_ftz_f:
1893 case Intrinsic::nvvm_div_rn_ftz_f:
1894 case Intrinsic::nvvm_div_rp_ftz_f:
1895 case Intrinsic::nvvm_div_rz_ftz_f:
1896
1897 // NVVM mul intrinsics with explicit rounding modes
1898 case Intrinsic::nvvm_mul_rm_d:
1899 case Intrinsic::nvvm_mul_rn_d:
1900 case Intrinsic::nvvm_mul_rp_d:
1901 case Intrinsic::nvvm_mul_rz_d:
1902 case Intrinsic::nvvm_mul_rm_f:
1903 case Intrinsic::nvvm_mul_rn_f:
1904 case Intrinsic::nvvm_mul_rp_f:
1905 case Intrinsic::nvvm_mul_rz_f:
1906 case Intrinsic::nvvm_mul_rm_ftz_f:
1907 case Intrinsic::nvvm_mul_rn_ftz_f:
1908 case Intrinsic::nvvm_mul_rp_ftz_f:
1909 case Intrinsic::nvvm_mul_rz_ftz_f:
1910
1911 // NVVM fma intrinsics with explicit rounding modes
1912 case Intrinsic::nvvm_fma_rm_d:
1913 case Intrinsic::nvvm_fma_rn_d:
1914 case Intrinsic::nvvm_fma_rp_d:
1915 case Intrinsic::nvvm_fma_rz_d:
1916 case Intrinsic::nvvm_fma_rm_f:
1917 case Intrinsic::nvvm_fma_rn_f:
1918 case Intrinsic::nvvm_fma_rp_f:
1919 case Intrinsic::nvvm_fma_rz_f:
1920 case Intrinsic::nvvm_fma_rm_ftz_f:
1921 case Intrinsic::nvvm_fma_rn_ftz_f:
1922 case Intrinsic::nvvm_fma_rp_ftz_f:
1923 case Intrinsic::nvvm_fma_rz_ftz_f:
1924
1925  // Sign operations are actually bitwise operations; they do not raise
1926  // exceptions even for SNaNs.
1927 case Intrinsic::fabs:
1928 case Intrinsic::copysign:
1929 case Intrinsic::is_fpclass:
1930  // Non-constrained variants of rounding operations imply the default FP
1931  // environment, so they can be folded in any case.
1932 case Intrinsic::ceil:
1933 case Intrinsic::floor:
1934 case Intrinsic::round:
1935 case Intrinsic::roundeven:
1936 case Intrinsic::trunc:
1937 case Intrinsic::nearbyint:
1938 case Intrinsic::rint:
1939 case Intrinsic::canonicalize:
1940
1941  // Constrained intrinsics can be folded if the FP environment is known
1942  // to the compiler.
1943 case Intrinsic::experimental_constrained_fma:
1944 case Intrinsic::experimental_constrained_fmuladd:
1945 case Intrinsic::experimental_constrained_fadd:
1946 case Intrinsic::experimental_constrained_fsub:
1947 case Intrinsic::experimental_constrained_fmul:
1948 case Intrinsic::experimental_constrained_fdiv:
1949 case Intrinsic::experimental_constrained_frem:
1950 case Intrinsic::experimental_constrained_ceil:
1951 case Intrinsic::experimental_constrained_floor:
1952 case Intrinsic::experimental_constrained_round:
1953 case Intrinsic::experimental_constrained_roundeven:
1954 case Intrinsic::experimental_constrained_trunc:
1955 case Intrinsic::experimental_constrained_nearbyint:
1956 case Intrinsic::experimental_constrained_rint:
1957 case Intrinsic::experimental_constrained_fcmp:
1958 case Intrinsic::experimental_constrained_fcmps:
1959 return true;
1960 default:
1961 return false;
1962 case Intrinsic::not_intrinsic: break;
1963 }
1964
1965 if (!F->hasName() || Call->isStrictFP())
1966 return false;
1967
1968  // In these cases, the length check is required. We don't want to return
1969  // true for a name like "cos\0blah", which strcmp would consider equal to
1970  // "cos" but which has length 8.
1971 StringRef Name = F->getName();
1972 switch (Name[0]) {
1973 default:
1974 return false;
1975 case 'a':
1976 return Name == "acos" || Name == "acosf" ||
1977 Name == "asin" || Name == "asinf" ||
1978 Name == "atan" || Name == "atanf" ||
1979 Name == "atan2" || Name == "atan2f";
1980 case 'c':
1981 return Name == "ceil" || Name == "ceilf" ||
1982 Name == "cos" || Name == "cosf" ||
1983 Name == "cosh" || Name == "coshf";
1984 case 'e':
1985 return Name == "exp" || Name == "expf" || Name == "exp2" ||
1986 Name == "exp2f" || Name == "erf" || Name == "erff";
1987 case 'f':
1988 return Name == "fabs" || Name == "fabsf" ||
1989 Name == "floor" || Name == "floorf" ||
1990 Name == "fmod" || Name == "fmodf";
1991 case 'i':
1992 return Name == "ilogb" || Name == "ilogbf";
1993 case 'l':
1994 return Name == "log" || Name == "logf" || Name == "logl" ||
1995 Name == "log2" || Name == "log2f" || Name == "log10" ||
1996 Name == "log10f" || Name == "logb" || Name == "logbf" ||
1997 Name == "log1p" || Name == "log1pf";
1998 case 'n':
1999 return Name == "nearbyint" || Name == "nearbyintf";
2000 case 'p':
2001 return Name == "pow" || Name == "powf";
2002 case 'r':
2003 return Name == "remainder" || Name == "remainderf" ||
2004 Name == "rint" || Name == "rintf" ||
2005 Name == "round" || Name == "roundf";
2006 case 's':
2007 return Name == "sin" || Name == "sinf" ||
2008 Name == "sinh" || Name == "sinhf" ||
2009 Name == "sqrt" || Name == "sqrtf";
2010 case 't':
2011 return Name == "tan" || Name == "tanf" ||
2012 Name == "tanh" || Name == "tanhf" ||
2013 Name == "trunc" || Name == "truncf";
2014 case '_':
2015 // Check for various function names that get used for the math functions
2016 // when the header files are preprocessed with the macro
2017 // __FINITE_MATH_ONLY__ enabled.
2018 // The '12' here is the length of the shortest name that can match.
2019 // We need to check the size before looking at Name[1] and Name[2]
2020 // so we may as well check a limit that will eliminate mismatches.
2021 if (Name.size() < 12 || Name[1] != '_')
2022 return false;
2023 switch (Name[2]) {
2024 default:
2025 return false;
2026 case 'a':
2027 return Name == "__acos_finite" || Name == "__acosf_finite" ||
2028 Name == "__asin_finite" || Name == "__asinf_finite" ||
2029 Name == "__atan2_finite" || Name == "__atan2f_finite";
2030 case 'c':
2031 return Name == "__cosh_finite" || Name == "__coshf_finite";
2032 case 'e':
2033 return Name == "__exp_finite" || Name == "__expf_finite" ||
2034 Name == "__exp2_finite" || Name == "__exp2f_finite";
2035 case 'l':
2036 return Name == "__log_finite" || Name == "__logf_finite" ||
2037 Name == "__log10_finite" || Name == "__log10f_finite";
2038 case 'p':
2039 return Name == "__pow_finite" || Name == "__powf_finite";
2040 case 's':
2041 return Name == "__sinh_finite" || Name == "__sinhf_finite";
2042 }
2043 }
2044}
2045
2046namespace {
2047
2048Constant *GetConstantFoldFPValue(double V, Type *Ty) {
2049 if (Ty->isHalfTy() || Ty->isFloatTy()) {
2050 APFloat APF(V);
2051 bool unused;
2052 APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
2053 return ConstantFP::get(Ty->getContext(), APF);
2054 }
2055 if (Ty->isDoubleTy())
2056 return ConstantFP::get(Ty->getContext(), APFloat(V));
2057 llvm_unreachable("Can only constant fold half/float/double");
2058}
2059
2060#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2061Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
2062 if (Ty->isFP128Ty())
2063 return ConstantFP::get(Ty, V);
2064 llvm_unreachable("Can only constant fold fp128");
2065}
2066#endif
2067
2068/// Clear the floating-point exception state.
2069inline void llvm_fenv_clearexcept() {
2070#if HAVE_DECL_FE_ALL_EXCEPT
2071 feclearexcept(FE_ALL_EXCEPT);
2072#endif
2073 errno = 0;
2074}
2075
2076/// Test if a floating-point exception was raised.
2077inline bool llvm_fenv_testexcept() {
2078 int errno_val = errno;
2079 if (errno_val == ERANGE || errno_val == EDOM)
2080 return true;
2081#if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
2082 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
2083 return true;
2084#endif
2085 return false;
2086}
2087
2088static APFloat FTZPreserveSign(const APFloat &V) {
2089 if (V.isDenormal())
2090 return APFloat::getZero(V.getSemantics(), V.isNegative());
2091 return V;
2092}
2093
2094static APFloat FlushToPositiveZero(const APFloat &V) {
2095 if (V.isDenormal())
2096 return APFloat::getZero(V.getSemantics(), false);
2097 return V;
2098}
2099
2100static APFloat FlushWithDenormKind(const APFloat &V,
2101 DenormalMode::DenormalModeKind DenormKind) {
2104  switch (DenormKind) {
2105  case DenormalMode::DenormalModeKind::IEEE:
2106    return V;
2107  case DenormalMode::DenormalModeKind::PreserveSign:
2108    return FTZPreserveSign(V);
2109  case DenormalMode::DenormalModeKind::PositiveZero:
2110    return FlushToPositiveZero(V);
2111 default:
2112 llvm_unreachable("Invalid denormal mode!");
2113 }
2114}
2115
2116Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
2117 DenormalMode DenormMode = DenormalMode::getIEEE()) {
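  // Evaluate NativeFP on the host (in double precision), honoring the
  // function's denormal mode: bail out when the mode is invalid or dynamic,
  // flush the input according to DenormMode.Input before the call, and flush
  // the result according to DenormMode.Output afterwards.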
2118 if (!DenormMode.isValid() ||
2119 DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
2120 DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
2121 return nullptr;
2122
2123 llvm_fenv_clearexcept();
2124 auto Input = FlushWithDenormKind(V, DenormMode.Input);
2125 double Result = NativeFP(Input.convertToDouble());
2126 if (llvm_fenv_testexcept()) {
2127 llvm_fenv_clearexcept();
2128 return nullptr;
2129 }
2130
2131 Constant *Output = GetConstantFoldFPValue(Result, Ty);
2132 if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
2133 return Output;
2134 const auto *CFP = static_cast<ConstantFP *>(Output);
2135 const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
2136 return ConstantFP::get(Ty->getContext(), Res);
2137}
2138
2139#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2140Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
2141 Type *Ty) {
2142 llvm_fenv_clearexcept();
2143 float128 Result = NativeFP(V.convertToQuad());
2144 if (llvm_fenv_testexcept()) {
2145 llvm_fenv_clearexcept();
2146 return nullptr;
2147 }
2148
2149 return GetConstantFoldFPValue128(Result, Ty);
2150}
2151#endif
2152
2153Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
2154 const APFloat &V, const APFloat &W, Type *Ty) {
2155 llvm_fenv_clearexcept();
2156 double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
2157 if (llvm_fenv_testexcept()) {
2158 llvm_fenv_clearexcept();
2159 return nullptr;
2160 }
2161
2162 return GetConstantFoldFPValue(Result, Ty);
2163}
2164
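// For example, @llvm.vector.reduce.add.v4i32 applied to the constant vector
// <i32 1, i32 2, i32 3, i32 4> folds to i32 10: the accumulator starts at
// element 0 and the reduction's binary op is applied across the remaining
// lanes.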
2165Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
2166  auto *VT = dyn_cast<FixedVectorType>(Op->getType());
2167  if (!VT)
2168 return nullptr;
2169
2170 // This isn't strictly necessary, but handle the special/common case of zero:
2171 // all integer reductions of a zero input produce zero.
2172  if (Op->isNullValue())
2173    return ConstantInt::get(VT->getElementType(), 0);
2174
2175 // This is the same as the underlying binops - poison propagates.
2176 if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
2177 return PoisonValue::get(VT->getElementType());
2178
2179 // TODO: Handle undef.
2180  if (isa<UndefValue>(Op) || Op->containsUndefElement())
2181    return nullptr;
2182
2183 auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
2184 if (!EltC)
2185 return nullptr;
2186
2187 APInt Acc = EltC->getValue();
2188 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
2189 if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
2190 return nullptr;
2191 const APInt &X = EltC->getValue();
2192 switch (IID) {
2193 case Intrinsic::vector_reduce_add:
2194 Acc = Acc + X;
2195 break;
2196 case Intrinsic::vector_reduce_mul:
2197 Acc = Acc * X;
2198 break;
2199 case Intrinsic::vector_reduce_and:
2200 Acc = Acc & X;
2201 break;
2202 case Intrinsic::vector_reduce_or:
2203 Acc = Acc | X;
2204 break;
2205 case Intrinsic::vector_reduce_xor:
2206 Acc = Acc ^ X;
2207 break;
2208 case Intrinsic::vector_reduce_smin:
2209 Acc = APIntOps::smin(Acc, X);
2210 break;
2211 case Intrinsic::vector_reduce_smax:
2212 Acc = APIntOps::smax(Acc, X);
2213 break;
2214 case Intrinsic::vector_reduce_umin:
2215 Acc = APIntOps::umin(Acc, X);
2216 break;
2217 case Intrinsic::vector_reduce_umax:
2218 Acc = APIntOps::umax(Acc, X);
2219 break;
2220 }
2221 }
2222
2223 return ConstantInt::get(Op->getContext(), Acc);
2224}
2225
2226/// Attempt to fold an SSE floating point to integer conversion of a constant
2227/// floating point. If roundTowardZero is false, the default IEEE rounding is
2228/// used (toward nearest, ties to even). This matches the behavior of the
2229/// non-truncating SSE instructions in the default rounding mode. The desired
2230/// integer type Ty is used to select how many bits are available for the
2231/// result. Returns null if the conversion cannot be performed, otherwise
2232/// returns the Constant value resulting from the conversion.
2233Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
2234 Type *Ty, bool IsSigned) {
2235 // All of these conversion intrinsics form an integer of at most 64bits.
2236 unsigned ResultWidth = Ty->getIntegerBitWidth();
2237 assert(ResultWidth <= 64 &&
2238 "Can only constant fold conversions to 64 and 32 bit ints");
2239
2240 uint64_t UIntVal;
2241 bool isExact = false;
2242  APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero
2243                                               : APFloat::rmNearestTiesToEven;
2244  APFloat::opStatus status =
2245      Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,
2246 IsSigned, mode, &isExact);
2247 if (status != APFloat::opOK &&
2248 (!roundTowardZero || status != APFloat::opInexact))
2249 return nullptr;
2250 return ConstantInt::get(Ty, UIntVal, IsSigned);
2251}
2252
2253double getValueAsDouble(ConstantFP *Op) {
2254 Type *Ty = Op->getType();
2255
2256 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
2257 return Op->getValueAPF().convertToDouble();
2258
2259 bool unused;
2260 APFloat APF = Op->getValueAPF();
2261  APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
2262  return APF.convertToDouble();
2263}
2264
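/// Extract the integer value of a ConstantInt operand into C, or record an
/// undef operand by setting C to null. Returns false for anything else.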
2265static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
2266 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
2267 C = &CI->getValue();
2268 return true;
2269 }
2270 if (isa<UndefValue>(Op)) {
2271 C = nullptr;
2272 return true;
2273 }
2274 return false;
2275}
2276
2277/// Checks if the given intrinsic call, which evaluates to constant, is allowed
2278/// to be folded.
2279///
2280/// \param CI Constrained intrinsic call.
2281/// \param St Exception flags raised during constant evaluation.
2282static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
2283 APFloat::opStatus St) {
2284 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2285 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2286
2287 // If the operation does not change exception status flags, it is safe
2288 // to fold.
2289 if (St == APFloat::opStatus::opOK)
2290 return true;
2291
2292  // If evaluation raised an FP exception, the result can depend on the
2293  // rounding mode. If the latter is unknown, folding is not possible.
2294 if (ORM == RoundingMode::Dynamic)
2295 return false;
2296
2297  // If FP exceptions are ignored, fold the call, even if such an exception
2298  // is raised.
2299 if (EB && *EB != fp::ExceptionBehavior::ebStrict)
2300 return true;
2301
2302  // Leave the calculation for runtime so that exception flags are set
2303  // correctly in hardware.
2304 return false;
2305}
2306
2307/// Returns the rounding mode that should be used for constant evaluation.
2308static RoundingMode
2309getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
2310 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2311 if (!ORM || *ORM == RoundingMode::Dynamic)
2312    // Even if the rounding mode is unknown, try evaluating the operation.
2313    // If it does not raise an inexact exception, rounding was not applied, so
2314    // the result is exact and does not depend on the rounding mode. Whether
2315    // other FP exceptions are raised does not depend on the rounding mode.
2316    return RoundingMode::NearestTiesToEven;
2317  return *ORM;
2318}
2319
2320/// Try to constant fold llvm.canonicalize for the given caller and value.
2321static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
2322 const APFloat &Src) {
2323 // Zero, positive and negative, is always OK to fold.
2324 if (Src.isZero()) {
2325 // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
2326 return ConstantFP::get(
2327 CI->getContext(),
2328 APFloat::getZero(Src.getSemantics(), Src.isNegative()));
2329 }
2330
2331 if (!Ty->isIEEELikeFPTy())
2332 return nullptr;
2333
2334 // Zero is always canonical and the sign must be preserved.
2335 //
2336 // Denorms and nans may have special encodings, but it should be OK to fold a
2337 // totally average number.
2338 if (Src.isNormal() || Src.isInfinity())
2339 return ConstantFP::get(CI->getContext(), Src);
2340
2341 if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
2342 DenormalMode DenormMode =
2343 CI->getFunction()->getDenormalMode(Src.getSemantics());
2344
2345 if (DenormMode == DenormalMode::getIEEE())
2346 return ConstantFP::get(CI->getContext(), Src);
2347
2348 if (DenormMode.Input == DenormalMode::Dynamic)
2349 return nullptr;
2350
2351    // We can only fold if we know whether the input and the output are flushed.
2352 if ((DenormMode.Input == DenormalMode::Dynamic &&
2353 DenormMode.Output == DenormalMode::IEEE) ||
2354 (DenormMode.Input == DenormalMode::IEEE &&
2355 DenormMode.Output == DenormalMode::Dynamic))
2356 return nullptr;
2357
2358 bool IsPositive =
2359 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
2360 (DenormMode.Output == DenormalMode::PositiveZero &&
2361 DenormMode.Input == DenormalMode::IEEE));
2362
2363 return ConstantFP::get(CI->getContext(),
2364 APFloat::getZero(Src.getSemantics(), !IsPositive));
2365 }
2366
2367 return nullptr;
2368}
2369
2370static Constant *ConstantFoldScalarCall1(StringRef Name,
2371 Intrinsic::ID IntrinsicID,
2372 Type *Ty,
2373                                         ArrayRef<Constant *> Operands,
2374                                         const TargetLibraryInfo *TLI,
2375 const CallBase *Call) {
2376 assert(Operands.size() == 1 && "Wrong number of operands.");
2377
2378 if (IntrinsicID == Intrinsic::is_constant) {
2379 // We know we have a "Constant" argument. But we want to only
2380 // return true for manifest constants, not those that depend on
2381 // constants with unknowable values, e.g. GlobalValue or BlockAddress.
2382 if (Operands[0]->isManifestConstant())
2383 return ConstantInt::getTrue(Ty->getContext());
2384 return nullptr;
2385 }
2386
2387 if (isa<UndefValue>(Operands[0])) {
2388 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
2389 // ctpop() is between 0 and bitwidth, pick 0 for undef.
2390 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
2391 if (IntrinsicID == Intrinsic::cos ||
2392 IntrinsicID == Intrinsic::ctpop ||
2393 IntrinsicID == Intrinsic::fptoui_sat ||
2394 IntrinsicID == Intrinsic::fptosi_sat ||
2395 IntrinsicID == Intrinsic::canonicalize)
2396 return Constant::getNullValue(Ty);
2397 if (IntrinsicID == Intrinsic::bswap ||
2398 IntrinsicID == Intrinsic::bitreverse ||
2399 IntrinsicID == Intrinsic::launder_invariant_group ||
2400 IntrinsicID == Intrinsic::strip_invariant_group)
2401 return Operands[0];
2402 }
2403
2404  if (isa<ConstantPointerNull>(Operands[0])) {
2405    // launder(null) == null == strip(null) iff in addrspace 0
2406 if (IntrinsicID == Intrinsic::launder_invariant_group ||
2407 IntrinsicID == Intrinsic::strip_invariant_group) {
2408 // If instruction is not yet put in a basic block (e.g. when cloning
2409 // a function during inlining), Call's caller may not be available.
2410 // So check Call's BB first before querying Call->getCaller.
2411 const Function *Caller =
2412 Call->getParent() ? Call->getCaller() : nullptr;
2413 if (Caller &&
2414          !NullPointerIsDefined(
2415              Caller, Operands[0]->getType()->getPointerAddressSpace())) {
2416 return Operands[0];
2417 }
2418 return nullptr;
2419 }
2420 }
2421
2422 if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
2423 if (IntrinsicID == Intrinsic::convert_to_fp16) {
2424 APFloat Val(Op->getValueAPF());
2425
2426 bool lost = false;
2427      Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
2428
2429 return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
2430 }
2431
2432 APFloat U = Op->getValueAPF();
2433
2434 if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
2435 IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
2436 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
2437
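      // wasm.trunc traps on NaN and on out-of-range inputs, so only fold when
      // the conversion is well-defined.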
2438 if (U.isNaN())
2439 return nullptr;
2440
2441 unsigned Width = Ty->getIntegerBitWidth();
2442 APSInt Int(Width, !Signed);
2443 bool IsExact = false;
2444      APFloat::opStatus Status =
2445          U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2446
2447      if (Status == APFloat::opOK || Status == APFloat::opInexact)
2448        return ConstantInt::get(Ty, Int);
2449
2450 return nullptr;
2451 }
2452
2453 if (IntrinsicID == Intrinsic::fptoui_sat ||
2454 IntrinsicID == Intrinsic::fptosi_sat) {
2455 // convertToInteger() already has the desired saturation semantics.
2456 APSInt Int(Ty->getIntegerBitWidth(),
2457 IntrinsicID == Intrinsic::fptoui_sat);
2458 bool IsExact;
2459 U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
2460 return ConstantInt::get(Ty, Int);
2461 }
2462
2463 if (IntrinsicID == Intrinsic::canonicalize)
2464 return constantFoldCanonicalize(Ty, Call, U);
2465
2466#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2467 if (Ty->isFP128Ty()) {
2468 if (IntrinsicID == Intrinsic::log) {
2469 float128 Result = logf128(Op->getValueAPF().convertToQuad());
2470 return GetConstantFoldFPValue128(Result, Ty);
2471 }
2472
2473 LibFunc Fp128Func = NotLibFunc;
2474 if (TLI && TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&
2475 Fp128Func == LibFunc_logl)
2476 return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);
2477 }
2478#endif
2479
2480 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() &&
2481 !Ty->isIntegerTy())
2482 return nullptr;
2483
2484 // Use internal versions of these intrinsics.
2485
2486 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
2487 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2488 return ConstantFP::get(Ty->getContext(), U);
2489 }
2490
2491 if (IntrinsicID == Intrinsic::round) {
2492 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2493 return ConstantFP::get(Ty->getContext(), U);
2494 }
2495
2496 if (IntrinsicID == Intrinsic::roundeven) {
2497 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2498 return ConstantFP::get(Ty->getContext(), U);
2499 }
2500
2501 if (IntrinsicID == Intrinsic::ceil) {
2502 U.roundToIntegral(APFloat::rmTowardPositive);
2503 return ConstantFP::get(Ty->getContext(), U);
2504 }
2505
2506 if (IntrinsicID == Intrinsic::floor) {
2507 U.roundToIntegral(APFloat::rmTowardNegative);
2508 return ConstantFP::get(Ty->getContext(), U);
2509 }
2510
2511 if (IntrinsicID == Intrinsic::trunc) {
2512 U.roundToIntegral(APFloat::rmTowardZero);
2513 return ConstantFP::get(Ty->getContext(), U);
2514 }
2515
2516 if (IntrinsicID == Intrinsic::fabs) {
2517 U.clearSign();
2518 return ConstantFP::get(Ty->getContext(), U);
2519 }
2520
2521 if (IntrinsicID == Intrinsic::amdgcn_fract) {
2522 // The v_fract instruction behaves like the OpenCL spec, which defines
2523 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
2524 // there to prevent fract(-small) from returning 1.0. It returns the
2525 // largest positive floating-point number less than 1.0."
2526 APFloat FloorU(U);
2527 FloorU.roundToIntegral(APFloat::rmTowardNegative);
2528 APFloat FractU(U - FloorU);
2529 APFloat AlmostOne(U.getSemantics(), 1);
2530 AlmostOne.next(/*nextDown*/ true);
2531 return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
2532 }
2533
2534 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
2535 // raise FP exceptions, unless the argument is signaling NaN.
2536
2537 std::optional<APFloat::roundingMode> RM;
2538 switch (IntrinsicID) {
2539 default:
2540 break;
2541 case Intrinsic::experimental_constrained_nearbyint:
2542 case Intrinsic::experimental_constrained_rint: {
2543      auto *CI = cast<ConstrainedFPIntrinsic>(Call);
2544      RM = CI->getRoundingMode();
2545 if (!RM || *RM == RoundingMode::Dynamic)
2546 return nullptr;
2547 break;
2548 }
2549 case Intrinsic::experimental_constrained_round:
2550      RM = APFloat::rmNearestTiesToAway;
2551      break;
2552    case Intrinsic::experimental_constrained_ceil:
2553      RM = APFloat::rmTowardPositive;
2554      break;
2555    case Intrinsic::experimental_constrained_floor:
2556      RM = APFloat::rmTowardNegative;
2557      break;
2558    case Intrinsic::experimental_constrained_trunc:
2559      RM = APFloat::rmTowardZero;
2560      break;
2561 }
2562 if (RM) {
2563      auto *CI = cast<ConstrainedFPIntrinsic>(Call);
2564      if (U.isFinite()) {
2565 APFloat::opStatus St = U.roundToIntegral(*RM);
2566 if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
2567 St == APFloat::opInexact) {
2568 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2569 if (EB == fp::ebStrict)
2570 return nullptr;
2571 }
2572 } else if (U.isSignaling()) {
2573 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2574 if (EB && *EB != fp::ebIgnore)
2575 return nullptr;
2576 U = APFloat::getQNaN(U.getSemantics());
2577 }
2578 return ConstantFP::get(Ty->getContext(), U);
2579 }
2580
2581 // NVVM float/double to signed/unsigned int32/int64 conversions:
2582 switch (IntrinsicID) {
2583 // f2i
2584 case Intrinsic::nvvm_f2i_rm:
2585 case Intrinsic::nvvm_f2i_rn:
2586 case Intrinsic::nvvm_f2i_rp:
2587 case Intrinsic::nvvm_f2i_rz:
2588 case Intrinsic::nvvm_f2i_rm_ftz:
2589 case Intrinsic::nvvm_f2i_rn_ftz:
2590 case Intrinsic::nvvm_f2i_rp_ftz:
2591 case Intrinsic::nvvm_f2i_rz_ftz:
2592 // f2ui
2593 case Intrinsic::nvvm_f2ui_rm:
2594 case Intrinsic::nvvm_f2ui_rn:
2595 case Intrinsic::nvvm_f2ui_rp:
2596 case Intrinsic::nvvm_f2ui_rz:
2597 case Intrinsic::nvvm_f2ui_rm_ftz:
2598 case Intrinsic::nvvm_f2ui_rn_ftz:
2599 case Intrinsic::nvvm_f2ui_rp_ftz:
2600 case Intrinsic::nvvm_f2ui_rz_ftz:
2601 // d2i
2602 case Intrinsic::nvvm_d2i_rm:
2603 case Intrinsic::nvvm_d2i_rn:
2604 case Intrinsic::nvvm_d2i_rp:
2605 case Intrinsic::nvvm_d2i_rz:
2606 // d2ui
2607 case Intrinsic::nvvm_d2ui_rm:
2608 case Intrinsic::nvvm_d2ui_rn:
2609 case Intrinsic::nvvm_d2ui_rp:
2610 case Intrinsic::nvvm_d2ui_rz:
2611 // f2ll
2612 case Intrinsic::nvvm_f2ll_rm:
2613 case Intrinsic::nvvm_f2ll_rn:
2614 case Intrinsic::nvvm_f2ll_rp:
2615 case Intrinsic::nvvm_f2ll_rz:
2616 case Intrinsic::nvvm_f2ll_rm_ftz:
2617 case Intrinsic::nvvm_f2ll_rn_ftz:
2618 case Intrinsic::nvvm_f2ll_rp_ftz:
2619 case Intrinsic::nvvm_f2ll_rz_ftz:
2620 // f2ull
2621 case Intrinsic::nvvm_f2ull_rm:
2622 case Intrinsic::nvvm_f2ull_rn:
2623 case Intrinsic::nvvm_f2ull_rp:
2624 case Intrinsic::nvvm_f2ull_rz:
2625 case Intrinsic::nvvm_f2ull_rm_ftz:
2626 case Intrinsic::nvvm_f2ull_rn_ftz:
2627 case Intrinsic::nvvm_f2ull_rp_ftz:
2628 case Intrinsic::nvvm_f2ull_rz_ftz:
2629 // d2ll
2630 case Intrinsic::nvvm_d2ll_rm:
2631 case Intrinsic::nvvm_d2ll_rn:
2632 case Intrinsic::nvvm_d2ll_rp:
2633 case Intrinsic::nvvm_d2ll_rz:
2634 // d2ull
2635 case Intrinsic::nvvm_d2ull_rm:
2636 case Intrinsic::nvvm_d2ull_rn:
2637 case Intrinsic::nvvm_d2ull_rp:
2638 case Intrinsic::nvvm_d2ull_rz: {
2639      // NaN inputs to float-to-integer conversions need special handling.
2640 if (U.isNaN()) {
2641 // In float-to-integer conversion, NaN inputs are converted to 0
2642 // when the source and destination bitwidths are both less than 64.
2643 if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID))
2644 return ConstantInt::get(Ty, 0);
2645
2646 // Otherwise, the most significant bit is set.
2647 unsigned BitWidth = Ty->getIntegerBitWidth();
2648 uint64_t Val = 1ULL << (BitWidth - 1);
2649 return ConstantInt::get(Ty, APInt(BitWidth, Val, /*IsSigned=*/false));
2650 }
2651
2652 APFloat::roundingMode RMode =
2653          nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
2654      bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
2655 bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
2656
2657 APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2658 auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
2659
2660 // Return max/min value for integers if the result is +/-inf or
2661 // is too large to fit in the result's integer bitwidth.
2662 bool IsExact = false;
2663 FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2664 return ConstantInt::get(Ty, ResInt);
2665 }
2666 }
2667
2668 /// We only fold functions with finite arguments. Folding NaN and inf is
2669 /// likely to be aborted with an exception anyway, and some host libms
2670 /// have known errors raising exceptions.
2671 if (!U.isFinite())
2672 return nullptr;
2673
2674 /// Currently APFloat versions of these functions do not exist, so we use
2675 /// the host native double versions. Float versions are not called
2676 /// directly but for all these it is true (float)(f((double)arg)) ==
2677 /// f(arg). Long double not supported yet.
2678 const APFloat &APF = Op->getValueAPF();
2679
2680 switch (IntrinsicID) {
2681 default: break;
2682 case Intrinsic::log:
2683 return ConstantFoldFP(log, APF, Ty);
2684 case Intrinsic::log2:
2685 // TODO: What about hosts that lack a C99 library?
2686 return ConstantFoldFP(log2, APF, Ty);
2687 case Intrinsic::log10:
2688 // TODO: What about hosts that lack a C99 library?
2689 return ConstantFoldFP(log10, APF, Ty);
2690 case Intrinsic::exp:
2691 return ConstantFoldFP(exp, APF, Ty);
2692 case Intrinsic::exp2:
2693 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2694 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2695 case Intrinsic::exp10:
2696 // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
2697 return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);
2698 case Intrinsic::sin:
2699 return ConstantFoldFP(sin, APF, Ty);
2700 case Intrinsic::cos:
2701 return ConstantFoldFP(cos, APF, Ty);
2702 case Intrinsic::sinh:
2703 return ConstantFoldFP(sinh, APF, Ty);
2704 case Intrinsic::cosh:
2705 return ConstantFoldFP(cosh, APF, Ty);
2706 case Intrinsic::atan:
2707 // Implement optional behavior from C's Annex F for +/-0.0.
2708 if (U.isZero())
2709 return ConstantFP::get(Ty->getContext(), U);
2710 return ConstantFoldFP(atan, APF, Ty);
2711 case Intrinsic::sqrt:
2712 return ConstantFoldFP(sqrt, APF, Ty);
2713
2714 // NVVM Intrinsics:
2715 case Intrinsic::nvvm_ceil_ftz_f:
2716 case Intrinsic::nvvm_ceil_f:
2717 case Intrinsic::nvvm_ceil_d:
2718 return ConstantFoldFP(
2719 ceil, APF, Ty,
2720          nvvm::GetNVVMDenormMode(
2721              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2722
2723 case Intrinsic::nvvm_fabs_ftz:
2724 case Intrinsic::nvvm_fabs:
2725 return ConstantFoldFP(
2726 fabs, APF, Ty,
2727          nvvm::GetNVVMDenormMode(
2728              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2729
2730 case Intrinsic::nvvm_floor_ftz_f:
2731 case Intrinsic::nvvm_floor_f:
2732 case Intrinsic::nvvm_floor_d:
2733 return ConstantFoldFP(
2734 floor, APF, Ty,
2735          nvvm::GetNVVMDenormMode(
2736              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2737
2738 case Intrinsic::nvvm_rcp_rm_ftz_f:
2739 case Intrinsic::nvvm_rcp_rn_ftz_f:
2740 case Intrinsic::nvvm_rcp_rp_ftz_f:
2741 case Intrinsic::nvvm_rcp_rz_ftz_f:
2742 case Intrinsic::nvvm_rcp_rm_d:
2743 case Intrinsic::nvvm_rcp_rm_f:
2744 case Intrinsic::nvvm_rcp_rn_d:
2745 case Intrinsic::nvvm_rcp_rn_f:
2746 case Intrinsic::nvvm_rcp_rp_d:
2747 case Intrinsic::nvvm_rcp_rp_f:
2748 case Intrinsic::nvvm_rcp_rz_d:
2749 case Intrinsic::nvvm_rcp_rz_f: {
2750 APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
2751 bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
2752
2753 auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
2754      APFloat Res = APFloat::getOne(APF.getSemantics());
2755      APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
2756
2757      if (Status == APFloat::opOK || Status == APFloat::opInexact) {
2758        if (IsFTZ)
2759 Res = FTZPreserveSign(Res);
2760 return ConstantFP::get(Ty->getContext(), Res);
2761 }
2762 return nullptr;
2763 }
2764
2765 case Intrinsic::nvvm_round_ftz_f:
2766 case Intrinsic::nvvm_round_f:
2767 case Intrinsic::nvvm_round_d: {
2768 // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
2769 // integer, choosing even integer if source is equidistant between two
2770      // integers, so the semantics are closer to "rint" than to "round".
2771 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2772 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2773      V.roundToIntegral(APFloat::rmNearestTiesToEven);
2774      return ConstantFP::get(Ty->getContext(), V);
2775 }
2776
2777 case Intrinsic::nvvm_saturate_ftz_f:
2778 case Intrinsic::nvvm_saturate_d:
2779 case Intrinsic::nvvm_saturate_f: {
2780 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2781 auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
2782 if (V.isNegative() || V.isZero() || V.isNaN())
2783 return ConstantFP::getZero(Ty);
2784      APFloat One = APFloat::getOne(APF.getSemantics());
2785      if (V > One)
2786 return ConstantFP::get(Ty->getContext(), One);
2787 return ConstantFP::get(Ty->getContext(), APF);
2788 }
2789
2790 case Intrinsic::nvvm_sqrt_rn_ftz_f:
2791 case Intrinsic::nvvm_sqrt_f:
2792 case Intrinsic::nvvm_sqrt_rn_d:
2793 case Intrinsic::nvvm_sqrt_rn_f:
2794 if (APF.isNegative())
2795 return nullptr;
2796 return ConstantFoldFP(
2797 sqrt, APF, Ty,
2798          nvvm::GetNVVMDenormMode(
2799              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2800
2801 // AMDGCN Intrinsics:
2802 case Intrinsic::amdgcn_cos:
2803 case Intrinsic::amdgcn_sin: {
2804 double V = getValueAsDouble(Op);
2805 if (V < -256.0 || V > 256.0)
2806 // The gfx8 and gfx9 architectures handle arguments outside the range
2807 // [-256, 256] differently. This should be a rare case so bail out
2808 // rather than trying to handle the difference.
2809 return nullptr;
2810 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
2811 double V4 = V * 4.0;
2812 if (V4 == floor(V4)) {
2813 // Force exact results for quarter-integer inputs.
2814 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
2815 V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
2816 } else {
2817 if (IsCos)
2818 V = cos(V * 2.0 * numbers::pi);
2819 else
2820 V = sin(V * 2.0 * numbers::pi);
2821 }
2822 return GetConstantFoldFPValue(V, Ty);
2823 }
2824 }
2825
2826 if (!TLI)
2827 return nullptr;
2828
2829    LibFunc Func = NotLibFunc;
2830    if (!TLI->getLibFunc(Name, Func))
2831 return nullptr;
2832
2833 switch (Func) {
2834 default:
2835 break;
2836 case LibFunc_acos:
2837 case LibFunc_acosf:
2838 case LibFunc_acos_finite:
2839 case LibFunc_acosf_finite:
2840 if (TLI->has(Func))
2841 return ConstantFoldFP(acos, APF, Ty);
2842 break;
2843 case LibFunc_asin:
2844 case LibFunc_asinf:
2845 case LibFunc_asin_finite:
2846 case LibFunc_asinf_finite:
2847 if (TLI->has(Func))
2848 return ConstantFoldFP(asin, APF, Ty);
2849 break;
2850 case LibFunc_atan:
2851 case LibFunc_atanf:
2852 // Implement optional behavior from C's Annex F for +/-0.0.
2853 if (U.isZero())
2854 return ConstantFP::get(Ty->getContext(), U);
2855 if (TLI->has(Func))
2856 return ConstantFoldFP(atan, APF, Ty);
2857 break;
2858 case LibFunc_ceil:
2859 case LibFunc_ceilf:
2860 if (TLI->has(Func)) {
2861 U.roundToIntegral(APFloat::rmTowardPositive);
2862 return ConstantFP::get(Ty->getContext(), U);
2863 }
2864 break;
2865 case LibFunc_cos:
2866 case LibFunc_cosf:
2867 if (TLI->has(Func))
2868 return ConstantFoldFP(cos, APF, Ty);
2869 break;
2870 case LibFunc_cosh:
2871 case LibFunc_coshf:
2872 case LibFunc_cosh_finite:
2873 case LibFunc_coshf_finite:
2874 if (TLI->has(Func))
2875 return ConstantFoldFP(cosh, APF, Ty);
2876 break;
2877 case LibFunc_exp:
2878 case LibFunc_expf:
2879 case LibFunc_exp_finite:
2880 case LibFunc_expf_finite:
2881 if (TLI->has(Func))
2882 return ConstantFoldFP(exp, APF, Ty);
2883 break;
2884 case LibFunc_exp2:
2885 case LibFunc_exp2f:
2886 case LibFunc_exp2_finite:
2887 case LibFunc_exp2f_finite:
2888 if (TLI->has(Func))
2889 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2890 return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
2891 break;
2892 case LibFunc_fabs:
2893 case LibFunc_fabsf:
2894 if (TLI->has(Func)) {
2895 U.clearSign();
2896 return ConstantFP::get(Ty->getContext(), U);
2897 }
2898 break;
2899 case LibFunc_floor:
2900 case LibFunc_floorf:
2901 if (TLI->has(Func)) {
2902 U.roundToIntegral(APFloat::rmTowardNegative);
2903 return ConstantFP::get(Ty->getContext(), U);
2904 }
2905 break;
2906 case LibFunc_log:
2907 case LibFunc_logf:
2908 case LibFunc_log_finite:
2909 case LibFunc_logf_finite:
2910 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2911 return ConstantFoldFP(log, APF, Ty);
2912 break;
2913 case LibFunc_log2:
2914 case LibFunc_log2f:
2915 case LibFunc_log2_finite:
2916 case LibFunc_log2f_finite:
2917 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2918 // TODO: What about hosts that lack a C99 library?
2919 return ConstantFoldFP(log2, APF, Ty);
2920 break;
2921 case LibFunc_log10:
2922 case LibFunc_log10f:
2923 case LibFunc_log10_finite:
2924 case LibFunc_log10f_finite:
2925 if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
2926 // TODO: What about hosts that lack a C99 library?
2927 return ConstantFoldFP(log10, APF, Ty);
2928 break;
2929 case LibFunc_ilogb:
2930 case LibFunc_ilogbf:
2931 if (!APF.isZero() && TLI->has(Func))
2932 return ConstantInt::get(Ty, ilogb(APF), true);
2933 break;
2934 case LibFunc_logb:
2935 case LibFunc_logbf:
2936 if (!APF.isZero() && TLI->has(Func))
2937 return ConstantFoldFP(logb, APF, Ty);
2938 break;
2939 case LibFunc_log1p:
2940 case LibFunc_log1pf:
2941 // Implement optional behavior from C's Annex F for +/-0.0.
2942 if (U.isZero())
2943 return ConstantFP::get(Ty->getContext(), U);
2944 if (APF > APFloat::getOne(APF.getSemantics(), true) && TLI->has(Func))
2945 return ConstantFoldFP(log1p, APF, Ty);
2946 break;
2947 case LibFunc_logl:
2948 return nullptr;
2949 case LibFunc_erf:
2950 case LibFunc_erff:
2951 if (TLI->has(Func))
2952 return ConstantFoldFP(erf, APF, Ty);
2953 break;
2954 case LibFunc_nearbyint:
2955 case LibFunc_nearbyintf:
2956 case LibFunc_rint:
2957 case LibFunc_rintf:
2958 if (TLI->has(Func)) {
2959 U.roundToIntegral(APFloat::rmNearestTiesToEven);
2960 return ConstantFP::get(Ty->getContext(), U);
2961 }
2962 break;
2963 case LibFunc_round:
2964 case LibFunc_roundf:
2965 if (TLI->has(Func)) {
2966 U.roundToIntegral(APFloat::rmNearestTiesToAway);
2967 return ConstantFP::get(Ty->getContext(), U);
2968 }
2969 break;
2970 case LibFunc_sin:
2971 case LibFunc_sinf:
2972 if (TLI->has(Func))
2973 return ConstantFoldFP(sin, APF, Ty);
2974 break;
2975 case LibFunc_sinh:
2976 case LibFunc_sinhf:
2977 case LibFunc_sinh_finite:
2978 case LibFunc_sinhf_finite:
2979 if (TLI->has(Func))
2980 return ConstantFoldFP(sinh, APF, Ty);
2981 break;
2982 case LibFunc_sqrt:
2983 case LibFunc_sqrtf:
2984 if (!APF.isNegative() && TLI->has(Func))
2985 return ConstantFoldFP(sqrt, APF, Ty);
2986 break;
2987 case LibFunc_tan:
2988 case LibFunc_tanf:
2989 if (TLI->has(Func))
2990 return ConstantFoldFP(tan, APF, Ty);
2991 break;
2992 case LibFunc_tanh:
2993 case LibFunc_tanhf:
2994 if (TLI->has(Func))
2995 return ConstantFoldFP(tanh, APF, Ty);
2996 break;
2997 case LibFunc_trunc:
2998 case LibFunc_truncf:
2999 if (TLI->has(Func)) {
3000 U.roundToIntegral(APFloat::rmTowardZero);
3001 return ConstantFP::get(Ty->getContext(), U);
3002 }
3003 break;
3004 }
3005 return nullptr;
3006 }
3007
3008 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
3009 switch (IntrinsicID) {
3010 case Intrinsic::bswap:
3011 return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
3012 case Intrinsic::ctpop:
3013 return ConstantInt::get(Ty, Op->getValue().popcount());
3014 case Intrinsic::bitreverse:
3015 return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
3016 case Intrinsic::convert_from_fp16: {
3017 APFloat Val(APFloat::IEEEhalf(), Op->getValue());
3018
3019 bool lost = false;
3020 APFloat::opStatus status = Val.convert(
3021 Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
3022
3023 // Conversion is always precise.
3024 (void)status;
3025 assert(status != APFloat::opInexact && !lost &&
3026 "Precision lost during fp16 constfolding");
3027
3028 return ConstantFP::get(Ty->getContext(), Val);
3029 }
3030
3031 case Intrinsic::amdgcn_s_wqm: {
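      // Whole quad mode: smear each bit across its 2-bit pair, then each pair
      // across its 4-bit group, so every nibble of the result is 0xF iff any
      // bit of the corresponding input nibble was set.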
3032 uint64_t Val = Op->getZExtValue();
3033 Val |= (Val & 0x5555555555555555ULL) << 1 |
3034 ((Val >> 1) & 0x5555555555555555ULL);
3035 Val |= (Val & 0x3333333333333333ULL) << 2 |
3036 ((Val >> 2) & 0x3333333333333333ULL);
3037 return ConstantInt::get(Ty, Val);
3038 }
3039
3040 case Intrinsic::amdgcn_s_quadmask: {
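      // Produce one result bit per 4-bit group of the input: bit I is set iff
      // any bit in the I-th nibble of the source mask is set.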
3041 uint64_t Val = Op->getZExtValue();
3042 uint64_t QuadMask = 0;
3043 for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
3044 if (!(Val & 0xF))
3045 continue;
3046
3047 QuadMask |= (1ULL << I);
3048 }
3049 return ConstantInt::get(Ty, QuadMask);
3050 }
3051
3052 case Intrinsic::amdgcn_s_bitreplicate: {
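      // Duplicate each of the low 32 bits into two adjacent result bits:
      // successively spread the 16-bit halves, bytes, nibbles, bit pairs and
      // single bits apart, then OR the value with itself shifted left by one.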
3053 uint64_t Val = Op->getZExtValue();
3054 Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
3055 Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
3056 Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
3057 Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
3058 Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
3059 Val = Val | Val << 1;
3060 return ConstantInt::get(Ty, Val);
3061 }
3062
3063 default:
3064 return nullptr;
3065 }
3066 }
3067
3068 switch (IntrinsicID) {
3069 default: break;
3070 case Intrinsic::vector_reduce_add:
3071 case Intrinsic::vector_reduce_mul:
3072 case Intrinsic::vector_reduce_and:
3073 case Intrinsic::vector_reduce_or:
3074 case Intrinsic::vector_reduce_xor:
3075 case Intrinsic::vector_reduce_smin:
3076 case Intrinsic::vector_reduce_smax:
3077 case Intrinsic::vector_reduce_umin:
3078 case Intrinsic::vector_reduce_umax:
3079 if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
3080 return C;
3081 break;
3082 }
3083
3084 // Support ConstantVector in case we have an Undef in the top.
3085 if (isa<ConstantVector>(Operands[0]) ||
3086      isa<ConstantDataVector>(Operands[0]) ||
3087      isa<ConstantAggregateZero>(Operands[0])) {
3088    auto *Op = cast<Constant>(Operands[0]);
3089 switch (IntrinsicID) {
3090 default: break;
3091 case Intrinsic::x86_sse_cvtss2si:
3092 case Intrinsic::x86_sse_cvtss2si64:
3093 case Intrinsic::x86_sse2_cvtsd2si:
3094 case Intrinsic::x86_sse2_cvtsd2si64:
3095 if (ConstantFP *FPOp =
3096 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3097 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3098 /*roundTowardZero=*/false, Ty,
3099 /*IsSigned*/true);
3100 break;
3101 case Intrinsic::x86_sse_cvttss2si:
3102 case Intrinsic::x86_sse_cvttss2si64:
3103 case Intrinsic::x86_sse2_cvttsd2si:
3104 case Intrinsic::x86_sse2_cvttsd2si64:
3105 if (ConstantFP *FPOp =
3106 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3107 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3108 /*roundTowardZero=*/true, Ty,
3109 /*IsSigned*/true);
3110 break;
3111
3112 case Intrinsic::wasm_anytrue:
3113 return Op->isZeroValue() ? ConstantInt::get(Ty, 0)
3114 : ConstantInt::get(Ty, 1);
3115
3116 case Intrinsic::wasm_alltrue:
3117 // Check each element individually
3118 unsigned E = cast<FixedVectorType>(Op->getType())->getNumElements();
3119 for (unsigned I = 0; I != E; ++I)
3120 if (Constant *Elt = Op->getAggregateElement(I))
3121 if (Elt->isZeroValue())
3122 return ConstantInt::get(Ty, 0);
3123
3124 return ConstantInt::get(Ty, 1);
3125 }
3126 }
3127
3128 return nullptr;
3129}
3130
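/// Evaluate a constrained fcmp/fcmps with constant operands. Signaling
/// compares raise an invalid-operation exception for any NaN operand; quiet
/// compares raise it only for signaling NaNs. The result is folded only if
/// mayFoldConstrained() permits it for the recorded exception status.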
3131static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
3132                                 const ConstrainedFPIntrinsic *Call) {
3133  APFloat::opStatus St = APFloat::opOK;
3134  auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
3135  FCmpInst::Predicate Cond = FCmp->getPredicate();
3136  if (FCmp->isSignaling()) {
3137    if (Op1.isNaN() || Op2.isNaN())
3138      St = APFloat::opInvalidOp;
3139  } else {
3140    if (Op1.isSignaling() || Op2.isSignaling())
3141      St = APFloat::opInvalidOp;
3142  }
3143 bool Result = FCmpInst::compare(Op1, Op2, Cond);
3144 if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
3145 return ConstantInt::get(Call->getType()->getScalarType(), Result);
3146 return nullptr;
3147}
3148
3149static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
3150                                      ArrayRef<Constant *> Operands,
3151                                      const TargetLibraryInfo *TLI) {
3152 if (!TLI)
3153 return nullptr;
3154
3155  LibFunc Func = NotLibFunc;
3156  if (!TLI->getLibFunc(Name, Func))
3157 return nullptr;
3158
3159 const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);
3160 if (!Op1)
3161 return nullptr;
3162
3163 const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);
3164 if (!Op2)
3165 return nullptr;
3166
3167 const APFloat &Op1V = Op1->getValueAPF();
3168 const APFloat &Op2V = Op2->getValueAPF();
3169
3170 switch (Func) {
3171 default:
3172 break;
3173 case LibFunc_pow:
3174 case LibFunc_powf:
3175 case LibFunc_pow_finite:
3176 case LibFunc_powf_finite:
3177 if (TLI->has(Func))
3178 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3179 break;
3180 case LibFunc_fmod:
3181 case LibFunc_fmodf:
3182 if (TLI->has(Func)) {
3183 APFloat V = Op1->getValueAPF();
3184 if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
3185 return ConstantFP::get(Ty->getContext(), V);
3186 }
3187 break;
3188 case LibFunc_remainder:
3189 case LibFunc_remainderf:
3190 if (TLI->has(Func)) {
3191 APFloat V = Op1->getValueAPF();
3192 if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
3193 return ConstantFP::get(Ty->getContext(), V);
3194 }
3195 break;
3196 case LibFunc_atan2:
3197 case LibFunc_atan2f:
3198 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
3199 // (Solaris), so we do not assume a known result for that.
3200 if (Op1V.isZero() && Op2V.isZero())
3201 return nullptr;
3202 [[fallthrough]];
3203 case LibFunc_atan2_finite:
3204 case LibFunc_atan2f_finite:
3205 if (TLI->has(Func))
3206 return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
3207 break;
3208 }
3209
3210 return nullptr;
3211}
3212
3213static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
3214                                            ArrayRef<Constant *> Operands,
3215                                            const CallBase *Call) {
3216 assert(Operands.size() == 2 && "Wrong number of operands.");
3217
3218 if (Ty->isFloatingPointTy()) {
3219 // TODO: We should have undef handling for all of the FP intrinsics that
3220 // are attempted to be folded in this function.
3221 bool IsOp0Undef = isa<UndefValue>(Operands[0]);
3222 bool IsOp1Undef = isa<UndefValue>(Operands[1]);
3223 switch (IntrinsicID) {
3224 case Intrinsic::maxnum:
3225 case Intrinsic::minnum:
3226 case Intrinsic::maximum:
3227 case Intrinsic::minimum:
3228 case Intrinsic::maximumnum:
3229 case Intrinsic::minimumnum:
3230 case Intrinsic::nvvm_fmax_d:
3231 case Intrinsic::nvvm_fmin_d:
3232 // If one argument is undef, return the other argument.
3233 if (IsOp0Undef)
3234 return Operands[1];
3235 if (IsOp1Undef)
3236 return Operands[0];
3237 break;
3238
3239 case Intrinsic::nvvm_fmax_f:
3240 case Intrinsic::nvvm_fmax_ftz_f:
3241 case Intrinsic::nvvm_fmax_ftz_nan_f:
3242 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3243 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3244 case Intrinsic::nvvm_fmax_nan_f:
3245 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3246 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3247
3248 case Intrinsic::nvvm_fmin_f:
3249 case Intrinsic::nvvm_fmin_ftz_f:
3250 case Intrinsic::nvvm_fmin_ftz_nan_f:
3251 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3252 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3253 case Intrinsic::nvvm_fmin_nan_f:
3254 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3255 case Intrinsic::nvvm_fmin_xorsign_abs_f:
3256 // If one arg is undef, the other arg can be returned only if it is
3257 // constant, as we may need to flush it to sign-preserving zero or
3258 // canonicalize the NaN.
3259 if (!IsOp0Undef && !IsOp1Undef)
3260 break;
3261 if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
3262 if (Op->isNaN()) {
3263 APInt NVCanonicalNaN(32, 0x7fffffff);
3264 return ConstantFP::get(
3265 Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
3266 }
3267 if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
3268 return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
3269 else
3270 return Op;
3271 }
3272 break;
3273 }
3274 }
3275
3276 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3277 const APFloat &Op1V = Op1->getValueAPF();
3278
3279 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3280 if (Op2->getType() != Op1->getType())
3281 return nullptr;
3282 const APFloat &Op2V = Op2->getValueAPF();
3283
3284 if (const auto *ConstrIntr =
3285              dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {
3286        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3287 APFloat Res = Op1V;
3288        APFloat::opStatus St;
3289        switch (IntrinsicID) {
3290 default:
3291 return nullptr;
3292 case Intrinsic::experimental_constrained_fadd:
3293 St = Res.add(Op2V, RM);
3294 break;
3295 case Intrinsic::experimental_constrained_fsub:
3296 St = Res.subtract(Op2V, RM);
3297 break;
3298 case Intrinsic::experimental_constrained_fmul:
3299 St = Res.multiply(Op2V, RM);
3300 break;
3301 case Intrinsic::experimental_constrained_fdiv:
3302 St = Res.divide(Op2V, RM);
3303 break;
3304 case Intrinsic::experimental_constrained_frem:
3305 St = Res.mod(Op2V);
3306 break;
3307 case Intrinsic::experimental_constrained_fcmp:
3308 case Intrinsic::experimental_constrained_fcmps:
3309 return evaluateCompare(Op1V, Op2V, ConstrIntr);
3310 }
3311 if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
3312 St))
3313 return ConstantFP::get(Ty->getContext(), Res);
3314 return nullptr;
3315 }
3316
3317 switch (IntrinsicID) {
3318 default:
3319 break;
3320 case Intrinsic::copysign:
3321 return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
3322 case Intrinsic::minnum:
3323 return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
3324 case Intrinsic::maxnum:
3325 return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
3326 case Intrinsic::minimum:
3327 return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
3328 case Intrinsic::maximum:
3329 return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
3330 case Intrinsic::minimumnum:
3331 return ConstantFP::get(Ty->getContext(), minimumnum(Op1V, Op2V));
3332 case Intrinsic::maximumnum:
3333 return ConstantFP::get(Ty->getContext(), maximumnum(Op1V, Op2V));
3334
3335 case Intrinsic::nvvm_fmax_d:
3336 case Intrinsic::nvvm_fmax_f:
3337 case Intrinsic::nvvm_fmax_ftz_f:
3338 case Intrinsic::nvvm_fmax_ftz_nan_f:
3339 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3340 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3341 case Intrinsic::nvvm_fmax_nan_f:
3342 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3343 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3344
3345 case Intrinsic::nvvm_fmin_d:
3346 case Intrinsic::nvvm_fmin_f:
3347 case Intrinsic::nvvm_fmin_ftz_f:
3348 case Intrinsic::nvvm_fmin_ftz_nan_f:
3349 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3350 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3351 case Intrinsic::nvvm_fmin_nan_f:
3352 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3353 case Intrinsic::nvvm_fmin_xorsign_abs_f: {
3354
3355 bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
3356 IntrinsicID == Intrinsic::nvvm_fmin_d);
3357 bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
3358 bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
3359 bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
3360
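          // Note on the variants handled here: as implemented below, the .ftz
          // forms flush subnormal inputs to a sign-preserving zero, the .nan
          // forms return a canonical NaN when either operand is NaN, and the
          // .xorsign.abs forms compare absolute values and give the result
          // the XOR of the two input signs.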
3361 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3362 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3363
3364 bool XorSign = false;
3365 if (IsXorSignAbs) {
3366 XorSign = A.isNegative() ^ B.isNegative();
3367 A = abs(A);
3368 B = abs(B);
3369 }
3370
3371 bool IsFMax = false;
3372 switch (IntrinsicID) {
3373 case Intrinsic::nvvm_fmax_d:
3374 case Intrinsic::nvvm_fmax_f:
3375 case Intrinsic::nvvm_fmax_ftz_f:
3376 case Intrinsic::nvvm_fmax_ftz_nan_f:
3377 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3378 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3379 case Intrinsic::nvvm_fmax_nan_f:
3380 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3381 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3382 IsFMax = true;
3383 break;
3384 }
3385 APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
3386
3387 if (ShouldCanonicalizeNaNs) {
3388 APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
3389 if (A.isNaN() && B.isNaN())
3390 return ConstantFP::get(Ty, NVCanonicalNaN);
3391 else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
3392 return ConstantFP::get(Ty, NVCanonicalNaN);
3393 }
3394
3395 if (A.isNaN() && B.isNaN())
3396 return Operands[1];
3397 else if (A.isNaN())
3398 Res = B;
3399 else if (B.isNaN())
3400 Res = A;
3401
3402 if (IsXorSignAbs && XorSign != Res.isNegative())
3403 Res.changeSign();
3404
3405 return ConstantFP::get(Ty->getContext(), Res);
3406 }
3407
3408 case Intrinsic::nvvm_add_rm_f:
3409 case Intrinsic::nvvm_add_rn_f:
3410 case Intrinsic::nvvm_add_rp_f:
3411 case Intrinsic::nvvm_add_rz_f:
3412 case Intrinsic::nvvm_add_rm_d:
3413 case Intrinsic::nvvm_add_rn_d:
3414 case Intrinsic::nvvm_add_rp_d:
3415 case Intrinsic::nvvm_add_rz_d:
3416 case Intrinsic::nvvm_add_rm_ftz_f:
3417 case Intrinsic::nvvm_add_rn_ftz_f:
3418 case Intrinsic::nvvm_add_rp_ftz_f:
3419 case Intrinsic::nvvm_add_rz_ftz_f: {
3420
3421 bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID);
3422 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3423 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3424
3425 APFloat::roundingMode RoundMode =
3426 nvvm::GetFAddRoundingMode(IntrinsicID);
3427
3428 APFloat Res = A;
3429 APFloat::opStatus Status = Res.add(B, RoundMode);
3430
3431 if (!Res.isNaN() &&
3432             (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3433           Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3434 return ConstantFP::get(Ty->getContext(), Res);
3435 }
3436 return nullptr;
3437 }
3438
3439 case Intrinsic::nvvm_mul_rm_f:
3440 case Intrinsic::nvvm_mul_rn_f:
3441 case Intrinsic::nvvm_mul_rp_f:
3442 case Intrinsic::nvvm_mul_rz_f:
3443 case Intrinsic::nvvm_mul_rm_d:
3444 case Intrinsic::nvvm_mul_rn_d:
3445 case Intrinsic::nvvm_mul_rp_d:
3446 case Intrinsic::nvvm_mul_rz_d:
3447 case Intrinsic::nvvm_mul_rm_ftz_f:
3448 case Intrinsic::nvvm_mul_rn_ftz_f:
3449 case Intrinsic::nvvm_mul_rp_ftz_f:
3450 case Intrinsic::nvvm_mul_rz_ftz_f: {
3451
3452 bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID);
3453 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3454 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3455
3456 APFloat::roundingMode RoundMode =
3457 nvvm::GetFMulRoundingMode(IntrinsicID);
3458
3459 APFloat Res = A;
3460 APFloat::opStatus Status = Res.multiply(B, RoundMode);
3461
3462 if (!Res.isNaN() &&
3463             (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3464           Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3465 return ConstantFP::get(Ty->getContext(), Res);
3466 }
3467 return nullptr;
3468 }
3469
3470 case Intrinsic::nvvm_div_rm_f:
3471 case Intrinsic::nvvm_div_rn_f:
3472 case Intrinsic::nvvm_div_rp_f:
3473 case Intrinsic::nvvm_div_rz_f:
3474 case Intrinsic::nvvm_div_rm_d:
3475 case Intrinsic::nvvm_div_rn_d:
3476 case Intrinsic::nvvm_div_rp_d:
3477 case Intrinsic::nvvm_div_rz_d:
3478 case Intrinsic::nvvm_div_rm_ftz_f:
3479 case Intrinsic::nvvm_div_rn_ftz_f:
3480 case Intrinsic::nvvm_div_rp_ftz_f:
3481 case Intrinsic::nvvm_div_rz_ftz_f: {
3482 bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID);
3483 APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
3484 APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
3485 APFloat::roundingMode RoundMode =
3486 nvvm::GetFDivRoundingMode(IntrinsicID);
3487
3488 APFloat Res = A;
3489 APFloat::opStatus Status = Res.divide(B, RoundMode);
3490 if (!Res.isNaN() &&
3491             (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3492           Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3493 return ConstantFP::get(Ty->getContext(), Res);
3494 }
3495 return nullptr;
3496 }
3497 }
3498
3499 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
3500 return nullptr;
3501
3502 switch (IntrinsicID) {
3503 default:
3504 break;
3505 case Intrinsic::pow:
3506 return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
3507 case Intrinsic::amdgcn_fmul_legacy:
3508 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3509 // NaN or infinity, gives +0.0.
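        // For example, fmul_legacy(+0.0, NaN) folds to +0.0 here, whereas an
        // ordinary fmul would produce NaN.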
3510 if (Op1V.isZero() || Op2V.isZero())
3511 return ConstantFP::getZero(Ty);
3512 return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
3513 }
3514
3515 } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
3516 switch (IntrinsicID) {
3517 case Intrinsic::ldexp: {
3518 return ConstantFP::get(
3519 Ty->getContext(),
3520 scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));
3521 }
3522 case Intrinsic::is_fpclass: {
3523 FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
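        // Each set bit of the mask admits one floating-point class; e.g.
        // (illustrative) is.fpclass(-0.0, fcZero) folds to true below because
        // the fcNegZero bit is set and the operand is a negative zero.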
3524 bool Result =
3525 ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
3526 ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
3527 ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
3528 ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
3529 ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
3530 ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
3531 ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
3532 ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
3533 ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
3534 ((Mask & fcPosInf) && Op1V.isPosInfinity());
3535 return ConstantInt::get(Ty, Result);
3536 }
3537 case Intrinsic::powi: {
3538 int Exp = static_cast<int>(Op2C->getSExtValue());
3539 switch (Ty->getTypeID()) {
3540 case Type::HalfTyID:
3541 case Type::FloatTyID: {
3542 APFloat Res(static_cast<float>(std::pow(Op1V.convertToFloat(), Exp)));
3543 if (Ty->isHalfTy()) {
3544 bool Unused;
3545           Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
3546                       &Unused);
3547 }
3548 return ConstantFP::get(Ty->getContext(), Res);
3549 }
3550 case Type::DoubleTyID:
3551 return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp));
3552 default:
3553 return nullptr;
3554 }
3555 }
3556 default:
3557 break;
3558 }
3559 }
3560 return nullptr;
3561 }
3562
3563 if (Operands[0]->getType()->isIntegerTy() &&
3564 Operands[1]->getType()->isIntegerTy()) {
3565 const APInt *C0, *C1;
3566 if (!getConstIntOrUndef(Operands[0], C0) ||
3567 !getConstIntOrUndef(Operands[1], C1))
3568 return nullptr;
3569
3570 switch (IntrinsicID) {
3571 default: break;
3572 case Intrinsic::smax:
3573 case Intrinsic::smin:
3574 case Intrinsic::umax:
3575 case Intrinsic::umin:
3576 if (!C0 && !C1)
3577 return UndefValue::get(Ty);
3578 if (!C0 || !C1)
3579 return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
3580 return ConstantInt::get(
3581 Ty, ICmpInst::compare(*C0, *C1,
3582 MinMaxIntrinsic::getPredicate(IntrinsicID))
3583 ? *C0
3584 : *C1);
3585
3586 case Intrinsic::scmp:
3587 case Intrinsic::ucmp:
3588 if (!C0 || !C1)
3589 return ConstantInt::get(Ty, 0);
3590
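      // The folded result is +1, -1 or 0 for greater, less and equal
      // respectively; e.g. (illustrative) ucmp(i8 1, i8 255) folds to -1
      // since 1 <u 255, while scmp on the same bits compares 1 against -1
      // and folds to +1.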
3591 int Res;
3592 if (IntrinsicID == Intrinsic::scmp)
3593 Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
3594 else
3595 Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
3596 return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
3597
3598 case Intrinsic::usub_with_overflow:
3599 case Intrinsic::ssub_with_overflow:
3600 // X - undef -> { 0, false }
3601 // undef - X -> { 0, false }
3602 if (!C0 || !C1)
3603 return Constant::getNullValue(Ty);
3604 [[fallthrough]];
3605 case Intrinsic::uadd_with_overflow:
3606 case Intrinsic::sadd_with_overflow:
3607 // X + undef -> { -1, false }
3608 // undef + x -> { -1, false }
3609 if (!C0 || !C1) {
3610 return ConstantStruct::get(
3611 cast<StructType>(Ty),
3612 {Constant::getAllOnesValue(Ty->getStructElementType(0)),
3613 Constant::getNullValue(Ty->getStructElementType(1))});
3614 }
3615 [[fallthrough]];
3616 case Intrinsic::smul_with_overflow:
3617 case Intrinsic::umul_with_overflow: {
3618 // undef * X -> { 0, false }
3619 // X * undef -> { 0, false }
3620 if (!C0 || !C1)
3621 return Constant::getNullValue(Ty);
3622
3623 APInt Res;
3624 bool Overflow;
3625 switch (IntrinsicID) {
3626 default: llvm_unreachable("Invalid case");
3627 case Intrinsic::sadd_with_overflow:
3628 Res = C0->sadd_ov(*C1, Overflow);
3629 break;
3630 case Intrinsic::uadd_with_overflow:
3631 Res = C0->uadd_ov(*C1, Overflow);
3632 break;
3633 case Intrinsic::ssub_with_overflow:
3634 Res = C0->ssub_ov(*C1, Overflow);
3635 break;
3636 case Intrinsic::usub_with_overflow:
3637 Res = C0->usub_ov(*C1, Overflow);
3638 break;
3639 case Intrinsic::smul_with_overflow:
3640 Res = C0->smul_ov(*C1, Overflow);
3641 break;
3642 case Intrinsic::umul_with_overflow:
3643 Res = C0->umul_ov(*C1, Overflow);
3644 break;
3645 }
3646 Constant *Ops[] = {
3647 ConstantInt::get(Ty->getContext(), Res),
3648 ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
3649 };
3650       return ConstantStruct::get(cast<StructType>(Ty), Ops);
3651     }
3652 case Intrinsic::uadd_sat:
3653 case Intrinsic::sadd_sat:
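      // Saturating adds clamp to the extremes of the type instead of
      // wrapping; e.g. (illustrative) uadd.sat(i8 200, i8 100) folds to 255.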
3654 if (!C0 && !C1)
3655 return UndefValue::get(Ty);
3656 if (!C0 || !C1)
3657 return Constant::getAllOnesValue(Ty);
3658 if (IntrinsicID == Intrinsic::uadd_sat)
3659 return ConstantInt::get(Ty, C0->uadd_sat(*C1));
3660 else
3661 return ConstantInt::get(Ty, C0->sadd_sat(*C1));
3662 case Intrinsic::usub_sat:
3663 case Intrinsic::ssub_sat:
3664 if (!C0 && !C1)
3665 return UndefValue::get(Ty);
3666 if (!C0 || !C1)
3667 return Constant::getNullValue(Ty);
3668 if (IntrinsicID == Intrinsic::usub_sat)
3669 return ConstantInt::get(Ty, C0->usub_sat(*C1));
3670 else
3671 return ConstantInt::get(Ty, C0->ssub_sat(*C1));
3672 case Intrinsic::cttz:
3673 case Intrinsic::ctlz:
3674 assert(C1 && "Must be constant int");
3675
3676 // cttz(0, 1) and ctlz(0, 1) are poison.
3677 if (C1->isOne() && (!C0 || C0->isZero()))
3678 return PoisonValue::get(Ty);
3679 if (!C0)
3680 return Constant::getNullValue(Ty);
3681 if (IntrinsicID == Intrinsic::cttz)
3682 return ConstantInt::get(Ty, C0->countr_zero());
3683 else
3684 return ConstantInt::get(Ty, C0->countl_zero());
3685
3686 case Intrinsic::abs:
3687 assert(C1 && "Must be constant int");
3688 assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
3689
3690 // Undef or minimum val operand with poison min --> poison
3691 if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
3692 return PoisonValue::get(Ty);
3693
3694 // Undef operand with no poison min --> 0 (sign bit must be clear)
3695 if (!C0)
3696 return Constant::getNullValue(Ty);
3697
3698 return ConstantInt::get(Ty, C0->abs());
3699 case Intrinsic::amdgcn_wave_reduce_umin:
3700 case Intrinsic::amdgcn_wave_reduce_umax:
3701 case Intrinsic::amdgcn_wave_reduce_max:
3702 case Intrinsic::amdgcn_wave_reduce_min:
3703 case Intrinsic::amdgcn_wave_reduce_add:
3704 case Intrinsic::amdgcn_wave_reduce_sub:
3705 case Intrinsic::amdgcn_wave_reduce_and:
3706 case Intrinsic::amdgcn_wave_reduce_or:
3707 case Intrinsic::amdgcn_wave_reduce_xor:
3708 return dyn_cast<Constant>(Operands[0]);
3709 }
3710
3711 return nullptr;
3712 }
3713
3714 // Support ConstantVector in case we have an Undef in the top.
3715 if ((isa<ConstantVector>(Operands[0]) ||
3716        isa<ConstantDataVector>(Operands[0])) &&
3717       // Check for default rounding mode.
3718       // FIXME: Support other rounding modes?
3719       isa<ConstantInt>(Operands[1]) &&
3720       cast<ConstantInt>(Operands[1])->getValue() == 4) {
3721 auto *Op = cast<Constant>(Operands[0]);
3722 switch (IntrinsicID) {
3723 default: break;
3724 case Intrinsic::x86_avx512_vcvtss2si32:
3725 case Intrinsic::x86_avx512_vcvtss2si64:
3726 case Intrinsic::x86_avx512_vcvtsd2si32:
3727 case Intrinsic::x86_avx512_vcvtsd2si64:
3728 if (ConstantFP *FPOp =
3729 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3730 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3731 /*roundTowardZero=*/false, Ty,
3732 /*IsSigned*/true);
3733 break;
3734 case Intrinsic::x86_avx512_vcvtss2usi32:
3735 case Intrinsic::x86_avx512_vcvtss2usi64:
3736 case Intrinsic::x86_avx512_vcvtsd2usi32:
3737 case Intrinsic::x86_avx512_vcvtsd2usi64:
3738 if (ConstantFP *FPOp =
3739 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3740 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3741 /*roundTowardZero=*/false, Ty,
3742 /*IsSigned*/false);
3743 break;
3744 case Intrinsic::x86_avx512_cvttss2si:
3745 case Intrinsic::x86_avx512_cvttss2si64:
3746 case Intrinsic::x86_avx512_cvttsd2si:
3747 case Intrinsic::x86_avx512_cvttsd2si64:
3748 if (ConstantFP *FPOp =
3749 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3750 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3751 /*roundTowardZero=*/true, Ty,
3752 /*IsSigned*/true);
3753 break;
3754 case Intrinsic::x86_avx512_cvttss2usi:
3755 case Intrinsic::x86_avx512_cvttss2usi64:
3756 case Intrinsic::x86_avx512_cvttsd2usi:
3757 case Intrinsic::x86_avx512_cvttsd2usi64:
3758 if (ConstantFP *FPOp =
3759 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
3760 return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
3761 /*roundTowardZero=*/true, Ty,
3762 /*IsSigned*/false);
3763 break;
3764 }
3765 }
3766 return nullptr;
3767}
3768
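// Shared helper for the amdgcn cube intrinsics. It picks the major axis among
// the three coordinates by magnitude and, from that choice, derives the cube
// face ID (cubeid), twice the major-axis coordinate (cubema) and the S/T face
// coordinates (cubesc/cubetc), following the selection rules encoded below.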
3769static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
3770 const APFloat &S0,
3771 const APFloat &S1,
3772 const APFloat &S2) {
3773 unsigned ID;
3774 const fltSemantics &Sem = S0.getSemantics();
3775 APFloat MA(Sem), SC(Sem), TC(Sem);
3776 if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
3777 if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
3778 // S2 < 0
3779 ID = 5;
3780 SC = -S0;
3781 } else {
3782 ID = 4;
3783 SC = S0;
3784 }
3785 MA = S2;
3786 TC = -S1;
3787 } else if (abs(S1) >= abs(S0)) {
3788 if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
3789 // S1 < 0
3790 ID = 3;
3791 TC = -S2;
3792 } else {
3793 ID = 2;
3794 TC = S2;
3795 }
3796 MA = S1;
3797 SC = S0;
3798 } else {
3799 if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
3800 // S0 < 0
3801 ID = 1;
3802 SC = S2;
3803 } else {
3804 ID = 0;
3805 SC = -S2;
3806 }
3807 MA = S0;
3808 TC = -S1;
3809 }
3810 switch (IntrinsicID) {
3811 default:
3812 llvm_unreachable("unhandled amdgcn cube intrinsic");
3813 case Intrinsic::amdgcn_cubeid:
3814 return APFloat(Sem, ID);
3815 case Intrinsic::amdgcn_cubema:
3816 return MA + MA;
3817 case Intrinsic::amdgcn_cubesc:
3818 return SC;
3819 case Intrinsic::amdgcn_cubetc:
3820 return TC;
3821 }
3822}
3823
3824static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
3825 Type *Ty) {
3826 const APInt *C0, *C1, *C2;
3827 if (!getConstIntOrUndef(Operands[0], C0) ||
3828 !getConstIntOrUndef(Operands[1], C1) ||
3829 !getConstIntOrUndef(Operands[2], C2))
3830 return nullptr;
3831
3832 if (!C2)
3833 return UndefValue::get(Ty);
3834
3835 APInt Val(32, 0);
3836 unsigned NumUndefBytes = 0;
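  // Each selector byte of C2 produces one byte of the result: selectors 0-7
  // pick a byte out of the {C0, C1} pair, 8-11 replicate a sign bit, 12
  // yields 0x00, and anything from 13 up yields 0xff, matching the checks
  // below.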
3837 for (unsigned I = 0; I < 32; I += 8) {
3838 unsigned Sel = C2->extractBitsAsZExtValue(8, I);
3839 unsigned B = 0;
3840
3841 if (Sel >= 13)
3842 B = 0xff;
3843 else if (Sel == 12)
3844 B = 0x00;
3845 else {
3846 const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
3847 if (!Src)
3848 ++NumUndefBytes;
3849 else if (Sel < 8)
3850 B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
3851 else
3852 B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
3853 }
3854
3855 Val.insertBits(B, I, 8);
3856 }
3857
3858 if (NumUndefBytes == 4)
3859 return UndefValue::get(Ty);
3860
3861 return ConstantInt::get(Ty, Val);
3862}
3863
3864static Constant *ConstantFoldScalarCall3(StringRef Name,
3865 Intrinsic::ID IntrinsicID,
3866 Type *Ty,
3867                                          ArrayRef<Constant *> Operands,
3868                                          const TargetLibraryInfo *TLI,
3869 const CallBase *Call) {
3870 assert(Operands.size() == 3 && "Wrong number of operands.");
3871
3872 if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
3873 if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
3874 if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
3875 const APFloat &C1 = Op1->getValueAPF();
3876 const APFloat &C2 = Op2->getValueAPF();
3877 const APFloat &C3 = Op3->getValueAPF();
3878
3879 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
3880 RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
3881 APFloat Res = C1;
3882             APFloat::opStatus St;
3883             switch (IntrinsicID) {
3884 default:
3885 return nullptr;
3886 case Intrinsic::experimental_constrained_fma:
3887 case Intrinsic::experimental_constrained_fmuladd:
3888 St = Res.fusedMultiplyAdd(C2, C3, RM);
3889 break;
3890 }
3891 if (mayFoldConstrained(
3892 const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
3893 return ConstantFP::get(Ty->getContext(), Res);
3894 return nullptr;
3895 }
3896
3897 switch (IntrinsicID) {
3898 default: break;
3899 case Intrinsic::amdgcn_fma_legacy: {
3900 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3901 // NaN or infinity, gives +0.0.
3902 if (C1.isZero() || C2.isZero()) {
3903 // It's tempting to just return C3 here, but that would give the
3904 // wrong result if C3 was -0.0.
3905 return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
3906 }
3907 [[fallthrough]];
3908 }
3909 case Intrinsic::fma:
3910 case Intrinsic::fmuladd: {
3911 APFloat V = C1;
3912         (void)V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
3913         return ConstantFP::get(Ty->getContext(), V);
3914 }
3915
3916 case Intrinsic::nvvm_fma_rm_f:
3917 case Intrinsic::nvvm_fma_rn_f:
3918 case Intrinsic::nvvm_fma_rp_f:
3919 case Intrinsic::nvvm_fma_rz_f:
3920 case Intrinsic::nvvm_fma_rm_d:
3921 case Intrinsic::nvvm_fma_rn_d:
3922 case Intrinsic::nvvm_fma_rp_d:
3923 case Intrinsic::nvvm_fma_rz_d:
3924 case Intrinsic::nvvm_fma_rm_ftz_f:
3925 case Intrinsic::nvvm_fma_rn_ftz_f:
3926 case Intrinsic::nvvm_fma_rp_ftz_f:
3927 case Intrinsic::nvvm_fma_rz_ftz_f: {
3928 bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID);
3929 APFloat A = IsFTZ ? FTZPreserveSign(C1) : C1;
3930 APFloat B = IsFTZ ? FTZPreserveSign(C2) : C2;
3931 APFloat C = IsFTZ ? FTZPreserveSign(C3) : C3;
3932
3933 APFloat::roundingMode RoundMode =
3934 nvvm::GetFMARoundingMode(IntrinsicID);
3935
3936 APFloat Res = A;
3937 APFloat::opStatus Status = Res.fusedMultiplyAdd(B, C, RoundMode);
3938
3939 if (!Res.isNaN() &&
3940             (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3941           Res = IsFTZ ? FTZPreserveSign(Res) : Res;
3942 return ConstantFP::get(Ty->getContext(), Res);
3943 }
3944 return nullptr;
3945 }
3946
3947 case Intrinsic::amdgcn_cubeid:
3948 case Intrinsic::amdgcn_cubema:
3949 case Intrinsic::amdgcn_cubesc:
3950 case Intrinsic::amdgcn_cubetc: {
3951 APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
3952 return ConstantFP::get(Ty->getContext(), V);
3953 }
3954 }
3955 }
3956 }
3957 }
3958
3959 if (IntrinsicID == Intrinsic::smul_fix ||
3960 IntrinsicID == Intrinsic::smul_fix_sat) {
3961 const APInt *C0, *C1;
3962 if (!getConstIntOrUndef(Operands[0], C0) ||
3963 !getConstIntOrUndef(Operands[1], C1))
3964 return nullptr;
3965
3966 // undef * C -> 0
3967 // C * undef -> 0
3968 if (!C0 || !C1)
3969 return Constant::getNullValue(Ty);
3970
3971 // This code performs rounding towards negative infinity in case the result
3972 // cannot be represented exactly for the given scale. Targets that do care
3973 // about rounding should use a target hook for specifying how rounding
3974 // should be done, and provide their own folding to be consistent with
3975 // rounding. This is the same approach as used by
3976 // DAGTypeLegalizer::ExpandIntRes_MULFIX.
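    // For example (illustrative): with a scale of 2, smul.fix(i8 3, i8 5)
    // folds to (3 * 5) >> 2 == 3, i.e. 0.75 * 1.25 = 0.9375 rounded down to
    // 0.75 when only two fractional bits are kept.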
3977 unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
3978 unsigned Width = C0->getBitWidth();
3979 assert(Scale < Width && "Illegal scale.");
3980 unsigned ExtendedWidth = Width * 2;
3981 APInt Product =
3982 (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
3983 if (IntrinsicID == Intrinsic::smul_fix_sat) {
3984 APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
3985 APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
3986 Product = APIntOps::smin(Product, Max);
3987 Product = APIntOps::smax(Product, Min);
3988 }
3989 return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
3990 }
3991
3992 if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
3993 const APInt *C0, *C1, *C2;
3994 if (!getConstIntOrUndef(Operands[0], C0) ||
3995 !getConstIntOrUndef(Operands[1], C1) ||
3996 !getConstIntOrUndef(Operands[2], C2))
3997 return nullptr;
3998
3999 bool IsRight = IntrinsicID == Intrinsic::fshr;
4000 if (!C2)
4001 return Operands[IsRight ? 1 : 0];
4002 if (!C0 && !C1)
4003 return UndefValue::get(Ty);
4004
4005 // The shift amount is interpreted as modulo the bitwidth. If the shift
4006 // amount is effectively 0, avoid UB due to oversized inverse shift below.
4007 unsigned BitWidth = C2->getBitWidth();
4008 unsigned ShAmt = C2->urem(BitWidth);
4009 if (!ShAmt)
4010 return Operands[IsRight ? 1 : 0];
4011
4012 // (C0 << ShlAmt) | (C1 >> LshrAmt)
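    // e.g. (illustrative) fshl(i8 0x12, i8 0x34, 4) folds to
    // ((0x12 << 4) | (0x34 >> 4)) & 0xff == 0x23.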
4013 unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
4014 unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
4015 if (!C0)
4016 return ConstantInt::get(Ty, C1->lshr(LshrAmt));
4017 if (!C1)
4018 return ConstantInt::get(Ty, C0->shl(ShlAmt));
4019 return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
4020 }
4021
4022 if (IntrinsicID == Intrinsic::amdgcn_perm)
4023 return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
4024
4025 return nullptr;
4026}
4027
4028static Constant *ConstantFoldScalarCall(StringRef Name,
4029 Intrinsic::ID IntrinsicID,
4030 Type *Ty,
4031                                         ArrayRef<Constant *> Operands,
4032                                         const TargetLibraryInfo *TLI,
4033 const CallBase *Call) {
4034 if (IntrinsicID != Intrinsic::not_intrinsic &&
4035       any_of(Operands, IsaPred<PoisonValue>) &&
4036       intrinsicPropagatesPoison(IntrinsicID))
4037 return PoisonValue::get(Ty);
4038
4039 if (Operands.size() == 1)
4040 return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
4041
4042 if (Operands.size() == 2) {
4043 if (Constant *FoldedLibCall =
4044 ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
4045 return FoldedLibCall;
4046 }
4047 return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
4048 }
4049
4050 if (Operands.size() == 3)
4051 return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
4052
4053 return nullptr;
4054}
4055
4056static Constant *ConstantFoldFixedVectorCall(
4057 StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
4058     ArrayRef<Constant *> Operands, const DataLayout &DL,
4059     const TargetLibraryInfo *TLI, const CallBase *Call) {
4060   SmallVector<Constant *, 4> Result(FVTy->getNumElements());
4061   SmallVector<Constant *, 4> Lane(Operands.size());
4062   Type *Ty = FVTy->getElementType();
4063
4064 switch (IntrinsicID) {
4065 case Intrinsic::masked_load: {
4066 auto *SrcPtr = Operands[0];
4067 auto *Mask = Operands[2];
4068 auto *Passthru = Operands[3];
4069
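    // This fold only succeeds when every lane resolves to either the loaded
    // constant element (mask bit known true) or the passthru element (mask
    // bit known false); any lane that cannot be resolved keeps the intrinsic.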
4070 Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
4071
4072 SmallVector<Constant *, 32> NewElements;
4073 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4074 auto *MaskElt = Mask->getAggregateElement(I);
4075 if (!MaskElt)
4076 break;
4077 auto *PassthruElt = Passthru->getAggregateElement(I);
4078 auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
4079 if (isa<UndefValue>(MaskElt)) {
4080 if (PassthruElt)
4081 NewElements.push_back(PassthruElt);
4082 else if (VecElt)
4083 NewElements.push_back(VecElt);
4084 else
4085 return nullptr;
4086 }
4087 if (MaskElt->isNullValue()) {
4088 if (!PassthruElt)
4089 return nullptr;
4090 NewElements.push_back(PassthruElt);
4091 } else if (MaskElt->isOneValue()) {
4092 if (!VecElt)
4093 return nullptr;
4094 NewElements.push_back(VecElt);
4095 } else {
4096 return nullptr;
4097 }
4098 }
4099 if (NewElements.size() != FVTy->getNumElements())
4100 return nullptr;
4101 return ConstantVector::get(NewElements);
4102 }
4103 case Intrinsic::arm_mve_vctp8:
4104 case Intrinsic::arm_mve_vctp16:
4105 case Intrinsic::arm_mve_vctp32:
4106 case Intrinsic::arm_mve_vctp64: {
4107 if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
4108 unsigned Lanes = FVTy->getNumElements();
4109 uint64_t Limit = Op->getZExtValue();
4110
4111       SmallVector<Constant *, 16> NCs;
4112       for (unsigned i = 0; i < Lanes; i++) {
4113         if (i < Limit)
4114           NCs.push_back(ConstantInt::getTrue(Ty));
4115         else
4116           NCs.push_back(ConstantInt::getFalse(Ty));
4117       }
4118 return ConstantVector::get(NCs);
4119 }
4120 return nullptr;
4121 }
4122 case Intrinsic::get_active_lane_mask: {
4123 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
4124 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
4125 if (Op0 && Op1) {
4126 unsigned Lanes = FVTy->getNumElements();
4127 uint64_t Base = Op0->getZExtValue();
4128 uint64_t Limit = Op1->getZExtValue();
4129
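      // Lane i of the result is true iff Base + i < Limit; e.g.
      // (illustrative) get.active.lane.mask(i64 6, i64 8) on a 4-lane mask
      // folds to <true, true, false, false>.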
4130       SmallVector<Constant *, 16> NCs;
4131       for (unsigned i = 0; i < Lanes; i++) {
4132         if (Base + i < Limit)
4133           NCs.push_back(ConstantInt::getTrue(Ty));
4134         else
4135           NCs.push_back(ConstantInt::getFalse(Ty));
4136       }
4137 return ConstantVector::get(NCs);
4138 }
4139 return nullptr;
4140 }
4141 case Intrinsic::vector_extract: {
4142 auto *Idx = dyn_cast<ConstantInt>(Operands[1]);
4143 Constant *Vec = Operands[0];
4144 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4145 return nullptr;
4146
4147 unsigned NumElements = FVTy->getNumElements();
4148 unsigned VecNumElements =
4149 cast<FixedVectorType>(Vec->getType())->getNumElements();
4150 unsigned StartingIndex = Idx->getZExtValue();
4151
4152     // Extracting the entire vector is a nop.
4153 if (NumElements == VecNumElements && StartingIndex == 0)
4154 return Vec;
4155
4156 for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
4157 ++I) {
4158 Constant *Elt = Vec->getAggregateElement(I);
4159 if (!Elt)
4160 return nullptr;
4161 Result[I - StartingIndex] = Elt;
4162 }
4163
4164 return ConstantVector::get(Result);
4165 }
4166 case Intrinsic::vector_insert: {
4167 Constant *Vec = Operands[0];
4168 Constant *SubVec = Operands[1];
4169 auto *Idx = dyn_cast<ConstantInt>(Operands[2]);
4170 if (!Idx || !isa<FixedVectorType>(Vec->getType()))
4171 return nullptr;
4172
4173 unsigned SubVecNumElements =
4174 cast<FixedVectorType>(SubVec->getType())->getNumElements();
4175 unsigned VecNumElements =
4176 cast<FixedVectorType>(Vec->getType())->getNumElements();
4177 unsigned IdxN = Idx->getZExtValue();
4178     // Replacing the entire vector with a subvec is a nop.
4179 if (SubVecNumElements == VecNumElements && IdxN == 0)
4180 return SubVec;
4181
4182 for (unsigned I = 0; I < VecNumElements; ++I) {
4183 Constant *Elt;
4184 if (I < IdxN + SubVecNumElements)
4185 Elt = SubVec->getAggregateElement(I - IdxN);
4186 else
4187 Elt = Vec->getAggregateElement(I);
4188 if (!Elt)
4189 return nullptr;
4190 Result[I] = Elt;
4191 }
4192 return ConstantVector::get(Result);
4193 }
4194 case Intrinsic::vector_interleave2: {
4195 unsigned NumElements =
4196 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
4197 for (unsigned I = 0; I < NumElements; ++I) {
4198 Constant *Elt0 = Operands[0]->getAggregateElement(I);
4199 Constant *Elt1 = Operands[1]->getAggregateElement(I);
4200 if (!Elt0 || !Elt1)
4201 return nullptr;
4202 Result[2 * I] = Elt0;
4203 Result[2 * I + 1] = Elt1;
4204 }
4205 return ConstantVector::get(Result);
4206 }
4207 case Intrinsic::wasm_dot: {
4208 unsigned NumElements =
4209 cast<FixedVectorType>(Operands[0]->getType())->getNumElements();
4210
4211 assert(NumElements == 8 && Result.size() == 4 &&
4212 "wasm dot takes i16x8 and produces i32x4");
4213 assert(Ty->isIntegerTy());
4214 int32_t MulVector[8];
4215
4216 for (unsigned I = 0; I < NumElements; ++I) {
4217 ConstantInt *Elt0 =
4218 cast<ConstantInt>(Operands[0]->getAggregateElement(I));
4219 ConstantInt *Elt1 =
4220 cast<ConstantInt>(Operands[1]->getAggregateElement(I));
4221
4222 MulVector[I] = Elt0->getSExtValue() * Elt1->getSExtValue();
4223 }
4224 for (unsigned I = 0; I < Result.size(); I++) {
4225 int64_t IAdd = (int64_t)MulVector[I * 2] + (int64_t)MulVector[I * 2 + 1];
4226 Result[I] = ConstantInt::get(Ty, IAdd);
4227 }
4228
4229 return ConstantVector::get(Result);
4230 }
4231 default:
4232 break;
4233 }
4234
4235 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4236 // Gather a column of constants.
4237 for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
4238 // Some intrinsics use a scalar type for certain arguments.
4239 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J, /*TTI=*/nullptr)) {
4240 Lane[J] = Operands[J];
4241 continue;
4242 }
4243
4244 Constant *Agg = Operands[J]->getAggregateElement(I);
4245 if (!Agg)
4246 return nullptr;
4247
4248 Lane[J] = Agg;
4249 }
4250
4251 // Use the regular scalar folding to simplify this column.
4252 Constant *Folded =
4253 ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
4254 if (!Folded)
4255 return nullptr;
4256 Result[I] = Folded;
4257 }
4258
4259 return ConstantVector::get(Result);
4260}
4261
4262static Constant *ConstantFoldScalableVectorCall(
4263 StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
4264     ArrayRef<Constant *> Operands, const DataLayout &DL,
4265     const TargetLibraryInfo *TLI, const CallBase *Call) {
4266 switch (IntrinsicID) {
4267 case Intrinsic::aarch64_sve_convert_from_svbool: {
4268 auto *Src = dyn_cast<Constant>(Operands[0]);
4269 if (!Src || !Src->isNullValue())
4270 break;
4271
4272 return ConstantInt::getFalse(SVTy);
4273 }
4274 case Intrinsic::get_active_lane_mask: {
4275 auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
4276 auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
4277 if (Op0 && Op1 && Op0->getValue().uge(Op1->getValue()))
4278 return ConstantVector::getNullValue(SVTy);
4279 break;
4280 }
4281 default:
4282 break;
4283 }
4284
4285 // If trivially vectorizable, try folding it via the scalar call if all
4286 // operands are splats.
4287
4288 // TODO: ConstantFoldFixedVectorCall should probably check this too?
4289 if (!isTriviallyVectorizable(IntrinsicID))
4290 return nullptr;
4291
4292   SmallVector<Constant *, 4> SplatOps;
4293   for (auto [I, Op] : enumerate(Operands)) {
4294 if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, I, /*TTI=*/nullptr)) {
4295 SplatOps.push_back(Op);
4296 continue;
4297 }
4298 Constant *Splat = Op->getSplatValue();
4299 if (!Splat)
4300 return nullptr;
4301 SplatOps.push_back(Splat);
4302 }
4303 Constant *Folded = ConstantFoldScalarCall(
4304 Name, IntrinsicID, SVTy->getElementType(), SplatOps, TLI, Call);
4305 if (!Folded)
4306 return nullptr;
4307 return ConstantVector::getSplat(SVTy->getElementCount(), Folded);
4308}
4309
4310static std::pair<Constant *, Constant *>
4311ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
4312 if (isa<PoisonValue>(Op))
4313 return {Op, PoisonValue::get(IntTy)};
4314
4315 auto *ConstFP = dyn_cast<ConstantFP>(Op);
4316 if (!ConstFP)
4317 return {};
4318
4319 const APFloat &U = ConstFP->getValueAPF();
4320 int FrexpExp;
4321 APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
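  // e.g. (illustrative) frexp(8.0) yields a mantissa of 0.5 and an exponent
  // of 4, since 8.0 == 0.5 * 2^4.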
4322 Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);
4323
4324 // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
4325 // using undef.
4326 Constant *Result1 = FrexpMant.isFinite()
4327 ? ConstantInt::getSigned(IntTy, FrexpExp)
4328 : ConstantInt::getNullValue(IntTy);
4329 return {Result0, Result1};
4330}
4331
4332/// Handle intrinsics that return tuples, which may be tuples of vectors.
4333static Constant *
4334ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
4335                        StructType *StTy, ArrayRef<Constant *> Operands,
4336                        const DataLayout &DL, const TargetLibraryInfo *TLI,
4337 const CallBase *Call) {
4338
4339 switch (IntrinsicID) {
4340 case Intrinsic::frexp: {
4341 Type *Ty0 = StTy->getContainedType(0);
4342 Type *Ty1 = StTy->getContainedType(1)->getScalarType();
4343
4344 if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
4345 SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
4346 SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());
4347
4348 for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
4349 Constant *Lane = Operands[0]->getAggregateElement(I);
4350 std::tie(Results0[I], Results1[I]) =
4351 ConstantFoldScalarFrexpCall(Lane, Ty1);
4352 if (!Results0[I])
4353 return nullptr;
4354 }
4355
4356 return ConstantStruct::get(StTy, ConstantVector::get(Results0),
4357 ConstantVector::get(Results1));
4358 }
4359
4360 auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
4361 if (!Result0)
4362 return nullptr;
4363 return ConstantStruct::get(StTy, Result0, Result1);
4364 }
4365 case Intrinsic::sincos: {
4366 Type *Ty = StTy->getContainedType(0);
4367 Type *TyScalar = Ty->getScalarType();
4368
4369 auto ConstantFoldScalarSincosCall =
4370 [&](Constant *Op) -> std::pair<Constant *, Constant *> {
4371 Constant *SinResult =
4372 ConstantFoldScalarCall(Name, Intrinsic::sin, TyScalar, Op, TLI, Call);
4373 Constant *CosResult =
4374 ConstantFoldScalarCall(Name, Intrinsic::cos, TyScalar, Op, TLI, Call);
4375 return std::make_pair(SinResult, CosResult);
4376 };
4377
4378 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
4379 SmallVector<Constant *> SinResults(FVTy->getNumElements());
4380 SmallVector<Constant *> CosResults(FVTy->getNumElements());
4381
4382 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4383 Constant *Lane = Operands[0]->getAggregateElement(I);
4384 std::tie(SinResults[I], CosResults[I]) =
4385 ConstantFoldScalarSincosCall(Lane);
4386 if (!SinResults[I] || !CosResults[I])
4387 return nullptr;
4388 }
4389
4390 return ConstantStruct::get(StTy, ConstantVector::get(SinResults),
4391 ConstantVector::get(CosResults));
4392 }
4393
4394 auto [SinResult, CosResult] = ConstantFoldScalarSincosCall(Operands[0]);
4395 if (!SinResult || !CosResult)
4396 return nullptr;
4397 return ConstantStruct::get(StTy, SinResult, CosResult);
4398 }
4399 case Intrinsic::vector_deinterleave2: {
4400 auto *Vec = Operands[0];
4401 auto *VecTy = cast<VectorType>(Vec->getType());
4402
4403 if (auto *EltC = Vec->getSplatValue()) {
4404 ElementCount HalfEC = VecTy->getElementCount().divideCoefficientBy(2);
4405 auto *HalfVec = ConstantVector::getSplat(HalfEC, EltC);
4406 return ConstantStruct::get(StTy, HalfVec, HalfVec);
4407 }
4408
4409 if (!isa<FixedVectorType>(Vec->getType()))
4410 return nullptr;
4411
4412 unsigned NumElements = VecTy->getElementCount().getFixedValue() / 2;
4413 SmallVector<Constant *, 4> Res0(NumElements), Res1(NumElements);
4414 for (unsigned I = 0; I < NumElements; ++I) {
4415 Constant *Elt0 = Vec->getAggregateElement(2 * I);
4416 Constant *Elt1 = Vec->getAggregateElement(2 * I + 1);
4417 if (!Elt0 || !Elt1)
4418 return nullptr;
4419 Res0[I] = Elt0;
4420 Res1[I] = Elt1;
4421 }
4422 return ConstantStruct::get(StTy, ConstantVector::get(Res0),
4423 ConstantVector::get(Res1));
4424 }
4425 default:
4426 // TODO: Constant folding of vector intrinsics that fall through here does
4427 // not work (e.g. overflow intrinsics)
4428 return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
4429 }
4430
4431 return nullptr;
4432}
4433
4434} // end anonymous namespace
4435
4436 Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
4437                                                  Constant *RHS, Type *Ty,
4438                                                  Instruction *FMFSource) {
4439   auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
4440 // Ensure we check flags like StrictFP that might prevent this from getting
4441 // folded before generating a result.
4442 if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
4443 return nullptr;
4444 return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
4445}
4446
4447 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
4448                                             ArrayRef<Constant *> Operands,
4449                                             const TargetLibraryInfo *TLI,
4450 bool AllowNonDeterministic) {
4451 if (Call->isNoBuiltin())
4452 return nullptr;
4453 if (!F->hasName())
4454 return nullptr;
4455
4456 // If this is not an intrinsic and not recognized as a library call, bail out.
4457 Intrinsic::ID IID = F->getIntrinsicID();
4458 if (IID == Intrinsic::not_intrinsic) {
4459 if (!TLI)
4460 return nullptr;
4461 LibFunc LibF;
4462 if (!TLI->getLibFunc(*F, LibF))
4463 return nullptr;
4464 }
4465
4466 // Conservatively assume that floating-point libcalls may be
4467 // non-deterministic.
4468 Type *Ty = F->getReturnType();
4469 if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
4470 return nullptr;
4471
4472 StringRef Name = F->getName();
4473 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
4474 return ConstantFoldFixedVectorCall(
4475 Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);
4476
4477 if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
4478 return ConstantFoldScalableVectorCall(
4479 Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);
4480
4481 if (auto *StTy = dyn_cast<StructType>(Ty))
4482 return ConstantFoldStructCall(Name, IID, StTy, Operands,
4483 F->getDataLayout(), TLI, Call);
4484
4485 // TODO: If this is a library function, we already discovered that above,
4486 // so we should pass the LibFunc, not the name (and it might be better
4487 // still to separate intrinsic handling from libcalls).
4488 return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
4489}
4490
4491 bool llvm::isMathLibCallNoop(const CallBase *Call,
4492                                const TargetLibraryInfo *TLI) {
4493 // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
4494 // (and to some extent ConstantFoldScalarCall).
4495 if (Call->isNoBuiltin() || Call->isStrictFP())
4496 return false;
4497 Function *F = Call->getCalledFunction();
4498 if (!F)
4499 return false;
4500
4501 LibFunc Func;
4502 if (!TLI || !TLI->getLibFunc(*F, Func))
4503 return false;
4504
4505 if (Call->arg_size() == 1) {
4506 if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
4507 const APFloat &Op = OpC->getValueAPF();
4508 switch (Func) {
4509 case LibFunc_logl:
4510 case LibFunc_log:
4511 case LibFunc_logf:
4512 case LibFunc_log2l:
4513 case LibFunc_log2:
4514 case LibFunc_log2f:
4515 case LibFunc_log10l:
4516 case LibFunc_log10:
4517 case LibFunc_log10f:
4518 return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
4519
4520 case LibFunc_ilogb:
4521 return !Op.isNaN() && !Op.isZero() && !Op.isInfinity();
4522
4523 case LibFunc_expl:
4524 case LibFunc_exp:
4525 case LibFunc_expf:
4526 // FIXME: These boundaries are slightly conservative.
4527 if (OpC->getType()->isDoubleTy())
4528 return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
4529 if (OpC->getType()->isFloatTy())
4530 return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
4531 break;
4532
4533 case LibFunc_exp2l:
4534 case LibFunc_exp2:
4535 case LibFunc_exp2f:
4536 // FIXME: These boundaries are slightly conservative.
4537 if (OpC->getType()->isDoubleTy())
4538 return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
4539 if (OpC->getType()->isFloatTy())
4540 return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
4541 break;
4542
4543 case LibFunc_sinl:
4544 case LibFunc_sin:
4545 case LibFunc_sinf:
4546 case LibFunc_cosl:
4547 case LibFunc_cos:
4548 case LibFunc_cosf:
4549 return !Op.isInfinity();
4550
4551 case LibFunc_tanl:
4552 case LibFunc_tan:
4553 case LibFunc_tanf: {
4554 // FIXME: Stop using the host math library.
4555 // FIXME: The computation isn't done in the right precision.
4556 Type *Ty = OpC->getType();
4557 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
4558 return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
4559 break;
4560 }
4561
4562 case LibFunc_atan:
4563 case LibFunc_atanf:
4564 case LibFunc_atanl:
4565       // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
4566 return true;
4567
4568 case LibFunc_asinl:
4569 case LibFunc_asin:
4570 case LibFunc_asinf:
4571 case LibFunc_acosl:
4572 case LibFunc_acos:
4573 case LibFunc_acosf:
4574 return !(Op < APFloat::getOne(Op.getSemantics(), true) ||
4575 Op > APFloat::getOne(Op.getSemantics()));
4576
4577 case LibFunc_sinh:
4578 case LibFunc_cosh:
4579 case LibFunc_sinhf:
4580 case LibFunc_coshf:
4581 case LibFunc_sinhl:
4582 case LibFunc_coshl:
4583 // FIXME: These boundaries are slightly conservative.
4584 if (OpC->getType()->isDoubleTy())
4585 return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
4586 if (OpC->getType()->isFloatTy())
4587 return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
4588 break;
4589
4590 case LibFunc_sqrtl:
4591 case LibFunc_sqrt:
4592 case LibFunc_sqrtf:
4593 return Op.isNaN() || Op.isZero() || !Op.isNegative();
4594
4595 // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
4596 // maybe others?
4597 default:
4598 break;
4599 }
4600 }
4601 }
4602
4603 if (Call->arg_size() == 2) {
4604 ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
4605 ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
4606 if (Op0C && Op1C) {
4607 const APFloat &Op0 = Op0C->getValueAPF();
4608 const APFloat &Op1 = Op1C->getValueAPF();
4609
4610 switch (Func) {
4611 case LibFunc_powl:
4612 case LibFunc_pow:
4613 case LibFunc_powf: {
4614 // FIXME: Stop using the host math library.
4615 // FIXME: The computation isn't done in the right precision.
4616 Type *Ty = Op0C->getType();
4617 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
4618 if (Ty == Op1C->getType())
4619 return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
4620 }
4621 break;
4622 }
4623
4624 case LibFunc_fmodl:
4625 case LibFunc_fmod:
4626 case LibFunc_fmodf:
4627 case LibFunc_remainderl:
4628 case LibFunc_remainder:
4629 case LibFunc_remainderf:
4630 return Op0.isNaN() || Op1.isNaN() ||
4631 (!Op0.isInfinity() && !Op1.isZero());
4632
4633 case LibFunc_atan2:
4634 case LibFunc_atan2f:
4635 case LibFunc_atan2l:
4636 // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
4637 // GLIBC and MSVC do not appear to raise an error on those, we
4638 // cannot rely on that behavior. POSIX and C11 say that a domain error
4639 // may occur, so allow for that possibility.
4640 return !Op0.isZero() || !Op1.isZero();
4641
4642 default:
4643 break;
4644 }
4645 }
4646 }
4647
4648 return false;
4649}
4650
4651 static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo,
4652                                       unsigned CastOp, const DataLayout &DL,
4653 PreservedCastFlags *Flags) {
4654 switch (CastOp) {
4655 case Instruction::BitCast:
4656 // Bitcast is always lossless.
4657 return ConstantFoldCastOperand(Instruction::BitCast, C, InvCastTo, DL);
4658 case Instruction::Trunc: {
4659 auto *ZExtC = ConstantFoldCastOperand(Instruction::ZExt, C, InvCastTo, DL);
4660 if (Flags) {
4661       // Truncating a zero-extended value back is always NUW.
4662 Flags->NUW = true;
4663 // Test positivity of C.
4664 auto *SExtC =
4665 ConstantFoldCastOperand(Instruction::SExt, C, InvCastTo, DL);
4666 Flags->NSW = ZExtC == SExtC;
4667 }
4668 return ZExtC;
4669 }
4670 case Instruction::SExt:
4671 case Instruction::ZExt: {
4672 auto *InvC = ConstantExpr::getTrunc(C, InvCastTo);
4673 auto *CastInvC = ConstantFoldCastOperand(CastOp, InvC, C->getType(), DL);
4674 // Must satisfy CastOp(InvC) == C.
4675 if (!CastInvC || CastInvC != C)
4676 return nullptr;
4677 if (Flags && CastOp == Instruction::ZExt) {
4678 auto *SExtInvC =
4679 ConstantFoldCastOperand(Instruction::SExt, InvC, C->getType(), DL);
4680 // Test positivity of InvC.
4681 Flags->NNeg = CastInvC == SExtInvC;
4682 }
4683 return InvC;
4684 }
4685 default:
4686 return nullptr;
4687 }
4688}
4689
4691 const DataLayout &DL,
4692 PreservedCastFlags *Flags) {
4693 return getLosslessInvCast(C, DestTy, Instruction::ZExt, DL, Flags);
4694}
4695
4697 const DataLayout &DL,
4698 PreservedCastFlags *Flags) {
4699 return getLosslessInvCast(C, DestTy, Instruction::SExt, DL, Flags);
4700}
4701
4702void TargetFolder::anchor() {}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Constant * FoldBitCast(Constant *V, Type *DestTy)
static ConstantFP * flushDenormalConstant(Type *Ty, const APFloat &APF, DenormalMode::DenormalModeKind Mode)
Constant * getConstantAtOffset(Constant *Base, APInt Offset, const DataLayout &DL)
If this Offset points exactly to the start of an aggregate element, return that element,...
static cl::opt< bool > DisableFPCallFolding("disable-fp-call-folding", cl::desc("Disable constant-folding of FP intrinsics and libcalls."), cl::init(false), cl::Hidden)
static ConstantFP * flushDenormalConstantFP(ConstantFP *CFP, const Instruction *Inst, bool IsOutput)
static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty)
Return the denormal mode that can be assumed when executing a floating point operation at CtxI.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
Hexagon Common GEP
amode Optimize addressing mode
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
static bool InRange(int64_t Value, unsigned short Shift, int LBound, int HBound)
This file contains the definitions of the enumerations and flags associated with NVVM Intrinsics,...
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
The Input class is used to parse a yaml document into in-memory structs and vectors.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1120
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1208
void copySign(const APFloat &RHS)
Definition APFloat.h:1302
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
opStatus subtract(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isNegative() const
Definition APFloat.h:1449
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6115
bool isPosInfinity() const
Definition APFloat.h:1462
bool isNormal() const
Definition APFloat.h:1453
bool isDenormal() const
Definition APFloat.h:1450
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1181
const fltSemantics & getSemantics() const
Definition APFloat.h:1457
bool isNonZero() const
Definition APFloat.h:1458
bool isFinite() const
Definition APFloat.h:1454
bool isNaN() const
Definition APFloat.h:1447
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1088
opStatus multiply(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1199
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6143
bool isSignaling() const
Definition APFloat.h:1451
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1235
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1226
bool isNegInfinity() const
Definition APFloat.h:1463
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1248
void changeSign()
Definition APFloat.h:1297
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1079
bool isInfinity() const
Definition APFloat.h:1446
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
LLVM_ABI APInt usub_sat(const APInt &RHS) const
Definition APInt.cpp:2055
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:520
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
LLVM_ABI APInt sadd_sat(const APInt &RHS) const
Definition APInt.cpp:2026
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2036
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:873
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
LLVM_ABI APInt ssub_sat(const APInt &RHS) const
Definition APInt.cpp:2045
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
bool isSigned() const
Definition InstrTypes.h:930
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static bool isFPPredicate(Predicate P)
Definition InstrTypes.h:770
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI bool isDesirableCastOp(unsigned Opcode)
Whether creating a constant expression for this cast is desirable.
static LLVM_ABI Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getInsertElement(Constant *Vec, Constant *Elt, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
static bool isSupportedGetElementPtr(const Type *SrcElemTy)
Whether creating a constant expression for this getelementptr type is supported.
Definition Constants.h:1387
static LLVM_ABI Constant * get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a binary or shift operator constant expression, folding if possible.
static LLVM_ABI bool isDesirableBinOp(unsigned Opcode)
Whether creating a constant expression for this binary operator is desirable.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition Constants.h:1274
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValueAPF() const
Definition Constants.h:320
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
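A minimal sketch of how getAggregateElement() and isNullValue() are typically walked over a constant aggregate; Agg, NumElts, and the helper name are assumed for illustration.
static bool allElementsNull(const llvm::Constant *Agg, unsigned NumElts) {
  for (unsigned I = 0; I != NumElts; ++I) {
    const llvm::Constant *Elt = Agg->getAggregateElement(I);
    // getAggregateElement() returns null for out-of-range indices.
    if (!Elt || !Elt->isNullValue())
      return false;
  }
  return true;
}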
Constrained floating point compare intrinsics.
This is the common base class for constrained floating point intrinsics.
LLVM_ABI std::optional< fp::ExceptionBehavior > getExceptionBehavior() const
LLVM_ABI std::optional< RoundingMode > getRoundingMode() const
Wrapper for a function that represents a value that functionally represents the original function.
Definition Constants.h:952
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
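Sketch of the DenseMap operations referenced above (find/end/insert), shown here as a hypothetical memoization cache for already-folded constants; the cache and helper are assumptions, not code from this file.
#include "llvm/ADT/DenseMap.h"

llvm::DenseMap<llvm::Constant *, llvm::Constant *> FoldCache;

llvm::Constant *lookupOrRemember(llvm::Constant *Key, llvm::Constant *Folded) {
  auto It = FoldCache.find(Key);
  if (It != FoldCache.end())
    return It->second;               // already folded
  FoldCache.insert({Key, Folded});   // returns {iterator, inserted-bool}
  return Folded;
}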
static LLVM_ABI bool compare(const APFloat &LHS, const APFloat &RHS, FCmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
This provides a helper for copying FMF from an instruction or setting specified flags.
Definition IRBuilder.h:93
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
Definition Function.cpp:803
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
GEPNoWrapFlags withoutNoUnsignedSignedWrap() const
static GEPNoWrapFlags noUnsignedWrap()
bool hasNoUnsignedSignedWrap() const
bool isInBounds() const
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
PointerType * getType() const
Global values are always pointers.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
bool hasDefinitiveInitializer() const
hasDefinitiveInitializer - Whether the global variable has an initializer, and any other instances of...
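A minimal sketch of the usual guard before reading a global's initializer, using the GlobalVariable predicates above; the helper name is illustrative only.
static const llvm::Constant *
getFoldableInitializer(const llvm::GlobalVariable *GV) {
  // Only immutable globals whose initializer cannot change at link time are
  // safe to read when folding a load.
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;
  return GV->getInitializer();
}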
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
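Hedged example of evaluating an integer predicate over two constant APInts with the static compare() helper above:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"

bool Res = llvm::ICmpInst::compare(llvm::APInt(32, 5), llvm::APInt(32, 7),
                                   llvm::ICmpInst::ICMP_SLT); // true: 5 <s 7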
bool isCast() const
bool isBinaryOp() const
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isUnaryOp() const
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
static APInt getSaturationPoint(Intrinsic::ID ID, unsigned numBits)
Min/max intrinsics are monotonic; they operate on fixed-bitwidth values, so there is a certain thre...
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Class to represent scalable SIMD vectors.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition DataLayout.h:712
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
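Sketch of how the StructLayout queries above map a byte offset back to a field index; DL, STy, and the helper name are assumed for illustration.
static unsigned fieldForOffset(const llvm::DataLayout &DL,
                               llvm::StructType *STy, uint64_t Off,
                               uint64_t &OffsetWithinField) {
  const llvm::StructLayout *SL = DL.getStructLayout(STy);
  unsigned Idx = SL->getElementContainingOffset(Off);
  OffsetWithinField = Off - SL->getElementOffset(Idx).getFixedValue();
  return Idx;
}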
Class to represent struct types.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
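Hedged sketch of the TargetLibraryInfo interface above, as used to decide whether a named libcall is both recognized and available; the helper name is an assumption.
#include "llvm/Analysis/TargetLibraryInfo.h"

static bool isKnownAvailableLibCall(const llvm::TargetLibraryInfo &TLI,
                                    llvm::StringRef Name) {
  llvm::LibFunc F;
  return TLI.getLibFunc(Name, F) && TLI.has(F);
}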
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ HalfTyID
16-bit floating point type
Definition Type.h:56
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:296
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:200
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:381
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
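A small sketch combining the Type queries above; Ty is any llvm::Type* and the helper name is illustrative only, not this file's logic.
static bool isSimpleFoldableScalar(llvm::Type *Ty) {
  llvm::Type *Scalar = Ty->getScalarType(); // element type for vectors
  return Ty->isSized() &&
         (Scalar->isIntegerTy() || Scalar->isFloatingPointTy() ||
          Scalar->isPointerTy());
}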
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:238
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
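Illustration (not from this file) of the APIntOps helpers above, which provide the signed/unsigned min/max flavors used when folding integer min/max intrinsics:
llvm::APInt A(32, 5), B(32, -3, /*isSigned=*/true);
const llvm::APInt &SMax = llvm::APIntOps::smax(A, B); // 5 (signed max)
const llvm::APInt &UMax = llvm::APIntOps::umax(A, B); // -3, larger as unsigned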
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
@ ebStrict
This corresponds to "fpexcept.strict".
Definition FPEnv.h:42
@ ebIgnore
This corresponds to "fpexcept.ignore".
Definition FPEnv.h:40
constexpr double pi
Definition MathExtras.h:53
APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID)
DenormalMode GetNVVMDenormMode(bool ShouldFTZ)
bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFDivRoundingMode(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID)
bool RCPShouldFTZ(Intrinsic::ID IntrinsicID)
bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FDivShouldFTZ(Intrinsic::ID IntrinsicID)
bool FAddShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFMulRoundingMode(Intrinsic::ID IntrinsicID)
bool UnaryMathIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetFAddRoundingMode(Intrinsic::ID IntrinsicID)
bool FMAShouldFTZ(Intrinsic::ID IntrinsicID)
bool FMulShouldFTZ(Intrinsic::ID IntrinsicID)
APFloat::roundingMode GetRCPRoundingMode(Intrinsic::ID IntrinsicID)
bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
@ Offset
Definition DWP.cpp:477
LLVM_ABI Constant * ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, Constant *RHS, Type *Ty, Instruction *FMFSource)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
LLVM_ABI Constant * ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL)
ConstantFoldLoadThroughBitcast - try to cast constant to destination type returning null if unsuccess...
static double log2(double V)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
LLVM_ABI Constant * ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL, const Instruction *I, bool AllowNonDeterministic=true)
Attempt to constant fold a floating point binary operation with the specified operands,...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it's even possible to fold a call to the specified function.
unsigned getPointerAddressSpace(const Type *T)
Definition SPIRVUtils.h:345
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1563
LLVM_ABI Constant * ConstantFoldCompareInstruction(CmpInst::Predicate Predicate, Constant *C1, Constant *C2)
LLVM_ABI Constant * ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V)
LLVM_ABI bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL, DSOLocalEquivalent **DSOEquiv=nullptr)
If this constant is a constant offset from a global, return the global and the constant.
LLVM_ABI bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI)
Check whether the given call has no side-effects.
LLVM_ABI Constant * ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset)
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:733
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1643
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1534
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:677
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1555
LLVM_ABI Constant * ConstantFoldExtractValueInstruction(Constant *Agg, ArrayRef< unsigned > Idxs)
Attempt to constant fold an extractvalue instruction with the specified operands and indices.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1598
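Hedged example of the NaN-handling difference between the APFloat min/max flavors listed on this page: the IEEE-754 2008 maxnum() prefers the non-NaN operand, while the 2019 maximum() propagates NaN.
#include "llvm/ADT/APFloat.h"

llvm::APFloat One(1.0);
llvm::APFloat NaN = llvm::APFloat::getNaN(llvm::APFloat::IEEEdouble());
llvm::APFloat R1 = llvm::maxnum(One, NaN);  // 1.0
llvm::APFloat R2 = llvm::maximum(One, NaN); // NaN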
LLVM_ABI Constant * ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, const DataLayout &DL)
If C is a uniform value where all bits are the same (either all zero, all ones, all undef or all pois...
LLVM_ABI Constant * ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL)
Attempt to constant fold a unary operation with the specified operand.
LLVM_ABI Constant * FlushFPConstant(Constant *Operand, const Instruction *I, bool IsOutput)
Attempt to flush a floating-point constant according to the denormal mode set in the instruction's parent func...
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1629
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1543
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool intrinsicPropagatesPoison(Intrinsic::ID IID)
Return whether this intrinsic propagates poison for all operands.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
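A minimal usage sketch for the folding entry point above; Ctx and DL are assumed to be an existing LLVMContext and DataLayout.
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"

llvm::Constant *LHS = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 2);
llvm::Constant *RHS = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 3);
// Returns ConstantInt 5, or nullptr if the operands cannot be folded.
llvm::Constant *Sum =
    llvm::ConstantFoldBinaryOpOperands(llvm::Instruction::Add, LHS, RHS, DL);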
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1579
@ Sub
Subtraction of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ NearestTiesToEven
roundTiesToEven.
@ Dynamic
Denotes mode unknown at compile time.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1616
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1656
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:831
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:309
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:307
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:302
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:306
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
DenormalModeKind
Represent handled modes for denormal (aka subnormal) modes in the floating point environment.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ Dynamic
Denormals have unknown treatment.
@ IEEE
IEEE-754 denormal numbers preserved.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
static constexpr DenormalMode getDynamic()
static constexpr DenormalMode getIEEE()
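Sketch of how the DenormalMode queries above can decide whether input denormals are flushed for a given FP type; F is the function containing the instruction being folded, and the helper name is an assumption.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Function.h"

static bool flushesInputDenormals(const llvm::Function &F) {
  llvm::DenormalMode Mode = F.getDenormalMode(llvm::APFloat::IEEEsingle());
  return Mode.Input != llvm::DenormalMode::IEEE &&
         Mode.Input != llvm::DenormalMode::Dynamic;
}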
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
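Closing sketch: recovering a full constant from computeKnownBits() with the KnownBits accessors above. V and DL are assumed inputs and the helper name is hypothetical; this mirrors a common pattern rather than this file's exact code.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"

static bool tryGetKnownConstant(const llvm::Value *V,
                                const llvm::DataLayout &DL,
                                llvm::APInt &Out) {
  // KnownBits must be sized to the scalar bit width of V's type.
  llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(V, Known, DL);
  if (!Known.isConstant())
    return false;
  Out = Known.getConstant();
  return true;
}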