| File: | build/source/llvm/lib/Analysis/ConstantFolding.cpp |
| Warning: | line 707, column 39: Called C++ object pointer is null |
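| Note: | "Called C++ object pointer is null" means the analyzer found a path on which C is null when the member call C->getType() (column 39) on line 707 executes; see the marker at that line below. |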
| 1 | //===-- ConstantFolding.cpp - Fold instructions into constants ------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file defines routines for folding instructions into constants. | |||
| 10 | // | |||
| 11 | // Also, to supplement the basic IR ConstantExpr simplifications, | |||
| 12 | // this file defines some additional folding routines that can make use of | |||
| 13 | // DataLayout information. These functions cannot go in IR due to library | |||
| 14 | // dependency issues. | |||
| 15 | // | |||
| 16 | //===----------------------------------------------------------------------===// | |||
| 17 | ||||
| 18 | #include "llvm/Analysis/ConstantFolding.h" | |||
| 19 | #include "llvm/ADT/APFloat.h" | |||
| 20 | #include "llvm/ADT/APInt.h" | |||
| 21 | #include "llvm/ADT/APSInt.h" | |||
| 22 | #include "llvm/ADT/ArrayRef.h" | |||
| 23 | #include "llvm/ADT/DenseMap.h" | |||
| 24 | #include "llvm/ADT/STLExtras.h" | |||
| 25 | #include "llvm/ADT/SmallVector.h" | |||
| 26 | #include "llvm/ADT/StringRef.h" | |||
| 27 | #include "llvm/Analysis/TargetFolder.h" | |||
| 28 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
| 29 | #include "llvm/Analysis/ValueTracking.h" | |||
| 30 | #include "llvm/Analysis/VectorUtils.h" | |||
| 31 | #include "llvm/Config/config.h" | |||
| 32 | #include "llvm/IR/Constant.h" | |||
| 33 | #include "llvm/IR/ConstantFold.h" | |||
| 34 | #include "llvm/IR/Constants.h" | |||
| 35 | #include "llvm/IR/DataLayout.h" | |||
| 36 | #include "llvm/IR/DerivedTypes.h" | |||
| 37 | #include "llvm/IR/Function.h" | |||
| 38 | #include "llvm/IR/GlobalValue.h" | |||
| 39 | #include "llvm/IR/GlobalVariable.h" | |||
| 40 | #include "llvm/IR/InstrTypes.h" | |||
| 41 | #include "llvm/IR/Instruction.h" | |||
| 42 | #include "llvm/IR/Instructions.h" | |||
| 43 | #include "llvm/IR/IntrinsicInst.h" | |||
| 44 | #include "llvm/IR/Intrinsics.h" | |||
| 45 | #include "llvm/IR/IntrinsicsAArch64.h" | |||
| 46 | #include "llvm/IR/IntrinsicsAMDGPU.h" | |||
| 47 | #include "llvm/IR/IntrinsicsARM.h" | |||
| 48 | #include "llvm/IR/IntrinsicsWebAssembly.h" | |||
| 49 | #include "llvm/IR/IntrinsicsX86.h" | |||
| 50 | #include "llvm/IR/Operator.h" | |||
| 51 | #include "llvm/IR/Type.h" | |||
| 52 | #include "llvm/IR/Value.h" | |||
| 53 | #include "llvm/Support/Casting.h" | |||
| 54 | #include "llvm/Support/ErrorHandling.h" | |||
| 55 | #include "llvm/Support/KnownBits.h" | |||
| 56 | #include "llvm/Support/MathExtras.h" | |||
| 57 | #include <cassert> | |||
| 58 | #include <cerrno> | |||
| 59 | #include <cfenv> | |||
| 60 | #include <cmath> | |||
| 61 | #include <cstdint> | |||
| 62 | ||||
| 63 | using namespace llvm; | |||
| 64 | ||||
| 65 | namespace { | |||
| 66 | ||||
| 67 | //===----------------------------------------------------------------------===// | |||
| 68 | // Constant Folding internal helper functions | |||
| 69 | //===----------------------------------------------------------------------===// | |||
| 70 | ||||
| 71 | static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, | |||
| 72 | Constant *C, Type *SrcEltTy, | |||
| 73 | unsigned NumSrcElts, | |||
| 74 | const DataLayout &DL) { | |||
| 75 | // Now that we know that the input value is a vector of integers, just shift | |||
| 76 | // and insert them into our result. | |||
| 77 | unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy); | |||
| 78 | for (unsigned i = 0; i != NumSrcElts; ++i) { | |||
| 79 | Constant *Element; | |||
| 80 | if (DL.isLittleEndian()) | |||
| 81 | Element = C->getAggregateElement(NumSrcElts - i - 1); | |||
| 82 | else | |||
| 83 | Element = C->getAggregateElement(i); | |||
| 84 | ||||
| 85 | if (Element && isa<UndefValue>(Element)) { | |||
| 86 | Result <<= BitShift; | |||
| 87 | continue; | |||
| 88 | } | |||
| 89 | ||||
| 90 | auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); | |||
| 91 | if (!ElementCI) | |||
| 92 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 93 | ||||
| 94 | Result <<= BitShift; | |||
| 95 | Result |= ElementCI->getValue().zext(Result.getBitWidth()); | |||
| 96 | } | |||
| 97 | ||||
| 98 | return nullptr; | |||
| 99 | } | |||
| 100 | ||||
| 101 | /// Constant fold bitcast, symbolically evaluating it with DataLayout. | |||
| 102 | /// This always returns a non-null constant, but it may be a | |||
| 103 | /// ConstantExpr if unfoldable. | |||
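| | /// For illustration (editorial example, assuming a little-endian target): | |||
| | /// bitcast (<2 x i32> <i32 1, i32 2> to i64) packs element 1 into the high | |||
| | /// half and element 0 into the low half, yielding i64 0x0000000200000001. | |||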
| 104 | Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { | |||
| 105 | assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && | |||
| 106 | "Invalid constantexpr bitcast!"); | |||
| 107 | ||||
| 108 | // Catch the obvious splat cases. | |||
| 109 | if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy)) | |||
| 110 | return Res; | |||
| 111 | ||||
| 112 | if (auto *VTy = dyn_cast<VectorType>(C->getType())) { | |||
| 113 | // Handle a vector->scalar integer/fp cast. | |||
| 114 | if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) { | |||
| 115 | unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements(); | |||
| 116 | Type *SrcEltTy = VTy->getElementType(); | |||
| 117 | ||||
| 118 | // If the vector is a vector of floating point, convert it to a vector of | |||
| 119 | // integers to simplify things. | |||
| 120 | if (SrcEltTy->isFloatingPointTy()) { | |||
| 121 | unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); | |||
| 122 | auto *SrcIVTy = FixedVectorType::get( | |||
| 123 | IntegerType::get(C->getContext(), FPWidth), NumSrcElts); | |||
| 124 | // Ask IR to do the conversion now that #elts line up. | |||
| 125 | C = ConstantExpr::getBitCast(C, SrcIVTy); | |||
| 126 | } | |||
| 127 | ||||
| 128 | APInt Result(DL.getTypeSizeInBits(DestTy), 0); | |||
| 129 | if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C, | |||
| 130 | SrcEltTy, NumSrcElts, DL)) | |||
| 131 | return CE; | |||
| 132 | ||||
| 133 | if (isa<IntegerType>(DestTy)) | |||
| 134 | return ConstantInt::get(DestTy, Result); | |||
| 135 | ||||
| 136 | APFloat FP(DestTy->getFltSemantics(), Result); | |||
| 137 | return ConstantFP::get(DestTy->getContext(), FP); | |||
| 138 | } | |||
| 139 | } | |||
| 140 | ||||
| 141 | // The code below only handles casts to vectors currently. | |||
| 142 | auto *DestVTy = dyn_cast<VectorType>(DestTy); | |||
| 143 | if (!DestVTy) | |||
| 144 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 145 | ||||
| 146 | // If this is a scalar -> vector cast, convert the input into a <1 x scalar> | |||
| 147 | // vector so the code below can handle it uniformly. | |||
| 148 | if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { | |||
| 149 | Constant *Ops = C; // don't take the address of C! | |||
| 150 | return FoldBitCast(ConstantVector::get(Ops), DestTy, DL); | |||
| 151 | } | |||
| 152 | ||||
| 153 | // If this is a bitcast from constant vector -> vector, fold it. | |||
| 154 | if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C)) | |||
| 155 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 156 | ||||
| 157 | // If the element types match, IR can fold it. | |||
| 158 | unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements(); | |||
| 159 | unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements(); | |||
| 160 | if (NumDstElt == NumSrcElt) | |||
| 161 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 162 | ||||
| 163 | Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType(); | |||
| 164 | Type *DstEltTy = DestVTy->getElementType(); | |||
| 165 | ||||
| 166 | // Otherwise, we're changing the number of elements in a vector, which | |||
| 167 | // requires endianness information to do the right thing. For example, | |||
| 168 | // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) | |||
| 169 | // folds to (little endian): | |||
| 170 | // <4 x i32> <i32 0, i32 0, i32 1, i32 0> | |||
| 171 | // and to (big endian): | |||
| 172 | // <4 x i32> <i32 0, i32 0, i32 0, i32 1> | |||
| 173 | ||||
| 174 | // First things first: we only want to think about integers here, so if | |||
| 175 | // we have something in FP form, recast it as an integer. | |||
| 176 | if (DstEltTy->isFloatingPointTy()) { | |||
| 177 | // Fold to a vector of integers with the same size as our FP type. | |||
| 178 | unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); | |||
| 179 | auto *DestIVTy = FixedVectorType::get( | |||
| 180 | IntegerType::get(C->getContext(), FPWidth), NumDstElt); | |||
| 181 | // Recursively handle this integer conversion, if possible. | |||
| 182 | C = FoldBitCast(C, DestIVTy, DL); | |||
| 183 | ||||
| 184 | // Finally, IR can handle this now that #elts line up. | |||
| 185 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 186 | } | |||
| 187 | ||||
| 188 | // Okay, we know the destination is integer, if the input is FP, convert | |||
| 189 | // it to integer first. | |||
| 190 | if (SrcEltTy->isFloatingPointTy()) { | |||
| 191 | unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); | |||
| 192 | auto *SrcIVTy = FixedVectorType::get( | |||
| 193 | IntegerType::get(C->getContext(), FPWidth), NumSrcElt); | |||
| 194 | // Ask IR to do the conversion now that #elts line up. | |||
| 195 | C = ConstantExpr::getBitCast(C, SrcIVTy); | |||
| 196 | // If IR wasn't able to fold it, bail out. | |||
| 197 | if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector. | |||
| 198 | !isa<ConstantDataVector>(C)) | |||
| 199 | return C; | |||
| 200 | } | |||
| 201 | ||||
| 202 | // Now we know that the input and output vectors are both integer vectors | |||
| 203 | // of the same size, and that their #elements is not the same. Do the | |||
| 204 | // conversion here, which depends on whether the input or output has | |||
| 205 | // more elements. | |||
| 206 | bool isLittleEndian = DL.isLittleEndian(); | |||
| 207 | ||||
| 208 | SmallVector<Constant*, 32> Result; | |||
| 209 | if (NumDstElt < NumSrcElt) { | |||
| 210 | // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>) | |||
| 211 | Constant *Zero = Constant::getNullValue(DstEltTy); | |||
| 212 | unsigned Ratio = NumSrcElt/NumDstElt; | |||
| 213 | unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits(); | |||
| 214 | unsigned SrcElt = 0; | |||
| 215 | for (unsigned i = 0; i != NumDstElt; ++i) { | |||
| 216 | // Build each element of the result. | |||
| 217 | Constant *Elt = Zero; | |||
| 218 | unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); | |||
| 219 | for (unsigned j = 0; j != Ratio; ++j) { | |||
| 220 | Constant *Src = C->getAggregateElement(SrcElt++); | |||
| 221 | if (Src && isa<UndefValue>(Src)) | |||
| 222 | Src = Constant::getNullValue( | |||
| 223 | cast<VectorType>(C->getType())->getElementType()); | |||
| 224 | else | |||
| 225 | Src = dyn_cast_or_null<ConstantInt>(Src); | |||
| 226 | if (!Src) // Reject constantexpr elements. | |||
| 227 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 228 | ||||
| 229 | // Zero extend the element to the right size. | |||
| 230 | Src = ConstantExpr::getZExt(Src, Elt->getType()); | |||
| 231 | ||||
| 232 | // Shift it to the right place, depending on endianness. | |||
| 233 | Src = ConstantExpr::getShl(Src, | |||
| 234 | ConstantInt::get(Src->getType(), ShiftAmt)); | |||
| 235 | ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; | |||
| 236 | ||||
| 237 | // Mix it in. | |||
| 238 | Elt = ConstantExpr::getOr(Elt, Src); | |||
| 239 | } | |||
| 240 | Result.push_back(Elt); | |||
| 241 | } | |||
| 242 | return ConstantVector::get(Result); | |||
| 243 | } | |||
| 244 | ||||
| 245 | // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) | |||
| 246 | unsigned Ratio = NumDstElt/NumSrcElt; | |||
| 247 | unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy); | |||
| 248 | ||||
| 249 | // Loop over each source value, expanding into multiple results. | |||
| 250 | for (unsigned i = 0; i != NumSrcElt; ++i) { | |||
| 251 | auto *Element = C->getAggregateElement(i); | |||
| 252 | ||||
| 253 | if (!Element) // Reject constantexpr elements. | |||
| 254 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 255 | ||||
| 256 | if (isa<UndefValue>(Element)) { | |||
| 257 | // Correctly propagate undef values. | |||
| 258 | Result.append(Ratio, UndefValue::get(DstEltTy)); | |||
| 259 | continue; | |||
| 260 | } | |||
| 261 | ||||
| 262 | auto *Src = dyn_cast<ConstantInt>(Element); | |||
| 263 | if (!Src) | |||
| 264 | return ConstantExpr::getBitCast(C, DestTy); | |||
| 265 | ||||
| 266 | unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); | |||
| 267 | for (unsigned j = 0; j != Ratio; ++j) { | |||
| 268 | // Shift the piece of the value into the right place, depending on | |||
| 269 | // endianness. | |||
| 270 | Constant *Elt = ConstantExpr::getLShr(Src, | |||
| 271 | ConstantInt::get(Src->getType(), ShiftAmt)); | |||
| 272 | ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; | |||
| 273 | ||||
| 274 | // Truncate the element to an integer with the same pointer size and | |||
| 275 | // convert the element back to a pointer using an inttoptr. | |||
| 276 | if (DstEltTy->isPointerTy()) { | |||
| 277 | IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize); | |||
| 278 | Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy); | |||
| 279 | Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy)); | |||
| 280 | continue; | |||
| 281 | } | |||
| 282 | ||||
| 283 | // Truncate and remember this piece. | |||
| 284 | Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); | |||
| 285 | } | |||
| 286 | } | |||
| 287 | ||||
| 288 | return ConstantVector::get(Result); | |||
| 289 | } | |||
| 290 | ||||
| 291 | } // end anonymous namespace | |||
| 292 | ||||
| 293 | /// If this constant is a constant offset from a global, return the global and | |||
| 294 | /// the constant. Because of constantexprs, this function is recursive. | |||
| 295 | bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, | |||
| 296 | APInt &Offset, const DataLayout &DL, | |||
| 297 | DSOLocalEquivalent **DSOEquiv) { | |||
| 298 | if (DSOEquiv) | |||
| 299 | *DSOEquiv = nullptr; | |||
| 300 | ||||
| 301 | // Trivial case, constant is the global. | |||
| 302 | if ((GV = dyn_cast<GlobalValue>(C))) { | |||
| 303 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); | |||
| 304 | Offset = APInt(BitWidth, 0); | |||
| 305 | return true; | |||
| 306 | } | |||
| 307 | ||||
| 308 | if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) { | |||
| 309 | if (DSOEquiv) | |||
| 310 | *DSOEquiv = FoundDSOEquiv; | |||
| 311 | GV = FoundDSOEquiv->getGlobalValue(); | |||
| 312 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); | |||
| 313 | Offset = APInt(BitWidth, 0); | |||
| 314 | return true; | |||
| 315 | } | |||
| 316 | ||||
| 317 | // Otherwise, if this isn't a constant expr, bail out. | |||
| 318 | auto *CE = dyn_cast<ConstantExpr>(C); | |||
| 319 | if (!CE) return false; | |||
| 320 | ||||
| 321 | // Look through ptr->int and ptr->ptr casts. | |||
| 322 | if (CE->getOpcode() == Instruction::PtrToInt || | |||
| 323 | CE->getOpcode() == Instruction::BitCast) | |||
| 324 | return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL, | |||
| 325 | DSOEquiv); | |||
| 326 | ||||
| 327 | // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) | |||
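| | // (Editorial note: for this example GEP, GV = @a and Offset = 20, | |||
| | // assuming 4-byte i32 elements.) | |||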
| 328 | auto *GEP = dyn_cast<GEPOperator>(CE); | |||
| 329 | if (!GEP) | |||
| 330 | return false; | |||
| 331 | ||||
| 332 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); | |||
| 333 | APInt TmpOffset(BitWidth, 0); | |||
| 334 | ||||
| 335 | // If the base isn't a global+constant, we aren't either. | |||
| 336 | if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL, | |||
| 337 | DSOEquiv)) | |||
| 338 | return false; | |||
| 339 | ||||
| 340 | // Otherwise, add any offset that our operands provide. | |||
| 341 | if (!GEP->accumulateConstantOffset(DL, TmpOffset)) | |||
| 342 | return false; | |||
| 343 | ||||
| 344 | Offset = TmpOffset; | |||
| 345 | return true; | |||
| 346 | } | |||
| 347 | ||||
| 348 | Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, | |||
| 349 | const DataLayout &DL) { | |||
| 350 | do { | |||
| 351 | Type *SrcTy = C->getType(); | |||
| 352 | if (SrcTy == DestTy) | |||
| 353 | return C; | |||
| 354 | ||||
| 355 | TypeSize DestSize = DL.getTypeSizeInBits(DestTy); | |||
| 356 | TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy); | |||
| 357 | if (!TypeSize::isKnownGE(SrcSize, DestSize)) | |||
| 358 | return nullptr; | |||
| 359 | ||||
| 360 | // Catch the obvious splat cases (since all-zeros can coerce non-integral | |||
| 361 | // pointers legally). | |||
| 362 | if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy)) | |||
| 363 | return Res; | |||
| 364 | ||||
| 365 | // If the type sizes are the same and a cast is legal, just directly | |||
| 366 | // cast the constant. | |||
| 367 | // But be careful not to coerce non-integral pointers illegally. | |||
| 368 | if (SrcSize == DestSize && | |||
| 369 | DL.isNonIntegralPointerType(SrcTy->getScalarType()) == | |||
| 370 | DL.isNonIntegralPointerType(DestTy->getScalarType())) { | |||
| 371 | Instruction::CastOps Cast = Instruction::BitCast; | |||
| 372 | // If we are going from a pointer to int or vice versa, we spell the cast | |||
| 373 | // differently. | |||
| 374 | if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) | |||
| 375 | Cast = Instruction::IntToPtr; | |||
| 376 | else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) | |||
| 377 | Cast = Instruction::PtrToInt; | |||
| 378 | ||||
| 379 | if (CastInst::castIsValid(Cast, C, DestTy)) | |||
| 380 | return ConstantExpr::getCast(Cast, C, DestTy); | |||
| 381 | } | |||
| 382 | ||||
| 383 | // If this isn't an aggregate type, there is nothing we can do to drill down | |||
| 384 | // and find a bitcastable constant. | |||
| 385 | if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy()) | |||
| 386 | return nullptr; | |||
| 387 | ||||
| 388 | // We're simulating a load through a pointer that was bitcast to point to | |||
| 389 | // a different type, so we can try to walk down through the initial | |||
| 390 | // elements of an aggregate to see if some part of the aggregate is | |||
| 391 | // castable to implement the "load" semantic model. | |||
| 392 | if (SrcTy->isStructTy()) { | |||
| 393 | // Struct types might have leading zero-length elements like [0 x i32], | |||
| 394 | // which are certainly not what we are looking for, so skip them. | |||
| 395 | unsigned Elem = 0; | |||
| 396 | Constant *ElemC; | |||
| 397 | do { | |||
| 398 | ElemC = C->getAggregateElement(Elem++); | |||
| 399 | } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero()); | |||
| 400 | C = ElemC; | |||
| 401 | } else { | |||
| 402 | // For non-byte-sized vector elements, the first element is not | |||
| 403 | // necessarily located at the vector base address. | |||
| 404 | if (auto *VT = dyn_cast<VectorType>(SrcTy)) | |||
| 405 | if (!DL.typeSizeEqualsStoreSize(VT->getElementType())) | |||
| 406 | return nullptr; | |||
| 407 | ||||
| 408 | C = C->getAggregateElement(0u); | |||
| 409 | } | |||
| 410 | } while (C); | |||
| 411 | ||||
| 412 | return nullptr; | |||
| 413 | } | |||
| 414 | ||||
| 415 | namespace { | |||
| 416 | ||||
| 417 | /// Recursive helper to read bits out of global. C is the constant being copied | |||
| 418 | /// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy | |||
| 419 | /// results into and BytesLeft is the number of bytes left in | |||
| 420 | /// the CurPtr buffer. DL is the DataLayout. | |||
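| | /// Editorial example (assuming a little-endian DataLayout): copying 4 bytes | |||
| | /// at ByteOffset 2 out of the constant i64 0x0102030405060708 writes the | |||
| | /// bytes {0x06, 0x05, 0x04, 0x03} into CurPtr. | |||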
| 421 | bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, | |||
| 422 | unsigned BytesLeft, const DataLayout &DL) { | |||
| 423 | assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) && | |||
| 424 | "Out of range access"); | |||
| 425 | ||||
| 426 | // If this element is zero or undefined, we can just return since *CurPtr is | |||
| 427 | // zero initialized. | |||
| 428 | if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) | |||
| 429 | return true; | |||
| 430 | ||||
| 431 | if (auto *CI = dyn_cast<ConstantInt>(C)) { | |||
| 432 | if (CI->getBitWidth() > 64 || | |||
| 433 | (CI->getBitWidth() & 7) != 0) | |||
| 434 | return false; | |||
| 435 | ||||
| 436 | uint64_t Val = CI->getZExtValue(); | |||
| 437 | unsigned IntBytes = unsigned(CI->getBitWidth()/8); | |||
| 438 | ||||
| 439 | for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { | |||
| 440 | int n = ByteOffset; | |||
| 441 | if (!DL.isLittleEndian()) | |||
| 442 | n = IntBytes - n - 1; | |||
| 443 | CurPtr[i] = (unsigned char)(Val >> (n * 8)); | |||
| 444 | ++ByteOffset; | |||
| 445 | } | |||
| 446 | return true; | |||
| 447 | } | |||
| 448 | ||||
| 449 | if (auto *CFP = dyn_cast<ConstantFP>(C)) { | |||
| 450 | if (CFP->getType()->isDoubleTy()) { | |||
| 451 | C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL); | |||
| 452 | return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); | |||
| 453 | } | |||
| 454 | if (CFP->getType()->isFloatTy()){ | |||
| 455 | C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL); | |||
| 456 | return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); | |||
| 457 | } | |||
| 458 | if (CFP->getType()->isHalfTy()){ | |||
| 459 | C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL); | |||
| 460 | return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); | |||
| 461 | } | |||
| 462 | return false; | |||
| 463 | } | |||
| 464 | ||||
| 465 | if (auto *CS = dyn_cast<ConstantStruct>(C)) { | |||
| 466 | const StructLayout *SL = DL.getStructLayout(CS->getType()); | |||
| 467 | unsigned Index = SL->getElementContainingOffset(ByteOffset); | |||
| 468 | uint64_t CurEltOffset = SL->getElementOffset(Index); | |||
| 469 | ByteOffset -= CurEltOffset; | |||
| 470 | ||||
| 471 | while (true) { | |||
| 472 | // If the element access is to the element itself and not to tail padding, | |||
| 473 | // read the bytes from the element. | |||
| 474 | uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType()); | |||
| 475 | ||||
| 476 | if (ByteOffset < EltSize && | |||
| 477 | !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, | |||
| 478 | BytesLeft, DL)) | |||
| 479 | return false; | |||
| 480 | ||||
| 481 | ++Index; | |||
| 482 | ||||
| 483 | // Check to see if we read from the last struct element; if so, we're done. | |||
| 484 | if (Index == CS->getType()->getNumElements()) | |||
| 485 | return true; | |||
| 486 | ||||
| 487 | // If we read all of the bytes we needed from this element we're done. | |||
| 488 | uint64_t NextEltOffset = SL->getElementOffset(Index); | |||
| 489 | ||||
| 490 | if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset) | |||
| 491 | return true; | |||
| 492 | ||||
| 493 | // Move to the next element of the struct. | |||
| 494 | CurPtr += NextEltOffset - CurEltOffset - ByteOffset; | |||
| 495 | BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset; | |||
| 496 | ByteOffset = 0; | |||
| 497 | CurEltOffset = NextEltOffset; | |||
| 498 | } | |||
| 499 | // not reached. | |||
| 500 | } | |||
| 501 | ||||
| 502 | if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || | |||
| 503 | isa<ConstantDataSequential>(C)) { | |||
| 504 | uint64_t NumElts; | |||
| 505 | Type *EltTy; | |||
| 506 | if (auto *AT = dyn_cast<ArrayType>(C->getType())) { | |||
| 507 | NumElts = AT->getNumElements(); | |||
| 508 | EltTy = AT->getElementType(); | |||
| 509 | } else { | |||
| 510 | NumElts = cast<FixedVectorType>(C->getType())->getNumElements(); | |||
| 511 | EltTy = cast<FixedVectorType>(C->getType())->getElementType(); | |||
| 512 | } | |||
| 513 | uint64_t EltSize = DL.getTypeAllocSize(EltTy); | |||
| 514 | uint64_t Index = ByteOffset / EltSize; | |||
| 515 | uint64_t Offset = ByteOffset - Index * EltSize; | |||
| 516 | ||||
| 517 | for (; Index != NumElts; ++Index) { | |||
| 518 | if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, | |||
| 519 | BytesLeft, DL)) | |||
| 520 | return false; | |||
| 521 | ||||
| 522 | uint64_t BytesWritten = EltSize - Offset; | |||
| 523 | assert(BytesWritten <= EltSize && "Not indexing into this element?"); | |||
| 524 | if (BytesWritten >= BytesLeft) | |||
| 525 | return true; | |||
| 526 | ||||
| 527 | Offset = 0; | |||
| 528 | BytesLeft -= BytesWritten; | |||
| 529 | CurPtr += BytesWritten; | |||
| 530 | } | |||
| 531 | return true; | |||
| 532 | } | |||
| 533 | ||||
| 534 | if (auto *CE = dyn_cast<ConstantExpr>(C)) { | |||
| 535 | if (CE->getOpcode() == Instruction::IntToPtr && | |||
| 536 | CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) { | |||
| 537 | return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, | |||
| 538 | BytesLeft, DL); | |||
| 539 | } | |||
| 540 | } | |||
| 541 | ||||
| 542 | // Otherwise, unknown initializer type. | |||
| 543 | return false; | |||
| 544 | } | |||
| 545 | ||||
| 546 | Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, | |||
| 547 | int64_t Offset, const DataLayout &DL) { | |||
| 548 | // Bail out early. We do not expect to load from a scalable global variable. | |||
| 549 | if (isa<ScalableVectorType>(LoadTy)) | |||
| 550 | return nullptr; | |||
| 551 | ||||
| 552 | auto *IntType = dyn_cast<IntegerType>(LoadTy); | |||
| 553 | ||||
| 554 | // If this isn't an integer load, we can't fold it directly. | |||
| 555 | if (!IntType) { | |||
| 556 | // If this is a non-integer load, we can try folding it as an int load and | |||
| 557 | // then bitcast the result. This can be useful for union cases. Note | |||
| 558 | // that address spaces don't matter here since we're not going to produce | |||
| 559 | // an actual new load. | |||
| 560 | if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() && | |||
| 561 | !LoadTy->isVectorTy()) | |||
| 562 | return nullptr; | |||
| 563 | ||||
| 564 | Type *MapTy = Type::getIntNTy(C->getContext(), | |||
| 565 | DL.getTypeSizeInBits(LoadTy).getFixedValue()); | |||
| 566 | if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) { | |||
| 567 | if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && | |||
| 568 | !LoadTy->isX86_AMXTy()) | |||
| 569 | // Materializing a zero can be done trivially without a bitcast | |||
| 570 | return Constant::getNullValue(LoadTy); | |||
| 571 | Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy; | |||
| 572 | Res = FoldBitCast(Res, CastTy, DL); | |||
| 573 | if (LoadTy->isPtrOrPtrVectorTy()) { | |||
| 574 | // For a vector of pointers, we first need to convert to a vector of integers, then do a vector inttoptr. | |||
| 575 | if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && | |||
| 576 | !LoadTy->isX86_AMXTy()) | |||
| 577 | return Constant::getNullValue(LoadTy); | |||
| 578 | if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) | |||
| 579 | // Be careful not to replace a load of an addrspace value with an inttoptr here | |||
| 580 | return nullptr; | |||
| 581 | Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy); | |||
| 582 | } | |||
| 583 | return Res; | |||
| 584 | } | |||
| 585 | return nullptr; | |||
| 586 | } | |||
| 587 | ||||
| 588 | unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; | |||
| 589 | if (BytesLoaded > 32 || BytesLoaded == 0) | |||
| 590 | return nullptr; | |||
| 591 | ||||
| 592 | // If we're not accessing anything in this constant, the result is undefined. | |||
| 593 | if (Offset <= -1 * static_cast<int64_t>(BytesLoaded)) | |||
| 594 | return PoisonValue::get(IntType); | |||
| 595 | ||||
| 596 | // TODO: We should be able to support scalable types. | |||
| 597 | TypeSize InitializerSize = DL.getTypeAllocSize(C->getType()); | |||
| 598 | if (InitializerSize.isScalable()) | |||
| 599 | return nullptr; | |||
| 600 | ||||
| 601 | // If we're not accessing anything in this constant, the result is undefined. | |||
| 602 | if (Offset >= (int64_t)InitializerSize.getFixedValue()) | |||
| 603 | return PoisonValue::get(IntType); | |||
| 604 | ||||
| 605 | unsigned char RawBytes[32] = {0}; | |||
| 606 | unsigned char *CurPtr = RawBytes; | |||
| 607 | unsigned BytesLeft = BytesLoaded; | |||
| 608 | ||||
| 609 | // If we're loading off the beginning of the global, some bytes may be valid. | |||
| 610 | if (Offset < 0) { | |||
| 611 | CurPtr += -Offset; | |||
| 612 | BytesLeft += Offset; | |||
| 613 | Offset = 0; | |||
| 614 | } | |||
| 615 | ||||
| 616 | if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL)) | |||
| 617 | return nullptr; | |||
| 618 | ||||
| 619 | APInt ResultVal = APInt(IntType->getBitWidth(), 0); | |||
| 620 | if (DL.isLittleEndian()) { | |||
| 621 | ResultVal = RawBytes[BytesLoaded - 1]; | |||
| 622 | for (unsigned i = 1; i != BytesLoaded; ++i) { | |||
| 623 | ResultVal <<= 8; | |||
| 624 | ResultVal |= RawBytes[BytesLoaded - 1 - i]; | |||
| 625 | } | |||
| 626 | } else { | |||
| 627 | ResultVal = RawBytes[0]; | |||
| 628 | for (unsigned i = 1; i != BytesLoaded; ++i) { | |||
| 629 | ResultVal <<= 8; | |||
| 630 | ResultVal |= RawBytes[i]; | |||
| 631 | } | |||
| 632 | } | |||
| 633 | ||||
| 634 | return ConstantInt::get(IntType->getContext(), ResultVal); | |||
| 635 | } | |||
| 636 | ||||
| 637 | } // anonymous namespace | |||
| 638 | ||||
| 639 | // If GV is a constant with an initializer, read its representation starting | |||
| 640 | // at Offset and return it as a constant array of unsigned char. Otherwise | |||
| 641 | // return null. | |||
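| | // Editorial example: for a constant global initialized to c"abc" | |||
| | // ([3 x i8]) and Offset == 1, this returns the i8 array { 'b', 'c' }. | |||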
| 642 | Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV, | |||
| 643 | uint64_t Offset) { | |||
| 644 | if (!GV->isConstant() || !GV->hasDefinitiveInitializer()) | |||
| 645 | return nullptr; | |||
| 646 | ||||
| 647 | const DataLayout &DL = GV->getParent()->getDataLayout(); | |||
| 648 | Constant *Init = const_cast<Constant *>(GV->getInitializer()); | |||
| 649 | TypeSize InitSize = DL.getTypeAllocSize(Init->getType()); | |||
| 650 | if (InitSize < Offset) | |||
| 651 | return nullptr; | |||
| 652 | ||||
| 653 | uint64_t NBytes = InitSize - Offset; | |||
| 654 | if (NBytes > UINT16_MAX) | |||
| 655 | // Bail for large initializers in excess of 64K to avoid allocating | |||
| 656 | // too much memory. | |||
| 657 | // Offset is assumed to be less than or equal to InitSize (this | |||
| 658 | // is enforced in ReadDataFromGlobal). | |||
| 659 | return nullptr; | |||
| 660 | ||||
| 661 | SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes)); | |||
| 662 | unsigned char *CurPtr = RawBytes.data(); | |||
| 663 | ||||
| 664 | if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL)) | |||
| 665 | return nullptr; | |||
| 666 | ||||
| 667 | return ConstantDataArray::get(GV->getContext(), RawBytes); | |||
| 668 | } | |||
| 669 | ||||
| 670 | /// If this Offset points exactly to the start of an aggregate element, return | |||
| 671 | /// that element, otherwise return nullptr. | |||
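| | /// Editorial example: with Base a [4 x i32] constant and Offset == 8 (the | |||
| | /// start of element 2), the computed indices are {0, 2} and element 2 is | |||
| | /// returned; Offset == 6 falls mid-element and yields nullptr. | |||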
| 672 | Constant *getConstantAtOffset(Constant *Base, APInt Offset, | |||
| 673 | const DataLayout &DL) { | |||
| 674 | if (Offset.isZero()) | |||
| 675 | return Base; | |||
| 676 | ||||
| 677 | if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base)) | |||
| 678 | return nullptr; | |||
| 679 | ||||
| 680 | Type *ElemTy = Base->getType(); | |||
| 681 | SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset); | |||
| 682 | if (!Offset.isZero() || !Indices[0].isZero()) | |||
| 683 | return nullptr; | |||
| 684 | ||||
| 685 | Constant *C = Base; | |||
| 686 | for (const APInt &Index : drop_begin(Indices)) { | |||
| 687 | if (Index.isNegative() || Index.getActiveBits() >= 32) | |||
| 688 | return nullptr; | |||
| 689 | ||||
| 690 | C = C->getAggregateElement(Index.getZExtValue()); | |||
| 691 | if (!C) | |||
| 692 | return nullptr; | |||
| 693 | } | |||
| 694 | ||||
| 695 | return C; | |||
| 696 | } | |||
| 697 | ||||
| 698 | Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, | |||
| 699 | const APInt &Offset, | |||
| 700 | const DataLayout &DL) { | |||
| 701 | if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL)) | |||
| 702 | if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL)) | |||
| 703 | return Result; | |||
| 704 | ||||
| 705 | // Explicitly check for out-of-bounds access, so we return poison even if the | |||
| 706 | // constant is a uniform value. | |||
| 707 | TypeSize Size = DL.getTypeAllocSize(C->getType()); | |||
| | <--- Called C++ object pointer is null | |||
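| | // Editorial note: a guard such as 'if (!C) return nullptr;' before line | |||
| | // 707 would satisfy the analyzer; whether C can actually be null here | |||
| | // depends on the callers and is not established by this report alone. | |||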
| 708 | if (!Size.isScalable() && Offset.sge(Size.getFixedValue())) | |||
| 709 | return PoisonValue::get(Ty); | |||
| 710 | ||||
| 711 | // Try an offset-independent fold of a uniform value. | |||
| 712 | if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty)) | |||
| 713 | return Result; | |||
| 714 | ||||
| 715 | // Try hard to fold loads from bitcasted strange and non-type-safe things. | |||
| 716 | if (Offset.getSignificantBits() <= 64) | |||
| 717 | if (Constant *Result = | |||
| 718 | FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL)) | |||
| 719 | return Result; | |||
| 720 | ||||
| 721 | return nullptr; | |||
| 722 | } | |||
| 723 | ||||
| 724 | Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, | |||
| 725 | const DataLayout &DL) { | |||
| 726 | return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL); | |||
| 727 | } | |||
| 728 | ||||
| 729 | Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, | |||
| 730 | APInt Offset, | |||
| 731 | const DataLayout &DL) { | |||
| 732 | // We can only fold loads from constant globals with a definitive initializer. | |||
| 733 | // Check this upfront, to skip expensive offset calculations. | |||
| 734 | auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C)); | |||
| 735 | if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) | |||
| 736 | return nullptr; | |||
| 737 | ||||
| 738 | C = cast<Constant>(C->stripAndAccumulateConstantOffsets( | |||
| 739 | DL, Offset, /* AllowNonInbounds */ true)); | |||
| 740 | ||||
| 741 | if (C == GV) | |||
| 742 | if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty, | |||
| 743 | Offset, DL)) | |||
| 744 | return Result; | |||
| 745 | ||||
| 746 | // If this load comes from anywhere in a uniform constant global, the value | |||
| 747 | // is always the same, regardless of the loaded offset. | |||
| 748 | return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty); | |||
| 749 | } | |||
| 750 | ||||
| 751 | Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, | |||
| 752 | const DataLayout &DL) { | |||
| 753 | APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0); | |||
| 754 | return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL); | |||
| 755 | } | |||
| 756 | ||||
| 757 | Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) { | |||
| 758 | if (isa<PoisonValue>(C)) | |||
| 759 | return PoisonValue::get(Ty); | |||
| 760 | if (isa<UndefValue>(C)) | |||
| 761 | return UndefValue::get(Ty); | |||
| 762 | if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) | |||
| 763 | return Constant::getNullValue(Ty); | |||
| 764 | if (C->isAllOnesValue() && | |||
| 765 | (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy())) | |||
| 766 | return Constant::getAllOnesValue(Ty); | |||
| 767 | return nullptr; | |||
| 768 | } | |||
| 769 | ||||
| 770 | namespace { | |||
| 771 | ||||
| 772 | /// One of Op0/Op1 is a constant expression. | |||
| 773 | /// Attempt to symbolically evaluate the result of a binary operator merging | |||
| 774 | /// these together. Target data information is provided as DL (a | |||
| 775 | /// reference, so it is always available). | |||
| 776 | Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, | |||
| 777 | const DataLayout &DL) { | |||
| 778 | // SROA | |||
| 779 | ||||
| 780 | // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. | |||
| 781 | // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute | |||
| 782 | // bits. | |||
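| | // Editorial example: (and (shl x, 8), 0xFF), where x is itself a constant | |||
| | // expression (e.g. a ptrtoint of a global), folds to 0 below: the shl | |||
| | // pins the low 8 bits to zero and the mask pins the rest to zero, so | |||
| | // every bit of the result is known and the fold yields a constant 0. | |||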
| 783 | ||||
| 784 | if (Opc == Instruction::And) { | |||
| 785 | KnownBits Known0 = computeKnownBits(Op0, DL); | |||
| 786 | KnownBits Known1 = computeKnownBits(Op1, DL); | |||
| 787 | if ((Known1.One | Known0.Zero).isAllOnes()) { | |||
| 788 | // All the bits of Op0 that the 'and' could be masking are already zero. | |||
| 789 | return Op0; | |||
| 790 | } | |||
| 791 | if ((Known0.One | Known1.Zero).isAllOnes()) { | |||
| 792 | // All the bits of Op1 that the 'and' could be masking are already zero. | |||
| 793 | return Op1; | |||
| 794 | } | |||
| 795 | ||||
| 796 | Known0 &= Known1; | |||
| 797 | if (Known0.isConstant()) | |||
| 798 | return ConstantInt::get(Op0->getType(), Known0.getConstant()); | |||
| 799 | } | |||
| 800 | ||||
| 801 | // If the constant expr is something like &A[123] - &A[4].f, fold this into a | |||
| 802 | // constant. This happens frequently when iterating over a global array. | |||
| 803 | if (Opc == Instruction::Sub) { | |||
| 804 | GlobalValue *GV1, *GV2; | |||
| 805 | APInt Offs1, Offs2; | |||
| 806 | ||||
| 807 | if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL)) | |||
| 808 | if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) { | |||
| 809 | unsigned OpSize = DL.getTypeSizeInBits(Op0->getType()); | |||
| 810 | ||||
| 811 | // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. | |||
| 812 | // PtrToInt may change the bitwidth, so we have to convert to the right | |||
| 813 | // size first. | |||
| 814 | return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - | |||
| 815 | Offs2.zextOrTrunc(OpSize)); | |||
| 816 | } | |||
| 817 | } | |||
| 818 | ||||
| 819 | return nullptr; | |||
| 820 | } | |||
| 821 | ||||
| 822 | /// If array indices are not pointer-sized integers, explicitly cast them so | |||
| 823 | /// that they aren't implicitly cast by the getelementptr. | |||
| 824 | Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, | |||
| 825 | Type *ResultTy, bool InBounds, | |||
| 826 | std::optional<unsigned> InRangeIndex, | |||
| 827 | const DataLayout &DL, const TargetLibraryInfo *TLI) { | |||
| 828 | Type *IntIdxTy = DL.getIndexType(ResultTy); | |||
| 829 | Type *IntIdxScalarTy = IntIdxTy->getScalarType(); | |||
| 830 | ||||
| 831 | bool Any = false; | |||
| 832 | SmallVector<Constant*, 32> NewIdxs; | |||
| 833 | for (unsigned i = 1, e = Ops.size(); i != e; ++i) { | |||
| 834 | if ((i == 1 || | |||
| 835 | !isa<StructType>(GetElementPtrInst::getIndexedType( | |||
| 836 | SrcElemTy, Ops.slice(1, i - 1)))) && | |||
| 837 | Ops[i]->getType()->getScalarType() != IntIdxScalarTy) { | |||
| 838 | Any = true; | |||
| 839 | Type *NewType = Ops[i]->getType()->isVectorTy() | |||
| 840 | ? IntIdxTy | |||
| 841 | : IntIdxScalarTy; | |||
| 842 | NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], | |||
| 843 | true, | |||
| 844 | NewType, | |||
| 845 | true), | |||
| 846 | Ops[i], NewType)); | |||
| 847 | } else | |||
| 848 | NewIdxs.push_back(Ops[i]); | |||
| 849 | } | |||
| 850 | ||||
| 851 | if (!Any) | |||
| 852 | return nullptr; | |||
| 853 | ||||
| 854 | Constant *C = ConstantExpr::getGetElementPtr( | |||
| 855 | SrcElemTy, Ops[0], NewIdxs, InBounds, InRangeIndex); | |||
| 856 | return ConstantFoldConstant(C, DL, TLI); | |||
| 857 | } | |||
| 858 | ||||
| 859 | /// Strip the pointer casts, but preserve the address space information. | |||
| 860 | Constant *StripPtrCastKeepAS(Constant *Ptr) { | |||
| 861 | assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); | |||
| 862 | auto *OldPtrTy = cast<PointerType>(Ptr->getType()); | |||
| 863 | Ptr = cast<Constant>(Ptr->stripPointerCasts()); | |||
| 864 | auto *NewPtrTy = cast<PointerType>(Ptr->getType()); | |||
| 865 | ||||
| 866 | // Preserve the address space number of the pointer. | |||
| 867 | if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { | |||
| 868 | Ptr = ConstantExpr::getPointerCast( | |||
| 869 | Ptr, PointerType::getWithSamePointeeType(NewPtrTy, | |||
| 870 | OldPtrTy->getAddressSpace())); | |||
| 871 | } | |||
| 872 | return Ptr; | |||
| 873 | } | |||
| 874 | ||||
| 875 | /// If we can symbolically evaluate the GEP constant expression, do so. | |||
| 876 | Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, | |||
| 877 | ArrayRef<Constant *> Ops, | |||
| 878 | const DataLayout &DL, | |||
| 879 | const TargetLibraryInfo *TLI) { | |||
| 880 | const GEPOperator *InnermostGEP = GEP; | |||
| 881 | bool InBounds = GEP->isInBounds(); | |||
| 882 | ||||
| 883 | Type *SrcElemTy = GEP->getSourceElementType(); | |||
| 884 | Type *ResElemTy = GEP->getResultElementType(); | |||
| 885 | Type *ResTy = GEP->getType(); | |||
| 886 | if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy)) | |||
| 887 | return nullptr; | |||
| 888 | ||||
| 889 | if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, | |||
| 890 | GEP->isInBounds(), GEP->getInRangeIndex(), | |||
| 891 | DL, TLI)) | |||
| 892 | return C; | |||
| 893 | ||||
| 894 | Constant *Ptr = Ops[0]; | |||
| 895 | if (!Ptr->getType()->isPointerTy()) | |||
| 896 | return nullptr; | |||
| 897 | ||||
| 898 | Type *IntIdxTy = DL.getIndexType(Ptr->getType()); | |||
| 899 | ||||
| 900 | for (unsigned i = 1, e = Ops.size(); i != e; ++i) | |||
| 901 | if (!isa<ConstantInt>(Ops[i])) | |||
| 902 | return nullptr; | |||
| 903 | ||||
| 904 | unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy); | |||
| 905 | APInt Offset = APInt( | |||
| 906 | BitWidth, | |||
| 907 | DL.getIndexedOffsetInType( | |||
| 908 | SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1))); | |||
| 909 | Ptr = StripPtrCastKeepAS(Ptr); | |||
| 910 | ||||
| 911 | // If this is a GEP of a GEP, fold it all into a single GEP. | |||
| 912 | while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { | |||
| 913 | InnermostGEP = GEP; | |||
| 914 | InBounds &= GEP->isInBounds(); | |||
| 915 | ||||
| 916 | SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands())); | |||
| 917 | ||||
| 918 | // Do not try to incorporate the sub-GEP if some index is not a number. | |||
| 919 | bool AllConstantInt = true; | |||
| 920 | for (Value *NestedOp : NestedOps) | |||
| 921 | if (!isa<ConstantInt>(NestedOp)) { | |||
| 922 | AllConstantInt = false; | |||
| 923 | break; | |||
| 924 | } | |||
| 925 | if (!AllConstantInt) | |||
| 926 | break; | |||
| 927 | ||||
| 928 | Ptr = cast<Constant>(GEP->getOperand(0)); | |||
| 929 | SrcElemTy = GEP->getSourceElementType(); | |||
| 930 | Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps)); | |||
| 931 | Ptr = StripPtrCastKeepAS(Ptr); | |||
| 932 | } | |||
| 933 | ||||
| 934 | // If the base value for this address is a literal integer value, fold the | |||
| 935 | // getelementptr to the resulting integer value cast to the pointer type. | |||
| 936 | APInt BasePtr(BitWidth, 0); | |||
| 937 | if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) { | |||
| 938 | if (CE->getOpcode() == Instruction::IntToPtr) { | |||
| 939 | if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) | |||
| 940 | BasePtr = Base->getValue().zextOrTrunc(BitWidth); | |||
| 941 | } | |||
| 942 | } | |||
| 943 | ||||
| 944 | auto *PTy = cast<PointerType>(Ptr->getType()); | |||
| 945 | if ((Ptr->isNullValue() || BasePtr != 0) && | |||
| 946 | !DL.isNonIntegralPointerType(PTy)) { | |||
| 947 | Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); | |||
| 948 | return ConstantExpr::getIntToPtr(C, ResTy); | |||
| 949 | } | |||
| 950 | ||||
| 951 | // Otherwise form a regular getelementptr. Recompute the indices so that | |||
| 952 | // we eliminate over-indexing of the notional static type array bounds. | |||
| 953 | // This makes it easy to determine if the getelementptr is "inbounds". | |||
| 954 | // Also, this helps GlobalOpt do SROA on GlobalVariables. | |||
| 955 | ||||
| 956 | // For GEPs of GlobalValues, use the value type even for opaque pointers. | |||
| 957 | // Otherwise use an i8 GEP. | |||
| 958 | if (auto *GV = dyn_cast<GlobalValue>(Ptr)) | |||
| 959 | SrcElemTy = GV->getValueType(); | |||
| 960 | else if (!PTy->isOpaque()) | |||
| 961 | SrcElemTy = PTy->getNonOpaquePointerElementType(); | |||
| 962 | else | |||
| 963 | SrcElemTy = Type::getInt8Ty(Ptr->getContext()); | |||
| 964 | ||||
| 965 | if (!SrcElemTy->isSized()) | |||
| 966 | return nullptr; | |||
| 967 | ||||
| 968 | Type *ElemTy = SrcElemTy; | |||
| 969 | SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset); | |||
| 970 | if (Offset != 0) | |||
| 971 | return nullptr; | |||
| 972 | ||||
| 973 | // Try to add additional zero indices to reach the desired result element | |||
| 974 | // type. | |||
| 975 | // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and | |||
| 976 | // we'll have to insert a bitcast anyway? | |||
| 977 | while (ElemTy != ResElemTy) { | |||
| 978 | Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0); | |||
| 979 | if (!NextTy) | |||
| 980 | break; | |||
| 981 | ||||
| 982 | Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth)); | |||
| 983 | ElemTy = NextTy; | |||
| 984 | } | |||
| 985 | ||||
| 986 | SmallVector<Constant *, 32> NewIdxs; | |||
| 987 | for (const APInt &Index : Indices) | |||
| 988 | NewIdxs.push_back(ConstantInt::get( | |||
| 989 | Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index)); | |||
| 990 | ||||
| 991 | // Preserve the inrange index from the innermost GEP if possible. We must | |||
| 992 | // have calculated the same indices up to and including the inrange index. | |||
| 993 | std::optional<unsigned> InRangeIndex; | |||
| 994 | if (std::optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex()) | |||
| 995 | if (SrcElemTy == InnermostGEP->getSourceElementType() && | |||
| 996 | NewIdxs.size() > *LastIRIndex) { | |||
| 997 | InRangeIndex = LastIRIndex; | |||
| 998 | for (unsigned I = 0; I <= *LastIRIndex; ++I) | |||
| 999 | if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) | |||
| 1000 | return nullptr; | |||
| 1001 | } | |||
| 1002 | ||||
| 1003 | // Create a GEP. | |||
| 1004 | Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs, | |||
| 1005 | InBounds, InRangeIndex); | |||
| 1006 | assert( | |||
| 1007 | cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) && | |||
| 1008 | "Computed GetElementPtr has unexpected type!"); | |||
| 1009 | ||||
| 1010 | // If we ended up indexing a member with a type that doesn't match | |||
| 1011 | // the type of what the original indices indexed, add a cast. | |||
| 1012 | if (C->getType() != ResTy) | |||
| 1013 | C = FoldBitCast(C, ResTy, DL); | |||
| 1014 | ||||
| 1015 | return C; | |||
| 1016 | } | |||
| 1017 | ||||
| 1018 | /// Attempt to constant fold an instruction with the | |||
| 1019 | /// specified opcode and operands. If successful, the constant result is | |||
| 1020 | /// returned, if not, null is returned. Note that this function can fail when | |||
| 1021 | /// attempting to fold instructions like loads and stores, which have no | |||
| 1022 | /// constant expression form. | |||
| 1023 | Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, | |||
| 1024 | ArrayRef<Constant *> Ops, | |||
| 1025 | const DataLayout &DL, | |||
| 1026 | const TargetLibraryInfo *TLI) { | |||
| 1027 | Type *DestTy = InstOrCE->getType(); | |||
| 1028 | ||||
| 1029 | if (Instruction::isUnaryOp(Opcode)) | |||
| 1030 | return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); | |||
| 1031 | ||||
| 1032 | if (Instruction::isBinaryOp(Opcode)) { | |||
| 1033 | switch (Opcode) { | |||
| 1034 | default: | |||
| 1035 | break; | |||
| 1036 | case Instruction::FAdd: | |||
| 1037 | case Instruction::FSub: | |||
| 1038 | case Instruction::FMul: | |||
| 1039 | case Instruction::FDiv: | |||
| 1040 | case Instruction::FRem: | |||
| 1041 | // Handle floating point instructions separately to account for denormals | |||
| 1042 | // TODO: If a constant expression is being folded rather than an | |||
| 1043 | // instruction, denormals will not be flushed/treated as zero | |||
| 1044 | if (const auto *I = dyn_cast<Instruction>(InstOrCE)) { | |||
| 1045 | return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I); | |||
| 1046 | } | |||
| 1047 | } | |||
| 1048 | return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); | |||
| 1049 | } | |||
| 1050 | ||||
| 1051 | if (Instruction::isCast(Opcode)) | |||
| 1052 | return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL); | |||
| 1053 | ||||
| 1054 | if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) { | |||
| 1055 | Type *SrcElemTy = GEP->getSourceElementType(); | |||
| 1056 | if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy)) | |||
| 1057 | return nullptr; | |||
| 1058 | ||||
| 1059 | if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI)) | |||
| 1060 | return C; | |||
| 1061 | ||||
| 1062 | return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1), | |||
| 1063 | GEP->isInBounds(), | |||
| 1064 | GEP->getInRangeIndex()); | |||
| 1065 | } | |||
| 1066 | ||||
| 1067 | if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) { | |||
| 1068 | if (CE->isCompare()) | |||
| 1069 | return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], | |||
| 1070 | DL, TLI); | |||
| 1071 | return CE->getWithOperands(Ops); | |||
| 1072 | } | |||
| 1073 | ||||
| 1074 | switch (Opcode) { | |||
| 1075 | default: return nullptr; | |||
| 1076 | case Instruction::ICmp: | |||
| 1077 | case Instruction::FCmp: { | |||
| 1078 | auto *C = cast<CmpInst>(InstOrCE); | |||
| 1079 | return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1], | |||
| 1080 | DL, TLI, C); | |||
| 1081 | } | |||
| 1082 | case Instruction::Freeze: | |||
| 1083 | return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr; | |||
| 1084 | case Instruction::Call: | |||
| 1085 | if (auto *F = dyn_cast<Function>(Ops.back())) { | |||
| 1086 | const auto *Call = cast<CallBase>(InstOrCE); | |||
| 1087 | if (canConstantFoldCallTo(Call, F)) | |||
| 1088 | return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI); | |||
| 1089 | } | |||
| 1090 | return nullptr; | |||
| 1091 | case Instruction::Select: | |||
| 1092 | return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]); | |||
| 1093 | case Instruction::ExtractElement: | |||
| 1094 | return ConstantExpr::getExtractElement(Ops[0], Ops[1]); | |||
| 1095 | case Instruction::ExtractValue: | |||
| 1096 | return ConstantFoldExtractValueInstruction( | |||
| 1097 | Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices()); | |||
| 1098 | case Instruction::InsertElement: | |||
| 1099 | return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); | |||
| 1100 | case Instruction::InsertValue: | |||
| 1101 | return ConstantFoldInsertValueInstruction( | |||
| 1102 | Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices()); | |||
| 1103 | case Instruction::ShuffleVector: | |||
| 1104 | return ConstantExpr::getShuffleVector( | |||
| 1105 | Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask()); | |||
| 1106 | case Instruction::Load: { | |||
| 1107 | const auto *LI = dyn_cast<LoadInst>(InstOrCE); | |||
| 1108 | if (LI->isVolatile()) | |||
| 1109 | return nullptr; | |||
| 1110 | return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL); | |||
| 1111 | } | |||
| 1112 | } | |||
| 1113 | } | |||
| 1114 | ||||
| 1115 | } // end anonymous namespace | |||
| 1116 | ||||
| 1117 | //===----------------------------------------------------------------------===// | |||
| 1118 | // Constant Folding public APIs | |||
| 1119 | //===----------------------------------------------------------------------===// | |||
| 1120 | ||||
| 1121 | namespace { | |||
| 1122 | ||||
| 1123 | Constant * | |||
| 1124 | ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, | |||
| 1125 | const TargetLibraryInfo *TLI, | |||
| 1126 | SmallDenseMap<Constant *, Constant *> &FoldedOps) { | |||
| 1127 | if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C)) | |||
| 1128 | return const_cast<Constant *>(C); | |||
| 1129 | ||||
| 1130 | SmallVector<Constant *, 8> Ops; | |||
| 1131 | for (const Use &OldU : C->operands()) { | |||
| 1132 | Constant *OldC = cast<Constant>(&OldU); | |||
| 1133 | Constant *NewC = OldC; | |||
| 1134 | // Recursively fold the ConstantExpr's operands. If we have already folded | |||
| 1135 | // a ConstantExpr, we don't have to process it again. | |||
| 1136 | if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) { | |||
| 1137 | auto It = FoldedOps.find(OldC); | |||
| 1138 | if (It == FoldedOps.end()) { | |||
| 1139 | NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps); | |||
| 1140 | FoldedOps.insert({OldC, NewC}); | |||
| 1141 | } else { | |||
| 1142 | NewC = It->second; | |||
| 1143 | } | |||
| 1144 | } | |||
| 1145 | Ops.push_back(NewC); | |||
| 1146 | } | |||
| 1147 | ||||
| 1148 | if (auto *CE = dyn_cast<ConstantExpr>(C)) { | |||
| 1149 | if (Constant *Res = | |||
| 1150 | ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI)) | |||
| 1151 | return Res; | |||
| 1152 | return const_cast<Constant *>(C); | |||
| 1153 | } | |||
| 1154 | ||||
| 1155 | assert(isa<ConstantVector>(C)); | |||
| 1156 | return ConstantVector::get(Ops); | |||
| 1157 | } | |||
| 1158 | ||||
| 1159 | } // end anonymous namespace | |||
| 1160 | ||||
| 1161 | Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, | |||
| 1162 | const TargetLibraryInfo *TLI) { | |||
| 1163 | // Handle PHI nodes quickly here... | |||
| 1164 | if (auto *PN = dyn_cast<PHINode>(I)) { | |||
| 1165 | Constant *CommonValue = nullptr; | |||
| 1166 | ||||
| 1167 | SmallDenseMap<Constant *, Constant *> FoldedOps; | |||
| 1168 | for (Value *Incoming : PN->incoming_values()) { | |||
| 1169 | // If the incoming value is undef then skip it. Note that while we could | |||
| 1170 | // skip the value if it is equal to the phi node itself we choose not to | |||
| 1171 | // because that would break the rule that constant folding only applies if | |||
| 1172 | // all operands are constants. | |||
| 1173 | if (isa<UndefValue>(Incoming)) | |||
| 1174 | continue; | |||
| 1175 | // If the incoming value is not a constant, then give up. | |||
| 1176 | auto *C = dyn_cast<Constant>(Incoming); | |||
| 1177 | if (!C) | |||
| 1178 | return nullptr; | |||
| 1179 | // Fold the PHI's operands. | |||
| 1180 | C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); | |||
| 1181 | // If the incoming value is a different constant to | |||
| 1182 | // the one we saw previously, then give up. | |||
| 1183 | if (CommonValue && C != CommonValue) | |||
| 1184 | return nullptr; | |||
| 1185 | CommonValue = C; | |||
| 1186 | } | |||
| 1187 | ||||
| 1188 | // If we reach here, all incoming values are the same constant or undef. | |||
| 1189 | return CommonValue ? CommonValue : UndefValue::get(PN->getType()); | |||
| 1190 | } | |||
| 1191 | ||||
| 1192 | // Scan the operand list, checking to see if they are all constants, if so, | |||
| 1193 | // hand off to ConstantFoldInstOperandsImpl. | |||
| 1194 | if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); })) | |||
| 1195 | return nullptr; | |||
| 1196 | ||||
| 1197 | SmallDenseMap<Constant *, Constant *> FoldedOps; | |||
| 1198 | SmallVector<Constant *, 8> Ops; | |||
| 1199 | for (const Use &OpU : I->operands()) { | |||
| 1200 | auto *Op = cast<Constant>(&OpU); | |||
| 1201 | // Fold the Instruction's operands. | |||
| 1202 | Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps); | |||
| 1203 | Ops.push_back(Op); | |||
| 1204 | } | |||
| 1205 | ||||
| 1206 | return ConstantFoldInstOperands(I, Ops, DL, TLI); | |||
| 1207 | } | |||
| 1208 | ||||
| 1209 | Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL, | |||
| 1210 | const TargetLibraryInfo *TLI) { | |||
| 1211 | SmallDenseMap<Constant *, Constant *> FoldedOps; | |||
| 1212 | return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); | |||
| 1213 | } | |||
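
// A minimal usage sketch (hypothetical helper, not part of the original
// file): run ConstantFoldInstruction over a whole function and replace each
// foldable instruction with its constant. Assumes the caller supplies a
// TargetLibraryInfo; isInstructionTriviallyDead would additionally require
// llvm/Transforms/Utils/Local.h, which this file does not include.
static void foldAllInstructionsSketch(Function &F,
                                      const TargetLibraryInfo *TLI) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  for (BasicBlock &BB : F) {
    // make_early_inc_range allows erasing the instruction mid-iteration.
    for (Instruction &I : llvm::make_early_inc_range(BB)) {
      if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
        I.replaceAllUsesWith(C);
        if (isInstructionTriviallyDead(&I, TLI))
          I.eraseFromParent();
      }
    }
  }
}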
| 1214 | ||||
| 1215 | Constant *llvm::ConstantFoldInstOperands(Instruction *I, | |||
| 1216 | ArrayRef<Constant *> Ops, | |||
| 1217 | const DataLayout &DL, | |||
| 1218 | const TargetLibraryInfo *TLI) { | |||
| 1219 | return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); | |||
| 1220 | } | |||
| 1221 | ||||
| 1222 | Constant *llvm::ConstantFoldCompareInstOperands( | |||
| 1223 | unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, | |||
| 1224 | const TargetLibraryInfo *TLI, const Instruction *I) { | |||
| 1225 | CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; | |||
| 1226 | // fold: icmp (inttoptr x), null -> icmp x, 0 | |||
| 1227 | // fold: icmp null, (inttoptr x) -> icmp 0, x | |||
| 1228 | // fold: icmp (ptrtoint x), 0 -> icmp x, null | |||
| 1229 | // fold: icmp 0, (ptrtoint x) -> icmp null, x | |||
| 1230 | // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y | |||
| 1231 | // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y | |||
| 1232 | // | |||
| 1233 | // FIXME: The following comment is out of date; the DataLayout is available here now. | |||
| 1234 | // ConstantExpr::getCompare cannot do this, because it doesn't have DL | |||
| 1235 | // around to know if bit truncation is happening. | |||
| 1236 | if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) { | |||
| 1237 | if (Ops1->isNullValue()) { | |||
| 1238 | if (CE0->getOpcode() == Instruction::IntToPtr) { | |||
| 1239 | Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); | |||
| 1240 | // Convert the integer value to the right size to ensure we get the | |||
| 1241 | // proper extension or truncation. | |||
| 1242 | Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), | |||
| 1243 | IntPtrTy, false); | |||
| 1244 | Constant *Null = Constant::getNullValue(C->getType()); | |||
| 1245 | return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); | |||
| 1246 | } | |||
| 1247 | ||||
| 1248 | // Only do this transformation if the int is IntPtrTy in size; otherwise | |||
| 1249 | // there is a truncation or extension that we aren't modeling. | |||
| 1250 | if (CE0->getOpcode() == Instruction::PtrToInt) { | |||
| 1251 | Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); | |||
| 1252 | if (CE0->getType() == IntPtrTy) { | |||
| 1253 | Constant *C = CE0->getOperand(0); | |||
| 1254 | Constant *Null = Constant::getNullValue(C->getType()); | |||
| 1255 | return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); | |||
| 1256 | } | |||
| 1257 | } | |||
| 1258 | } | |||
| 1259 | ||||
| 1260 | if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) { | |||
| 1261 | if (CE0->getOpcode() == CE1->getOpcode()) { | |||
| 1262 | if (CE0->getOpcode() == Instruction::IntToPtr) { | |||
| 1263 | Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); | |||
| 1264 | ||||
| 1265 | // Convert the integer value to the right size to ensure we get the | |||
| 1266 | // proper extension or truncation. | |||
| 1267 | Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), | |||
| 1268 | IntPtrTy, false); | |||
| 1269 | Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), | |||
| 1270 | IntPtrTy, false); | |||
| 1271 | return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI); | |||
| 1272 | } | |||
| 1273 | ||||
| 1274 | // Only do this transformation if the int is IntPtrTy in size; otherwise | |||
| 1275 | // there is a truncation or extension that we aren't modeling. | |||
| 1276 | if (CE0->getOpcode() == Instruction::PtrToInt) { | |||
| 1277 | Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); | |||
| 1278 | if (CE0->getType() == IntPtrTy && | |||
| 1279 | CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { | |||
| 1280 | return ConstantFoldCompareInstOperands( | |||
| 1281 | Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI); | |||
| 1282 | } | |||
| 1283 | } | |||
| 1284 | } | |||
| 1285 | } | |||
| 1286 | ||||
| 1287 | // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) | |||
| 1288 | // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) | |||
| 1289 | if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && | |||
| 1290 | CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { | |||
| 1291 | Constant *LHS = ConstantFoldCompareInstOperands( | |||
| 1292 | Predicate, CE0->getOperand(0), Ops1, DL, TLI); | |||
| 1293 | Constant *RHS = ConstantFoldCompareInstOperands( | |||
| 1294 | Predicate, CE0->getOperand(1), Ops1, DL, TLI); | |||
| 1295 | unsigned OpC = | |||
| 1296 | Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; | |||
| 1297 | return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); | |||
| 1298 | } | |||
| 1299 | ||||
| 1300 | // Convert pointer comparison (base+offset1) pred (base+offset2) into | |||
| 1301 | // offset1 pred offset2, for the case where the offset is inbounds. This | |||
| 1302 | // only works for equality and unsigned comparison, as inbounds permits | |||
| 1303 | // crossing the sign boundary. However, the offset comparison itself is | |||
| 1304 | // signed. | |||
| 1305 | if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) { | |||
| 1306 | unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType()); | |||
| 1307 | APInt Offset0(IndexWidth, 0); | |||
| 1308 | Value *Stripped0 = | |||
| 1309 | Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0); | |||
| 1310 | APInt Offset1(IndexWidth, 0); | |||
| 1311 | Value *Stripped1 = | |||
| 1312 | Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1); | |||
| 1313 | if (Stripped0 == Stripped1) | |||
| 1314 | return ConstantExpr::getCompare( | |||
| 1315 | ICmpInst::getSignedPredicate(Predicate), | |||
| 1316 | ConstantInt::get(CE0->getContext(), Offset0), | |||
| 1317 | ConstantInt::get(CE0->getContext(), Offset1)); | |||
| 1318 | } | |||
| 1319 | } else if (isa<ConstantExpr>(Ops1)) { | |||
| 1320 | // If RHS is a constant expression, but the left side isn't, swap the | |||
| 1321 | // operands and try again. | |||
| 1322 | Predicate = ICmpInst::getSwappedPredicate(Predicate); | |||
| 1323 | return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); | |||
| 1324 | } | |||
| 1325 | ||||
| 1326 | // Flush any denormal constant float input according to denormal handling | |||
| 1327 | // mode. | |||
| 1328 | Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false); | |||
| 1329 | if (!Ops0) | |||
| 1330 | return nullptr; | |||
| 1331 | Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false); | |||
| 1332 | if (!Ops1) | |||
| 1333 | return nullptr; | |||
| 1334 | ||||
| 1335 | return ConstantExpr::getCompare(Predicate, Ops0, Ops1); | |||
| 1336 | } | |||
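
// Illustrative sketch (hypothetical standalone check, not part of the
// original file): exercises the inttoptr/null path above. icmp eq
// (inttoptr i64 0 to ptr), null folds to i1 true because both sides reduce
// to the integer compare 0 == 0 after the pointer-width cast.
static Constant *foldNullIntToPtrCmpExample(LLVMContext &Ctx,
                                            const DataLayout &DL) {
  Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
  PointerType *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  Constant *P = ConstantExpr::getIntToPtr(Zero, PtrTy);
  Constant *Null = ConstantPointerNull::get(PtrTy);
  return ConstantFoldCompareInstOperands(CmpInst::ICMP_EQ, P, Null, DL);
}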
| 1337 | ||||
| 1338 | Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, | |||
| 1339 | const DataLayout &DL) { | |||
| 1340 | assert(Instruction::isUnaryOp(Opcode)); | |||
| 1341 | ||||
| 1342 | return ConstantFoldUnaryInstruction(Opcode, Op); | |||
| 1343 | } | |||
| 1344 | ||||
| 1345 | Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, | |||
| 1346 | Constant *RHS, | |||
| 1347 | const DataLayout &DL) { | |||
| 1348 | assert(Instruction::isBinaryOp(Opcode)); | |||
| 1349 | if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS)) | |||
| 1350 | if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL)) | |||
| 1351 | return C; | |||
| 1352 | ||||
| 1353 | if (ConstantExpr::isDesirableBinOp(Opcode)) | |||
| 1354 | return ConstantExpr::get(Opcode, LHS, RHS); | |||
| 1355 | return ConstantFoldBinaryInstruction(Opcode, LHS, RHS); | |||
| 1356 | } | |||
| 1357 | ||||
| 1358 | Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I, | |||
| 1359 | bool IsOutput) { | |||
| 1360 | if (!I || !I->getParent() || !I->getFunction()) | |||
| 1361 | return Operand; | |||
| 1362 | ||||
| 1363 | ConstantFP *CFP = dyn_cast<ConstantFP>(Operand); | |||
| 1364 | if (!CFP) | |||
| 1365 | return Operand; | |||
| 1366 | ||||
| 1367 | const APFloat &APF = CFP->getValueAPF(); | |||
| 1368 | // TODO: Should this canonicalize nans? | |||
| 1369 | if (!APF.isDenormal()) | |||
| 1370 | return Operand; | |||
| 1371 | ||||
| 1372 | Type *Ty = CFP->getType(); | |||
| 1373 | DenormalMode DenormMode = | |||
| 1374 | I->getFunction()->getDenormalMode(Ty->getFltSemantics()); | |||
| 1375 | DenormalMode::DenormalModeKind Mode = | |||
| 1376 | IsOutput ? DenormMode.Output : DenormMode.Input; | |||
| 1377 | switch (Mode) { | |||
| 1378 | default: | |||
| 1379 | llvm_unreachable("unknown denormal mode")::llvm::llvm_unreachable_internal("unknown denormal mode", "llvm/lib/Analysis/ConstantFolding.cpp" , 1379); | |||
| 1380 | case DenormalMode::Dynamic: | |||
| 1381 | return nullptr; | |||
| 1382 | case DenormalMode::IEEE: | |||
| 1383 | return Operand; | |||
| 1384 | case DenormalMode::PreserveSign: | |||
| 1385 | if (APF.isDenormal()) { | |||
| 1386 | return ConstantFP::get( | |||
| 1387 | Ty->getContext(), | |||
| 1388 | APFloat::getZero(Ty->getFltSemantics(), APF.isNegative())); | |||
| 1389 | } | |||
| 1390 | return Operand; | |||
| 1391 | case DenormalMode::PositiveZero: | |||
| 1392 | if (APF.isDenormal()) { | |||
| 1393 | return ConstantFP::get(Ty->getContext(), | |||
| 1394 | APFloat::getZero(Ty->getFltSemantics(), false)); | |||
| 1395 | } | |||
| 1396 | return Operand; | |||
| 1397 | } | |||
| 1398 | return Operand; | |||
| 1399 | } | |||
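
// A self-contained sketch of the preserve-sign case above, using only
// APFloat (assumed example values; no Instruction context needed): denormals
// are replaced by a zero carrying the original sign, everything else passes
// through unchanged.
static APFloat flushPreserveSignExample(const APFloat &V) {
  if (V.isDenormal())
    return APFloat::getZero(V.getSemantics(), V.isNegative());
  return V;
}
// e.g. an IEEEdouble denormal such as -1.0e-310 flushes to -0.0.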
| 1400 | ||||
| 1401 | Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, | |||
| 1402 | Constant *RHS, const DataLayout &DL, | |||
| 1403 | const Instruction *I) { | |||
| 1404 | if (Instruction::isBinaryOp(Opcode)) { | |||
| 1405 | // Flush denormal inputs if needed. | |||
| 1406 | Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false); | |||
| 1407 | if (!Op0) | |||
| 1408 | return nullptr; | |||
| 1409 | Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false); | |||
| 1410 | if (!Op1) | |||
| 1411 | return nullptr; | |||
| 1412 | ||||
| 1413 | // Calculate constant result. | |||
| 1414 | Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL); | |||
| 1415 | if (!C) | |||
| 1416 | return nullptr; | |||
| 1417 | ||||
| 1418 | // Flush denormal output if needed. | |||
| 1419 | return FlushFPConstant(C, I, /* IsOutput */ true); | |||
| 1420 | } | |||
| 1421 | // If instruction lacks a parent/function and the denormal mode cannot be | |||
| 1422 | // determined, use the default (IEEE). | |||
| 1423 | return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); | |||
| 1424 | } | |||
| 1425 | ||||
| 1426 | Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, | |||
| 1427 | Type *DestTy, const DataLayout &DL) { | |||
| 1428 | assert(Instruction::isCast(Opcode)); | |||
| 1429 | switch (Opcode) { | |||
| 1430 | default: | |||
| 1431 | llvm_unreachable("Missing case")::llvm::llvm_unreachable_internal("Missing case", "llvm/lib/Analysis/ConstantFolding.cpp" , 1431); | |||
| 1432 | case Instruction::PtrToInt: | |||
| 1433 | if (auto *CE = dyn_cast<ConstantExpr>(C)) { | |||
| 1434 | Constant *FoldedValue = nullptr; | |||
| 1435 | // If the input is an inttoptr, eliminate the pair. This requires knowing | |||
| 1436 | // the width of a pointer, so it can't be done in ConstantExpr::getCast. | |||
| 1437 | if (CE->getOpcode() == Instruction::IntToPtr) { | |||
| 1438 | // zext/trunc the inttoptr to pointer size. | |||
| 1439 | FoldedValue = ConstantExpr::getIntegerCast( | |||
| 1440 | CE->getOperand(0), DL.getIntPtrType(CE->getType()), | |||
| 1441 | /*IsSigned=*/false); | |||
| 1442 | } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) { | |||
| 1443 | // If we have GEP, we can perform the following folds: | |||
| 1444 | // (ptrtoint (gep null, x)) -> x | |||
| 1445 | // (ptrtoint (gep (gep null, x), y) -> x + y, etc. | |||
| 1446 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); | |||
| 1447 | APInt BaseOffset(BitWidth, 0); | |||
| 1448 | auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets( | |||
| 1449 | DL, BaseOffset, /*AllowNonInbounds=*/true)); | |||
| 1450 | if (Base->isNullValue()) { | |||
| 1451 | FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset); | |||
| 1452 | } else { | |||
| 1453 | // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V | |||
| 1454 | if (GEP->getNumIndices() == 1 && | |||
| 1455 | GEP->getSourceElementType()->isIntegerTy(8)) { | |||
| 1456 | auto *Ptr = cast<Constant>(GEP->getPointerOperand()); | |||
| 1457 | auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1)); | |||
| 1458 | Type *IntIdxTy = DL.getIndexType(Ptr->getType()); | |||
| 1459 | if (Sub && Sub->getType() == IntIdxTy && | |||
| 1460 | Sub->getOpcode() == Instruction::Sub && | |||
| 1461 | Sub->getOperand(0)->isNullValue()) | |||
| 1462 | FoldedValue = ConstantExpr::getSub( | |||
| 1463 | ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1)); | |||
| 1464 | } | |||
| 1465 | } | |||
| 1466 | } | |||
| 1467 | if (FoldedValue) { | |||
| 1468 | // Do a zext or trunc to get to the ptrtoint dest size. | |||
| 1469 | return ConstantExpr::getIntegerCast(FoldedValue, DestTy, | |||
| 1470 | /*IsSigned=*/false); | |||
| 1471 | } | |||
| 1472 | } | |||
| 1473 | return ConstantExpr::getCast(Opcode, C, DestTy); | |||
| 1474 | case Instruction::IntToPtr: | |||
| 1475 | // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if | |||
| 1476 | // the int size is >= the ptr size and the address spaces are the same. | |||
| 1477 | // This requires knowing the width of a pointer, so it can't be done in | |||
| 1478 | // ConstantExpr::getCast. | |||
| 1479 | if (auto *CE = dyn_cast<ConstantExpr>(C)) { | |||
| 1480 | if (CE->getOpcode() == Instruction::PtrToInt) { | |||
| 1481 | Constant *SrcPtr = CE->getOperand(0); | |||
| 1482 | unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType()); | |||
| 1483 | unsigned MidIntSize = CE->getType()->getScalarSizeInBits(); | |||
| 1484 | ||||
| 1485 | if (MidIntSize >= SrcPtrSize) { | |||
| 1486 | unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace(); | |||
| 1487 | if (SrcAS == DestTy->getPointerAddressSpace()) | |||
| 1488 | return FoldBitCast(CE->getOperand(0), DestTy, DL); | |||
| 1489 | } | |||
| 1490 | } | |||
| 1491 | } | |||
| 1492 | ||||
| 1493 | return ConstantExpr::getCast(Opcode, C, DestTy); | |||
| 1494 | case Instruction::Trunc: | |||
| 1495 | case Instruction::ZExt: | |||
| 1496 | case Instruction::SExt: | |||
| 1497 | case Instruction::FPTrunc: | |||
| 1498 | case Instruction::FPExt: | |||
| 1499 | case Instruction::UIToFP: | |||
| 1500 | case Instruction::SIToFP: | |||
| 1501 | case Instruction::FPToUI: | |||
| 1502 | case Instruction::FPToSI: | |||
| 1503 | case Instruction::AddrSpaceCast: | |||
| 1504 | return ConstantExpr::getCast(Opcode, C, DestTy); | |||
| 1505 | case Instruction::BitCast: | |||
| 1506 | return FoldBitCast(C, DestTy, DL); | |||
| 1507 | } | |||
| 1508 | } | |||
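
// Illustrative sketch (hypothetical standalone check, not part of the
// original file): the PtrToInt/GEP path above folds
// ptrtoint (gep i8, ptr null, i64 42) to i64 42, since the stripped base is
// the null pointer and the accumulated constant offset is 42.
static Constant *foldNullGEPPtrToIntExample(LLVMContext &Ctx,
                                            const DataLayout &DL) {
  Type *I64 = Type::getInt64Ty(Ctx);
  Constant *Null = ConstantPointerNull::get(PointerType::get(Ctx, 0));
  Constant *Idx = ConstantInt::get(I64, 42);
  Constant *GEP = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Null, Idx);
  return ConstantFoldCastOperand(Instruction::PtrToInt, GEP, I64, DL);
}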
| 1509 | ||||
| 1510 | //===----------------------------------------------------------------------===// | |||
| 1511 | // Constant Folding for Calls | |||
| 1512 | // | |||
| 1513 | ||||
| 1514 | bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { | |||
| 1515 | if (Call->isNoBuiltin()) | |||
| 1516 | return false; | |||
| 1517 | if (Call->getFunctionType() != F->getFunctionType()) | |||
| 1518 | return false; | |||
| 1519 | switch (F->getIntrinsicID()) { | |||
| 1520 | // Operations that do not operate on floating-point numbers and do not depend | |||
| 1521 | // on the FP environment can be folded even in strictfp functions. | |||
| 1522 | case Intrinsic::bswap: | |||
| 1523 | case Intrinsic::ctpop: | |||
| 1524 | case Intrinsic::ctlz: | |||
| 1525 | case Intrinsic::cttz: | |||
| 1526 | case Intrinsic::fshl: | |||
| 1527 | case Intrinsic::fshr: | |||
| 1528 | case Intrinsic::launder_invariant_group: | |||
| 1529 | case Intrinsic::strip_invariant_group: | |||
| 1530 | case Intrinsic::masked_load: | |||
| 1531 | case Intrinsic::get_active_lane_mask: | |||
| 1532 | case Intrinsic::abs: | |||
| 1533 | case Intrinsic::smax: | |||
| 1534 | case Intrinsic::smin: | |||
| 1535 | case Intrinsic::umax: | |||
| 1536 | case Intrinsic::umin: | |||
| 1537 | case Intrinsic::sadd_with_overflow: | |||
| 1538 | case Intrinsic::uadd_with_overflow: | |||
| 1539 | case Intrinsic::ssub_with_overflow: | |||
| 1540 | case Intrinsic::usub_with_overflow: | |||
| 1541 | case Intrinsic::smul_with_overflow: | |||
| 1542 | case Intrinsic::umul_with_overflow: | |||
| 1543 | case Intrinsic::sadd_sat: | |||
| 1544 | case Intrinsic::uadd_sat: | |||
| 1545 | case Intrinsic::ssub_sat: | |||
| 1546 | case Intrinsic::usub_sat: | |||
| 1547 | case Intrinsic::smul_fix: | |||
| 1548 | case Intrinsic::smul_fix_sat: | |||
| 1549 | case Intrinsic::bitreverse: | |||
| 1550 | case Intrinsic::is_constant: | |||
| 1551 | case Intrinsic::vector_reduce_add: | |||
| 1552 | case Intrinsic::vector_reduce_mul: | |||
| 1553 | case Intrinsic::vector_reduce_and: | |||
| 1554 | case Intrinsic::vector_reduce_or: | |||
| 1555 | case Intrinsic::vector_reduce_xor: | |||
| 1556 | case Intrinsic::vector_reduce_smin: | |||
| 1557 | case Intrinsic::vector_reduce_smax: | |||
| 1558 | case Intrinsic::vector_reduce_umin: | |||
| 1559 | case Intrinsic::vector_reduce_umax: | |||
| 1560 | // Target intrinsics | |||
| 1561 | case Intrinsic::amdgcn_perm: | |||
| 1562 | case Intrinsic::arm_mve_vctp8: | |||
| 1563 | case Intrinsic::arm_mve_vctp16: | |||
| 1564 | case Intrinsic::arm_mve_vctp32: | |||
| 1565 | case Intrinsic::arm_mve_vctp64: | |||
| 1566 | case Intrinsic::aarch64_sve_convert_from_svbool: | |||
| 1567 | // WebAssembly float semantics are always known | |||
| 1568 | case Intrinsic::wasm_trunc_signed: | |||
| 1569 | case Intrinsic::wasm_trunc_unsigned: | |||
| 1570 | return true; | |||
| 1571 | ||||
| 1572 | // Floating-point operations cannot be folded in strictfp functions in the | |||
| 1573 | // general case. They can be folded if the FP environment is known to the compiler. | |||
| 1574 | case Intrinsic::minnum: | |||
| 1575 | case Intrinsic::maxnum: | |||
| 1576 | case Intrinsic::minimum: | |||
| 1577 | case Intrinsic::maximum: | |||
| 1578 | case Intrinsic::log: | |||
| 1579 | case Intrinsic::log2: | |||
| 1580 | case Intrinsic::log10: | |||
| 1581 | case Intrinsic::exp: | |||
| 1582 | case Intrinsic::exp2: | |||
| 1583 | case Intrinsic::sqrt: | |||
| 1584 | case Intrinsic::sin: | |||
| 1585 | case Intrinsic::cos: | |||
| 1586 | case Intrinsic::pow: | |||
| 1587 | case Intrinsic::powi: | |||
| 1588 | case Intrinsic::fma: | |||
| 1589 | case Intrinsic::fmuladd: | |||
| 1590 | case Intrinsic::fptoui_sat: | |||
| 1591 | case Intrinsic::fptosi_sat: | |||
| 1592 | case Intrinsic::convert_from_fp16: | |||
| 1593 | case Intrinsic::convert_to_fp16: | |||
| 1594 | case Intrinsic::amdgcn_cos: | |||
| 1595 | case Intrinsic::amdgcn_cubeid: | |||
| 1596 | case Intrinsic::amdgcn_cubema: | |||
| 1597 | case Intrinsic::amdgcn_cubesc: | |||
| 1598 | case Intrinsic::amdgcn_cubetc: | |||
| 1599 | case Intrinsic::amdgcn_fmul_legacy: | |||
| 1600 | case Intrinsic::amdgcn_fma_legacy: | |||
| 1601 | case Intrinsic::amdgcn_fract: | |||
| 1602 | case Intrinsic::amdgcn_ldexp: | |||
| 1603 | case Intrinsic::amdgcn_sin: | |||
| 1604 | // The intrinsics below depend on rounding mode in MXCSR. | |||
| 1605 | case Intrinsic::x86_sse_cvtss2si: | |||
| 1606 | case Intrinsic::x86_sse_cvtss2si64: | |||
| 1607 | case Intrinsic::x86_sse_cvttss2si: | |||
| 1608 | case Intrinsic::x86_sse_cvttss2si64: | |||
| 1609 | case Intrinsic::x86_sse2_cvtsd2si: | |||
| 1610 | case Intrinsic::x86_sse2_cvtsd2si64: | |||
| 1611 | case Intrinsic::x86_sse2_cvttsd2si: | |||
| 1612 | case Intrinsic::x86_sse2_cvttsd2si64: | |||
| 1613 | case Intrinsic::x86_avx512_vcvtss2si32: | |||
| 1614 | case Intrinsic::x86_avx512_vcvtss2si64: | |||
| 1615 | case Intrinsic::x86_avx512_cvttss2si: | |||
| 1616 | case Intrinsic::x86_avx512_cvttss2si64: | |||
| 1617 | case Intrinsic::x86_avx512_vcvtsd2si32: | |||
| 1618 | case Intrinsic::x86_avx512_vcvtsd2si64: | |||
| 1619 | case Intrinsic::x86_avx512_cvttsd2si: | |||
| 1620 | case Intrinsic::x86_avx512_cvttsd2si64: | |||
| 1621 | case Intrinsic::x86_avx512_vcvtss2usi32: | |||
| 1622 | case Intrinsic::x86_avx512_vcvtss2usi64: | |||
| 1623 | case Intrinsic::x86_avx512_cvttss2usi: | |||
| 1624 | case Intrinsic::x86_avx512_cvttss2usi64: | |||
| 1625 | case Intrinsic::x86_avx512_vcvtsd2usi32: | |||
| 1626 | case Intrinsic::x86_avx512_vcvtsd2usi64: | |||
| 1627 | case Intrinsic::x86_avx512_cvttsd2usi: | |||
| 1628 | case Intrinsic::x86_avx512_cvttsd2usi64: | |||
| 1629 | return !Call->isStrictFP(); | |||
| 1630 | ||||
| 1631 | // Sign operations are actually bitwise operations; they do not raise | |||
| 1632 | // exceptions even for SNaNs. | |||
| 1633 | case Intrinsic::fabs: | |||
| 1634 | case Intrinsic::copysign: | |||
| 1635 | case Intrinsic::is_fpclass: | |||
| 1636 | // Non-constrained variants of rounding operations imply the default FP | |||
| 1637 | // environment; they can be folded in any case. | |||
| 1638 | case Intrinsic::ceil: | |||
| 1639 | case Intrinsic::floor: | |||
| 1640 | case Intrinsic::round: | |||
| 1641 | case Intrinsic::roundeven: | |||
| 1642 | case Intrinsic::trunc: | |||
| 1643 | case Intrinsic::nearbyint: | |||
| 1644 | case Intrinsic::rint: | |||
| 1645 | case Intrinsic::canonicalize: | |||
| 1646 | // Constrained intrinsics can be folded if the FP environment is known | |||
| 1647 | // to the compiler. | |||
| 1648 | case Intrinsic::experimental_constrained_fma: | |||
| 1649 | case Intrinsic::experimental_constrained_fmuladd: | |||
| 1650 | case Intrinsic::experimental_constrained_fadd: | |||
| 1651 | case Intrinsic::experimental_constrained_fsub: | |||
| 1652 | case Intrinsic::experimental_constrained_fmul: | |||
| 1653 | case Intrinsic::experimental_constrained_fdiv: | |||
| 1654 | case Intrinsic::experimental_constrained_frem: | |||
| 1655 | case Intrinsic::experimental_constrained_ceil: | |||
| 1656 | case Intrinsic::experimental_constrained_floor: | |||
| 1657 | case Intrinsic::experimental_constrained_round: | |||
| 1658 | case Intrinsic::experimental_constrained_roundeven: | |||
| 1659 | case Intrinsic::experimental_constrained_trunc: | |||
| 1660 | case Intrinsic::experimental_constrained_nearbyint: | |||
| 1661 | case Intrinsic::experimental_constrained_rint: | |||
| 1662 | case Intrinsic::experimental_constrained_fcmp: | |||
| 1663 | case Intrinsic::experimental_constrained_fcmps: | |||
| 1664 | return true; | |||
| 1665 | default: | |||
| 1666 | return false; | |||
| 1667 | case Intrinsic::not_intrinsic: break; | |||
| 1668 | } | |||
| 1669 | ||||
| 1670 | if (!F->hasName() || Call->isStrictFP()) | |||
| 1671 | return false; | |||
| 1672 | ||||
| 1673 | // In these cases, checking the length is required. We don't want to | |||
| 1674 | // return true for a name like "cos\0blah", which strcmp would consider | |||
| 1675 | // equal to "cos" but which has length 8. | |||
| 1676 | StringRef Name = F->getName(); | |||
| 1677 | switch (Name[0]) { | |||
| 1678 | default: | |||
| 1679 | return false; | |||
| 1680 | case 'a': | |||
| 1681 | return Name == "acos" || Name == "acosf" || | |||
| 1682 | Name == "asin" || Name == "asinf" || | |||
| 1683 | Name == "atan" || Name == "atanf" || | |||
| 1684 | Name == "atan2" || Name == "atan2f"; | |||
| 1685 | case 'c': | |||
| 1686 | return Name == "ceil" || Name == "ceilf" || | |||
| 1687 | Name == "cos" || Name == "cosf" || | |||
| 1688 | Name == "cosh" || Name == "coshf"; | |||
| 1689 | case 'e': | |||
| 1690 | return Name == "exp" || Name == "expf" || | |||
| 1691 | Name == "exp2" || Name == "exp2f"; | |||
| 1692 | case 'f': | |||
| 1693 | return Name == "fabs" || Name == "fabsf" || | |||
| 1694 | Name == "floor" || Name == "floorf" || | |||
| 1695 | Name == "fmod" || Name == "fmodf"; | |||
| 1696 | case 'l': | |||
| 1697 | return Name == "log" || Name == "logf" || | |||
| 1698 | Name == "log2" || Name == "log2f" || | |||
| 1699 | Name == "log10" || Name == "log10f"; | |||
| 1700 | case 'n': | |||
| 1701 | return Name == "nearbyint" || Name == "nearbyintf"; | |||
| 1702 | case 'p': | |||
| 1703 | return Name == "pow" || Name == "powf"; | |||
| 1704 | case 'r': | |||
| 1705 | return Name == "remainder" || Name == "remainderf" || | |||
| 1706 | Name == "rint" || Name == "rintf" || | |||
| 1707 | Name == "round" || Name == "roundf"; | |||
| 1708 | case 's': | |||
| 1709 | return Name == "sin" || Name == "sinf" || | |||
| 1710 | Name == "sinh" || Name == "sinhf" || | |||
| 1711 | Name == "sqrt" || Name == "sqrtf"; | |||
| 1712 | case 't': | |||
| 1713 | return Name == "tan" || Name == "tanf" || | |||
| 1714 | Name == "tanh" || Name == "tanhf" || | |||
| 1715 | Name == "trunc" || Name == "truncf"; | |||
| 1716 | case '_': | |||
| 1717 | // Check for various function names that get used for the math functions | |||
| 1718 | // when the header files are preprocessed with the macro | |||
| 1719 | // __FINITE_MATH_ONLY__ enabled. | |||
| 1720 | // The '12' here is the length of the shortest name that can match. | |||
| 1721 | // We need to check the size before looking at Name[1] and Name[2] | |||
| 1722 | // so we may as well check a limit that will eliminate mismatches. | |||
| 1723 | if (Name.size() < 12 || Name[1] != '_') | |||
| 1724 | return false; | |||
| 1725 | switch (Name[2]) { | |||
| 1726 | default: | |||
| 1727 | return false; | |||
| 1728 | case 'a': | |||
| 1729 | return Name == "__acos_finite" || Name == "__acosf_finite" || | |||
| 1730 | Name == "__asin_finite" || Name == "__asinf_finite" || | |||
| 1731 | Name == "__atan2_finite" || Name == "__atan2f_finite"; | |||
| 1732 | case 'c': | |||
| 1733 | return Name == "__cosh_finite" || Name == "__coshf_finite"; | |||
| 1734 | case 'e': | |||
| 1735 | return Name == "__exp_finite" || Name == "__expf_finite" || | |||
| 1736 | Name == "__exp2_finite" || Name == "__exp2f_finite"; | |||
| 1737 | case 'l': | |||
| 1738 | return Name == "__log_finite" || Name == "__logf_finite" || | |||
| 1739 | Name == "__log10_finite" || Name == "__log10f_finite"; | |||
| 1740 | case 'p': | |||
| 1741 | return Name == "__pow_finite" || Name == "__powf_finite"; | |||
| 1742 | case 's': | |||
| 1743 | return Name == "__sinh_finite" || Name == "__sinhf_finite"; | |||
| 1744 | } | |||
| 1745 | } | |||
| 1746 | } | |||
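
// Usage sketch (hypothetical helper, not part of the original file): gate a
// fold attempt on canConstantFoldCallTo the same way the CallBase path in
// ConstantFoldInstOperandsImpl does, collecting constant arguments first.
static Constant *tryFoldCallExample(CallBase *Call,
                                    const TargetLibraryInfo *TLI) {
  Function *F = Call->getCalledFunction();
  if (!F || !canConstantFoldCallTo(Call, F))
    return nullptr;
  SmallVector<Constant *, 4> Args;
  for (Value *A : Call->args()) {
    auto *CA = dyn_cast<Constant>(A);
    if (!CA)
      return nullptr; // every argument must already be a constant
    Args.push_back(CA);
  }
  return ConstantFoldCall(Call, F, Args, TLI);
}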
| 1747 | ||||
| 1748 | namespace { | |||
| 1749 | ||||
| 1750 | Constant *GetConstantFoldFPValue(double V, Type *Ty) { | |||
| 1751 | if (Ty->isHalfTy() || Ty->isFloatTy()) { | |||
| 1752 | APFloat APF(V); | |||
| 1753 | bool unused; | |||
| 1754 | APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused); | |||
| 1755 | return ConstantFP::get(Ty->getContext(), APF); | |||
| 1756 | } | |||
| 1757 | if (Ty->isDoubleTy()) | |||
| 1758 | return ConstantFP::get(Ty->getContext(), APFloat(V)); | |||
| 1759 | llvm_unreachable("Can only constant fold half/float/double")::llvm::llvm_unreachable_internal("Can only constant fold half/float/double" , "llvm/lib/Analysis/ConstantFolding.cpp", 1759); | |||
| 1760 | } | |||
| 1761 | ||||
| 1762 | /// Clear the floating-point exception state. | |||
| 1763 | inline void llvm_fenv_clearexcept() { | |||
| 1764 | #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT | |||
| 1765 | feclearexcept(FE_ALL_EXCEPT); | |||
| 1766 | #endif | |||
| 1767 | errno = 0; | |||
| 1768 | } | |||
| 1769 | ||||
| 1770 | /// Test if a floating-point exception was raised. | |||
| 1771 | inline bool llvm_fenv_testexcept() { | |||
| 1772 | int errno_val = errno; | |||
| 1773 | if (errno_val == ERANGE || errno_val == EDOM) | |||
| 1774 | return true; | |||
| 1775 | #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT | |||
| 1776 | if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) | |||
| 1777 | return true; | |||
| 1778 | #endif | |||
| 1779 | return false; | |||
| 1780 | } | |||
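
// Standalone sketch (hypothetical helper) of the clear/call/test discipline
// used by ConstantFoldFP and ConstantFoldBinaryFP below: evaluate a host
// libm function and reject the result if errno or a relevant FP exception
// was raised during the call.
static bool safeHostUnaryFP(double (*Fn)(double), double X, double &Out) {
  llvm_fenv_clearexcept();
  Out = Fn(X);
  return !llvm_fenv_testexcept(); // false means the result must be discarded
}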
| 1781 | ||||
| 1782 | Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, | |||
| 1783 | Type *Ty) { | |||
| 1784 | llvm_fenv_clearexcept(); | |||
| 1785 | double Result = NativeFP(V.convertToDouble()); | |||
| 1786 | if (llvm_fenv_testexcept()) { | |||
| 1787 | llvm_fenv_clearexcept(); | |||
| 1788 | return nullptr; | |||
| 1789 | } | |||
| 1790 | ||||
| 1791 | return GetConstantFoldFPValue(Result, Ty); | |||
| 1792 | } | |||
| 1793 | ||||
| 1794 | Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), | |||
| 1795 | const APFloat &V, const APFloat &W, Type *Ty) { | |||
| 1796 | llvm_fenv_clearexcept(); | |||
| 1797 | double Result = NativeFP(V.convertToDouble(), W.convertToDouble()); | |||
| 1798 | if (llvm_fenv_testexcept()) { | |||
| 1799 | llvm_fenv_clearexcept(); | |||
| 1800 | return nullptr; | |||
| 1801 | } | |||
| 1802 | ||||
| 1803 | return GetConstantFoldFPValue(Result, Ty); | |||
| 1804 | } | |||
| 1805 | ||||
| 1806 | Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { | |||
| 1807 | FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType()); | |||
| 1808 | if (!VT) | |||
| 1809 | return nullptr; | |||
| 1810 | ||||
| 1811 | // This isn't strictly necessary, but handle the special/common case of zero: | |||
| 1812 | // all integer reductions of a zero input produce zero. | |||
| 1813 | if (isa<ConstantAggregateZero>(Op)) | |||
| 1814 | return ConstantInt::get(VT->getElementType(), 0); | |||
| 1815 | ||||
| 1816 | // This is the same as the underlying binops - poison propagates. | |||
| 1817 | if (isa<PoisonValue>(Op) || Op->containsPoisonElement()) | |||
| 1818 | return PoisonValue::get(VT->getElementType()); | |||
| 1819 | ||||
| 1820 | // TODO: Handle undef. | |||
| 1821 | if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op)) | |||
| 1822 | return nullptr; | |||
| 1823 | ||||
| 1824 | auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U)); | |||
| 1825 | if (!EltC) | |||
| 1826 | return nullptr; | |||
| 1827 | ||||
| 1828 | APInt Acc = EltC->getValue(); | |||
| 1829 | for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) { | |||
| 1830 | if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I)))) | |||
| 1831 | return nullptr; | |||
| 1832 | const APInt &X = EltC->getValue(); | |||
| 1833 | switch (IID) { | |||
| 1834 | case Intrinsic::vector_reduce_add: | |||
| 1835 | Acc = Acc + X; | |||
| 1836 | break; | |||
| 1837 | case Intrinsic::vector_reduce_mul: | |||
| 1838 | Acc = Acc * X; | |||
| 1839 | break; | |||
| 1840 | case Intrinsic::vector_reduce_and: | |||
| 1841 | Acc = Acc & X; | |||
| 1842 | break; | |||
| 1843 | case Intrinsic::vector_reduce_or: | |||
| 1844 | Acc = Acc | X; | |||
| 1845 | break; | |||
| 1846 | case Intrinsic::vector_reduce_xor: | |||
| 1847 | Acc = Acc ^ X; | |||
| 1848 | break; | |||
| 1849 | case Intrinsic::vector_reduce_smin: | |||
| 1850 | Acc = APIntOps::smin(Acc, X); | |||
| 1851 | break; | |||
| 1852 | case Intrinsic::vector_reduce_smax: | |||
| 1853 | Acc = APIntOps::smax(Acc, X); | |||
| 1854 | break; | |||
| 1855 | case Intrinsic::vector_reduce_umin: | |||
| 1856 | Acc = APIntOps::umin(Acc, X); | |||
| 1857 | break; | |||
| 1858 | case Intrinsic::vector_reduce_umax: | |||
| 1859 | Acc = APIntOps::umax(Acc, X); | |||
| 1860 | break; | |||
| 1861 | } | |||
| 1862 | } | |||
| 1863 | ||||
| 1864 | return ConstantInt::get(Op->getContext(), Acc); | |||
| 1865 | } | |||
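
// Worked example (hypothetical unit-test style, not part of the original
// file): vector_reduce_add over <4 x i32> <1, 2, 3, 4> folds to i32 10 by
// seeding Acc with element 0 and accumulating the remaining elements.
static Constant *reduceAddExample(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *Elts[] = {ConstantInt::get(I32, 1), ConstantInt::get(I32, 2),
                      ConstantInt::get(I32, 3), ConstantInt::get(I32, 4)};
  return constantFoldVectorReduce(Intrinsic::vector_reduce_add,
                                  ConstantVector::get(Elts));
}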
| 1866 | ||||
| 1867 | /// Attempt to fold an SSE floating point to integer conversion of a constant | |||
| 1868 | /// floating point. If roundTowardZero is false, the default IEEE rounding is | |||
| 1869 | /// used (toward nearest, ties to even). This matches the behavior of the | |||
| 1870 | /// non-truncating SSE instructions in the default rounding mode. The desired | |||
| 1871 | /// integer type Ty is used to select how many bits are available for the | |||
| 1872 | /// result. Returns null if the conversion cannot be performed, otherwise | |||
| 1873 | /// returns the Constant value resulting from the conversion. | |||
| 1874 | Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero, | |||
| 1875 | Type *Ty, bool IsSigned) { | |||
| 1876 | // All of these conversion intrinsics form an integer of at most 64 bits. | |||
| 1877 | unsigned ResultWidth = Ty->getIntegerBitWidth(); | |||
| 1878 | assert(ResultWidth <= 64 && | |||
| 1879 | "Can only constant fold conversions to 64 and 32 bit ints"); | |||
| 1880 | ||||
| 1881 | uint64_t UIntVal; | |||
| 1882 | bool isExact = false; | |||
| 1883 | APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero | |||
| 1884 | : APFloat::rmNearestTiesToEven; | |||
| 1885 | APFloat::opStatus status = | |||
| 1886 | Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth, | |||
| 1887 | IsSigned, mode, &isExact); | |||
| 1888 | if (status != APFloat::opOK && | |||
| 1889 | (!roundTowardZero || status != APFloat::opInexact)) | |||
| 1890 | return nullptr; | |||
| 1891 | return ConstantInt::get(Ty, UIntVal, IsSigned); | |||
| 1892 | } | |||
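
// Brief sketch (assumed values): the truncating conversions tolerate the
// inexact status, so a cvttsd2si-style fold of 2.7 yields i32 2, whereas
// the non-truncating form returns nullptr for the same inexact conversion.
static Constant *truncatingConvertExample(LLVMContext &Ctx) {
  return ConstantFoldSSEConvertToInt(APFloat(2.7), /*roundTowardZero=*/true,
                                     Type::getInt32Ty(Ctx), /*IsSigned=*/true);
}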
| 1893 | ||||
| 1894 | double getValueAsDouble(ConstantFP *Op) { | |||
| 1895 | Type *Ty = Op->getType(); | |||
| 1896 | ||||
| 1897 | if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) | |||
| 1898 | return Op->getValueAPF().convertToDouble(); | |||
| 1899 | ||||
| 1900 | bool unused; | |||
| 1901 | APFloat APF = Op->getValueAPF(); | |||
| 1902 | APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused); | |||
| 1903 | return APF.convertToDouble(); | |||
| 1904 | } | |||
| 1905 | ||||
| 1906 | static bool getConstIntOrUndef(Value *Op, const APInt *&C) { | |||
| 1907 | if (auto *CI = dyn_cast<ConstantInt>(Op)) { | |||
| 1908 | C = &CI->getValue(); | |||
| 1909 | return true; | |||
| 1910 | } | |||
| 1911 | if (isa<UndefValue>(Op)) { | |||
| 1912 | C = nullptr; | |||
| 1913 | return true; | |||
| 1914 | } | |||
| 1915 | return false; | |||
| 1916 | } | |||
| 1917 | ||||
| 1918 | /// Checks if the given intrinsic call, which evaluates to constant, is allowed | |||
| 1919 | /// to be folded. | |||
| 1920 | /// | |||
| 1921 | /// \param CI Constrained intrinsic call. | |||
| 1922 | /// \param St Exception flags raised during constant evaluation. | |||
| 1923 | static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, | |||
| 1924 | APFloat::opStatus St) { | |||
| 1925 | std::optional<RoundingMode> ORM = CI->getRoundingMode(); | |||
| 1926 | std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); | |||
| 1927 | ||||
| 1928 | // If the operation does not change exception status flags, it is safe | |||
| 1929 | // to fold. | |||
| 1930 | if (St == APFloat::opStatus::opOK) | |||
| 1931 | return true; | |||
| 1932 | ||||
| 1933 | // If evaluation raised an FP exception, the result can depend on the | |||
| 1934 | // rounding mode. If the latter is unknown, folding is not possible. | |||
| 1935 | if (ORM && *ORM == RoundingMode::Dynamic) | |||
| 1936 | return false; | |||
| 1937 | ||||
| 1938 | // If FP exceptions are ignored, fold the call, even if such an exception | |||
| 1939 | // is raised. | |||
| 1940 | if (EB && *EB != fp::ExceptionBehavior::ebStrict) | |||
| 1941 | return true; | |||
| 1942 | ||||
| 1943 | // Leave the calculation for runtime so that the exception flags are set | |||
| 1944 | // correctly in hardware. | |||
| 1945 | return false; | |||
| 1946 | } | |||
| 1947 | ||||
| 1948 | /// Returns the rounding mode that should be used for constant evaluation. | |||
| 1949 | static RoundingMode | |||
| 1950 | getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) { | |||
| 1951 | std::optional<RoundingMode> ORM = CI->getRoundingMode(); | |||
| 1952 | if (!ORM || *ORM == RoundingMode::Dynamic) | |||
| 1953 | // Even if the rounding mode is unknown, try evaluating the operation. | |||
| 1954 | // If it does not raise an inexact exception, rounding was not applied, | |||
| 1955 | // so the result is exact and does not depend on the rounding mode. Whether | |||
| 1956 | // other FP exceptions are raised does not depend on the rounding mode. | |||
| 1957 | return RoundingMode::NearestTiesToEven; | |||
| 1958 | return *ORM; | |||
| 1959 | } | |||
| 1960 | ||||
| 1961 | /// Try to constant fold llvm.canonicalize for the given caller and value. | |||
| 1962 | static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI, | |||
| 1963 | const APFloat &Src) { | |||
| 1964 | // Zero, positive and negative, is always OK to fold. | |||
| 1965 | if (Src.isZero()) { | |||
| 1966 | // Get a fresh 0, since ppc_fp128 does have non-canonical zeros. | |||
| 1967 | return ConstantFP::get( | |||
| 1968 | CI->getContext(), | |||
| 1969 | APFloat::getZero(Src.getSemantics(), Src.isNegative())); | |||
| 1970 | } | |||
| 1971 | ||||
| 1972 | if (!Ty->isIEEELikeFPTy()) | |||
| 1973 | return nullptr; | |||
| 1974 | ||||
| 1975 | // Zero is always canonical and the sign must be preserved. | |||
| 1976 | // | |||
| 1977 | // Denorms and nans may have special encodings, but it should be OK to fold a | |||
| 1978 | // totally average number. | |||
| 1979 | if (Src.isNormal() || Src.isInfinity()) | |||
| 1980 | return ConstantFP::get(CI->getContext(), Src); | |||
| 1981 | ||||
| 1982 | if (Src.isDenormal() && CI->getParent() && CI->getFunction()) { | |||
| 1983 | DenormalMode DenormMode = | |||
| 1984 | CI->getFunction()->getDenormalMode(Src.getSemantics()); | |||
| 1985 | ||||
| 1986 | // TODO: Should allow folding for pure IEEE. | |||
| 1987 | if (DenormMode == DenormalMode::getIEEE()) | |||
| 1988 | return nullptr; | |||
| 1989 | ||||
| 1990 | if (DenormMode == DenormalMode::getDynamic()) | |||
| 1991 | return nullptr; | |||
| 1992 | ||||
| 1993 | // If we know if either input or output is flushed, we can fold. | |||
| 1994 | if ((DenormMode.Input == DenormalMode::Dynamic && | |||
| 1995 | DenormMode.Output == DenormalMode::IEEE) || | |||
| 1996 | (DenormMode.Input == DenormalMode::IEEE && | |||
| 1997 | DenormMode.Output == DenormalMode::Dynamic)) | |||
| 1998 | return nullptr; | |||
| 1999 | ||||
| 2000 | bool IsPositive = | |||
| 2001 | (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero || | |||
| 2002 | (DenormMode.Output == DenormalMode::PositiveZero && | |||
| 2003 | DenormMode.Input == DenormalMode::IEEE)); | |||
| 2004 | ||||
| 2005 | return ConstantFP::get(CI->getContext(), | |||
| 2006 | APFloat::getZero(Src.getSemantics(), !IsPositive)); | |||
| 2007 | } | |||
| 2008 | ||||
| 2009 | return nullptr; | |||
| 2010 | } | |||
| 2011 | ||||
| 2012 | static Constant *ConstantFoldScalarCall1(StringRef Name, | |||
| 2013 | Intrinsic::ID IntrinsicID, | |||
| 2014 | Type *Ty, | |||
| 2015 | ArrayRef<Constant *> Operands, | |||
| 2016 | const TargetLibraryInfo *TLI, | |||
| 2017 | const CallBase *Call) { | |||
| 2018 | assert(Operands.size() == 1 && "Wrong number of operands."); | |||
| 2019 | ||||
| 2020 | if (IntrinsicID == Intrinsic::is_constant) { | |||
| 2021 | // We know we have a "Constant" argument. But we want to only | |||
| 2022 | // return true for manifest constants, not those that depend on | |||
| 2023 | // constants with unknowable values, e.g. GlobalValue or BlockAddress. | |||
| 2024 | if (Operands[0]->isManifestConstant()) | |||
| 2025 | return ConstantInt::getTrue(Ty->getContext()); | |||
| 2026 | return nullptr; | |||
| 2027 | } | |||
| 2028 | ||||
| 2029 | if (isa<PoisonValue>(Operands[0])) { | |||
| 2030 | // TODO: All of these operations should probably propagate poison. | |||
| 2031 | if (IntrinsicID == Intrinsic::canonicalize) | |||
| 2032 | return PoisonValue::get(Ty); | |||
| 2033 | } | |||
| 2034 | ||||
| 2035 | if (isa<UndefValue>(Operands[0])) { | |||
| 2036 | // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. | |||
| 2037 | // ctpop() is between 0 and bitwidth, pick 0 for undef. | |||
| 2038 | // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). | |||
| 2039 | if (IntrinsicID == Intrinsic::cos || | |||
| 2040 | IntrinsicID == Intrinsic::ctpop || | |||
| 2041 | IntrinsicID == Intrinsic::fptoui_sat || | |||
| 2042 | IntrinsicID == Intrinsic::fptosi_sat || | |||
| 2043 | IntrinsicID == Intrinsic::canonicalize) | |||
| 2044 | return Constant::getNullValue(Ty); | |||
| 2045 | if (IntrinsicID == Intrinsic::bswap || | |||
| 2046 | IntrinsicID == Intrinsic::bitreverse || | |||
| 2047 | IntrinsicID == Intrinsic::launder_invariant_group || | |||
| 2048 | IntrinsicID == Intrinsic::strip_invariant_group) | |||
| 2049 | return Operands[0]; | |||
| 2050 | } | |||
| 2051 | ||||
| 2052 | if (isa<ConstantPointerNull>(Operands[0])) { | |||
| 2053 | // launder(null) == null == strip(null) iff in addrspace 0 | |||
| 2054 | if (IntrinsicID == Intrinsic::launder_invariant_group || | |||
| 2055 | IntrinsicID == Intrinsic::strip_invariant_group) { | |||
| 2056 | // If the instruction is not yet in a basic block (e.g. when cloning | |||
| 2057 | // a function during inlining), Call's caller may not be available. | |||
| 2058 | // So check Call's BB first before querying Call->getCaller. | |||
| 2059 | const Function *Caller = | |||
| 2060 | Call->getParent() ? Call->getCaller() : nullptr; | |||
| 2061 | if (Caller && | |||
| 2062 | !NullPointerIsDefined( | |||
| 2063 | Caller, Operands[0]->getType()->getPointerAddressSpace())) { | |||
| 2064 | return Operands[0]; | |||
| 2065 | } | |||
| 2066 | return nullptr; | |||
| 2067 | } | |||
| 2068 | } | |||
| 2069 | ||||
| 2070 | if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) { | |||
| 2071 | if (IntrinsicID == Intrinsic::convert_to_fp16) { | |||
| 2072 | APFloat Val(Op->getValueAPF()); | |||
| 2073 | ||||
| 2074 | bool lost = false; | |||
| 2075 | Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); | |||
| 2076 | ||||
| 2077 | return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); | |||
| 2078 | } | |||
| 2079 | ||||
| 2080 | APFloat U = Op->getValueAPF(); | |||
| 2081 | ||||
| 2082 | if (IntrinsicID == Intrinsic::wasm_trunc_signed || | |||
| 2083 | IntrinsicID == Intrinsic::wasm_trunc_unsigned) { | |||
| 2084 | bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed; | |||
| 2085 | ||||
| 2086 | if (U.isNaN()) | |||
| 2087 | return nullptr; | |||
| 2088 | ||||
| 2089 | unsigned Width = Ty->getIntegerBitWidth(); | |||
| 2090 | APSInt Int(Width, !Signed); | |||
| 2091 | bool IsExact = false; | |||
| 2092 | APFloat::opStatus Status = | |||
| 2093 | U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); | |||
| 2094 | ||||
| 2095 | if (Status == APFloat::opOK || Status == APFloat::opInexact) | |||
| 2096 | return ConstantInt::get(Ty, Int); | |||
| 2097 | ||||
| 2098 | return nullptr; | |||
| 2099 | } | |||
| 2100 | ||||
| 2101 | if (IntrinsicID == Intrinsic::fptoui_sat || | |||
| 2102 | IntrinsicID == Intrinsic::fptosi_sat) { | |||
| 2103 | // convertToInteger() already has the desired saturation semantics. | |||
| 2104 | APSInt Int(Ty->getIntegerBitWidth(), | |||
| 2105 | IntrinsicID == Intrinsic::fptoui_sat); | |||
| 2106 | bool IsExact; | |||
| 2107 | U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); | |||
| 2108 | return ConstantInt::get(Ty, Int); | |||
| 2109 | } | |||
| 2110 | ||||
| 2111 | if (IntrinsicID == Intrinsic::canonicalize) | |||
| 2112 | return constantFoldCanonicalize(Ty, Call, U); | |||
| 2113 | ||||
| 2114 | if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) | |||
| 2115 | return nullptr; | |||
| 2116 | ||||
| 2117 | // Use internal versions of these intrinsics. | |||
| 2118 | ||||
| 2119 | if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { | |||
| 2120 | U.roundToIntegral(APFloat::rmNearestTiesToEven); | |||
| 2121 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2122 | } | |||
| 2123 | ||||
| 2124 | if (IntrinsicID == Intrinsic::round) { | |||
| 2125 | U.roundToIntegral(APFloat::rmNearestTiesToAway); | |||
| 2126 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2127 | } | |||
| 2128 | ||||
| 2129 | if (IntrinsicID == Intrinsic::roundeven) { | |||
| 2130 | U.roundToIntegral(APFloat::rmNearestTiesToEven); | |||
| 2131 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2132 | } | |||
| 2133 | ||||
| 2134 | if (IntrinsicID == Intrinsic::ceil) { | |||
| 2135 | U.roundToIntegral(APFloat::rmTowardPositive); | |||
| 2136 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2137 | } | |||
| 2138 | ||||
| 2139 | if (IntrinsicID == Intrinsic::floor) { | |||
| 2140 | U.roundToIntegral(APFloat::rmTowardNegative); | |||
| 2141 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2142 | } | |||
| 2143 | ||||
| 2144 | if (IntrinsicID == Intrinsic::trunc) { | |||
| 2145 | U.roundToIntegral(APFloat::rmTowardZero); | |||
| 2146 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2147 | } | |||
| 2148 | ||||
| 2149 | if (IntrinsicID == Intrinsic::fabs) { | |||
| 2150 | U.clearSign(); | |||
| 2151 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2152 | } | |||
| 2153 | ||||
| 2154 | if (IntrinsicID == Intrinsic::amdgcn_fract) { | |||
| 2155 | // The v_fract instruction behaves like the OpenCL spec, which defines | |||
| 2156 | // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is | |||
| 2157 | // there to prevent fract(-small) from returning 1.0. It returns the | |||
| 2158 | // largest positive floating-point number less than 1.0." | |||
| 2159 | APFloat FloorU(U); | |||
| 2160 | FloorU.roundToIntegral(APFloat::rmTowardNegative); | |||
| 2161 | APFloat FractU(U - FloorU); | |||
| 2162 | APFloat AlmostOne(U.getSemantics(), 1); | |||
| 2163 | AlmostOne.next(/*nextDown*/ true); | |||
| 2164 | return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); | |||
| 2165 | } | |||
| 2166 | ||||
| 2167 | // Rounding operations (floor, trunc, ceil, round and nearbyint) do not | |||
| 2168 | // raise FP exceptions, unless the argument is signaling NaN. | |||
| 2169 | ||||
| 2170 | std::optional<APFloat::roundingMode> RM; | |||
| 2171 | switch (IntrinsicID) { | |||
| 2172 | default: | |||
| 2173 | break; | |||
| 2174 | case Intrinsic::experimental_constrained_nearbyint: | |||
| 2175 | case Intrinsic::experimental_constrained_rint: { | |||
| 2176 | auto CI = cast<ConstrainedFPIntrinsic>(Call); | |||
| 2177 | RM = CI->getRoundingMode(); | |||
| 2178 | if (!RM || *RM == RoundingMode::Dynamic) | |||
| 2179 | return nullptr; | |||
| 2180 | break; | |||
| 2181 | } | |||
| 2182 | case Intrinsic::experimental_constrained_round: | |||
| 2183 | RM = APFloat::rmNearestTiesToAway; | |||
| 2184 | break; | |||
| 2185 | case Intrinsic::experimental_constrained_ceil: | |||
| 2186 | RM = APFloat::rmTowardPositive; | |||
| 2187 | break; | |||
| 2188 | case Intrinsic::experimental_constrained_floor: | |||
| 2189 | RM = APFloat::rmTowardNegative; | |||
| 2190 | break; | |||
| 2191 | case Intrinsic::experimental_constrained_trunc: | |||
| 2192 | RM = APFloat::rmTowardZero; | |||
| 2193 | break; | |||
| 2194 | } | |||
| 2195 | if (RM) { | |||
| 2196 | auto CI = cast<ConstrainedFPIntrinsic>(Call); | |||
| 2197 | if (U.isFinite()) { | |||
| 2198 | APFloat::opStatus St = U.roundToIntegral(*RM); | |||
| 2199 | if (IntrinsicID == Intrinsic::experimental_constrained_rint && | |||
| 2200 | St == APFloat::opInexact) { | |||
| 2201 | std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); | |||
| 2202 | if (EB && *EB == fp::ebStrict) | |||
| 2203 | return nullptr; | |||
| 2204 | } | |||
| 2205 | } else if (U.isSignaling()) { | |||
| 2206 | std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior(); | |||
| 2207 | if (EB && *EB != fp::ebIgnore) | |||
| 2208 | return nullptr; | |||
| 2209 | U = APFloat::getQNaN(U.getSemantics()); | |||
| 2210 | } | |||
| 2211 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2212 | } | |||
| 2213 | ||||
| 2214 | // We only fold functions with finite arguments. Folding NaN and inf is | |||
| 2215 | // likely to be aborted with an exception anyway, and some host libms | |||
| 2216 | // have known errors raising exceptions. | |||
| 2217 | if (!U.isFinite()) | |||
| 2218 | return nullptr; | |||
| 2219 | ||||
| 2220 | // Currently APFloat versions of these functions do not exist, so we use | |||
| 2221 | // the host native double versions. Float versions are not called | |||
| 2222 | // directly, but for all of these it is true that (float)(f((double)arg)) == | |||
| 2223 | // f(arg). Long double is not supported yet. | |||
| 2224 | const APFloat &APF = Op->getValueAPF(); | |||
| 2225 | ||||
| 2226 | switch (IntrinsicID) { | |||
| 2227 | default: break; | |||
| 2228 | case Intrinsic::log: | |||
| 2229 | return ConstantFoldFP(log, APF, Ty); | |||
| 2230 | case Intrinsic::log2: | |||
| 2231 | // TODO: What about hosts that lack a C99 library? | |||
| 2232 | return ConstantFoldFP(log2, APF, Ty); | |||
| 2233 | case Intrinsic::log10: | |||
| 2234 | // TODO: What about hosts that lack a C99 library? | |||
| 2235 | return ConstantFoldFP(log10, APF, Ty); | |||
| 2236 | case Intrinsic::exp: | |||
| 2237 | return ConstantFoldFP(exp, APF, Ty); | |||
| 2238 | case Intrinsic::exp2: | |||
| 2239 | // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. | |||
| 2240 | return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); | |||
| 2241 | case Intrinsic::sin: | |||
| 2242 | return ConstantFoldFP(sin, APF, Ty); | |||
| 2243 | case Intrinsic::cos: | |||
| 2244 | return ConstantFoldFP(cos, APF, Ty); | |||
| 2245 | case Intrinsic::sqrt: | |||
| 2246 | return ConstantFoldFP(sqrt, APF, Ty); | |||
| 2247 | case Intrinsic::amdgcn_cos: | |||
| 2248 | case Intrinsic::amdgcn_sin: { | |||
| 2249 | double V = getValueAsDouble(Op); | |||
| 2250 | if (V < -256.0 || V > 256.0) | |||
| 2251 | // The gfx8 and gfx9 architectures handle arguments outside the range | |||
| 2252 | // [-256, 256] differently. This should be a rare case so bail out | |||
| 2253 | // rather than trying to handle the difference. | |||
| 2254 | return nullptr; | |||
| 2255 | bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos; | |||
| 2256 | double V4 = V * 4.0; | |||
| 2257 | if (V4 == floor(V4)) { | |||
| 2258 | // Force exact results for quarter-integer inputs. | |||
| 2259 | const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 }; | |||
| 2260 | V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3]; | |||
| 2261 | } else { | |||
| 2262 | if (IsCos) | |||
| 2263 | V = cos(V * 2.0 * numbers::pi); | |||
| 2264 | else | |||
| 2265 | V = sin(V * 2.0 * numbers::pi); | |||
| 2266 | } | |||
| 2267 | return GetConstantFoldFPValue(V, Ty); | |||
| 2268 | } | |||
| 2269 | } | |||
| 2270 | ||||
| 2271 | if (!TLI) | |||
| 2272 | return nullptr; | |||
| 2273 | ||||
| 2274 | LibFunc Func = NotLibFunc; | |||
| 2275 | if (!TLI->getLibFunc(Name, Func)) | |||
| 2276 | return nullptr; | |||
| 2277 | ||||
| 2278 | switch (Func) { | |||
| 2279 | default: | |||
| 2280 | break; | |||
| 2281 | case LibFunc_acos: | |||
| 2282 | case LibFunc_acosf: | |||
| 2283 | case LibFunc_acos_finite: | |||
| 2284 | case LibFunc_acosf_finite: | |||
| 2285 | if (TLI->has(Func)) | |||
| 2286 | return ConstantFoldFP(acos, APF, Ty); | |||
| 2287 | break; | |||
| 2288 | case LibFunc_asin: | |||
| 2289 | case LibFunc_asinf: | |||
| 2290 | case LibFunc_asin_finite: | |||
| 2291 | case LibFunc_asinf_finite: | |||
| 2292 | if (TLI->has(Func)) | |||
| 2293 | return ConstantFoldFP(asin, APF, Ty); | |||
| 2294 | break; | |||
| 2295 | case LibFunc_atan: | |||
| 2296 | case LibFunc_atanf: | |||
| 2297 | if (TLI->has(Func)) | |||
| 2298 | return ConstantFoldFP(atan, APF, Ty); | |||
| 2299 | break; | |||
| 2300 | case LibFunc_ceil: | |||
| 2301 | case LibFunc_ceilf: | |||
| 2302 | if (TLI->has(Func)) { | |||
| 2303 | U.roundToIntegral(APFloat::rmTowardPositive); | |||
| 2304 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2305 | } | |||
| 2306 | break; | |||
| 2307 | case LibFunc_cos: | |||
| 2308 | case LibFunc_cosf: | |||
| 2309 | if (TLI->has(Func)) | |||
| 2310 | return ConstantFoldFP(cos, APF, Ty); | |||
| 2311 | break; | |||
| 2312 | case LibFunc_cosh: | |||
| 2313 | case LibFunc_coshf: | |||
| 2314 | case LibFunc_cosh_finite: | |||
| 2315 | case LibFunc_coshf_finite: | |||
| 2316 | if (TLI->has(Func)) | |||
| 2317 | return ConstantFoldFP(cosh, APF, Ty); | |||
| 2318 | break; | |||
| 2319 | case LibFunc_exp: | |||
| 2320 | case LibFunc_expf: | |||
| 2321 | case LibFunc_exp_finite: | |||
| 2322 | case LibFunc_expf_finite: | |||
| 2323 | if (TLI->has(Func)) | |||
| 2324 | return ConstantFoldFP(exp, APF, Ty); | |||
| 2325 | break; | |||
| 2326 | case LibFunc_exp2: | |||
| 2327 | case LibFunc_exp2f: | |||
| 2328 | case LibFunc_exp2_finite: | |||
| 2329 | case LibFunc_exp2f_finite: | |||
| 2330 | if (TLI->has(Func)) | |||
| 2331 | // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. | |||
| 2332 | return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); | |||
| 2333 | break; | |||
| 2334 | case LibFunc_fabs: | |||
| 2335 | case LibFunc_fabsf: | |||
| 2336 | if (TLI->has(Func)) { | |||
| 2337 | U.clearSign(); | |||
| 2338 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2339 | } | |||
| 2340 | break; | |||
| 2341 | case LibFunc_floor: | |||
| 2342 | case LibFunc_floorf: | |||
| 2343 | if (TLI->has(Func)) { | |||
| 2344 | U.roundToIntegral(APFloat::rmTowardNegative); | |||
| 2345 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2346 | } | |||
| 2347 | break; | |||
| 2348 | case LibFunc_log: | |||
| 2349 | case LibFunc_logf: | |||
| 2350 | case LibFunc_log_finite: | |||
| 2351 | case LibFunc_logf_finite: | |||
| 2352 | if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) | |||
| 2353 | return ConstantFoldFP(log, APF, Ty); | |||
| 2354 | break; | |||
| 2355 | case LibFunc_log2: | |||
| 2356 | case LibFunc_log2f: | |||
| 2357 | case LibFunc_log2_finite: | |||
| 2358 | case LibFunc_log2f_finite: | |||
| 2359 | if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) | |||
| 2360 | // TODO: What about hosts that lack a C99 library? | |||
| 2361 | return ConstantFoldFP(log2, APF, Ty); | |||
| 2362 | break; | |||
| 2363 | case LibFunc_log10: | |||
| 2364 | case LibFunc_log10f: | |||
| 2365 | case LibFunc_log10_finite: | |||
| 2366 | case LibFunc_log10f_finite: | |||
| 2367 | if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) | |||
| 2368 | // TODO: What about hosts that lack a C99 library? | |||
| 2369 | return ConstantFoldFP(log10, APF, Ty); | |||
| 2370 | break; | |||
| 2371 | case LibFunc_nearbyint: | |||
| 2372 | case LibFunc_nearbyintf: | |||
| 2373 | case LibFunc_rint: | |||
| 2374 | case LibFunc_rintf: | |||
| 2375 | if (TLI->has(Func)) { | |||
| 2376 | U.roundToIntegral(APFloat::rmNearestTiesToEven); | |||
| 2377 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2378 | } | |||
| 2379 | break; | |||
| 2380 | case LibFunc_round: | |||
| 2381 | case LibFunc_roundf: | |||
| 2382 | if (TLI->has(Func)) { | |||
| 2383 | U.roundToIntegral(APFloat::rmNearestTiesToAway); | |||
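| | // Unlike rint/nearbyint above (ties to even), round uses ties away | |||
| | // from zero: round(2.5) folds to 3.0 while rint(2.5) folds to 2.0. | |||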
| 2384 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2385 | } | |||
| 2386 | break; | |||
| 2387 | case LibFunc_sin: | |||
| 2388 | case LibFunc_sinf: | |||
| 2389 | if (TLI->has(Func)) | |||
| 2390 | return ConstantFoldFP(sin, APF, Ty); | |||
| 2391 | break; | |||
| 2392 | case LibFunc_sinh: | |||
| 2393 | case LibFunc_sinhf: | |||
| 2394 | case LibFunc_sinh_finite: | |||
| 2395 | case LibFunc_sinhf_finite: | |||
| 2396 | if (TLI->has(Func)) | |||
| 2397 | return ConstantFoldFP(sinh, APF, Ty); | |||
| 2398 | break; | |||
| 2399 | case LibFunc_sqrt: | |||
| 2400 | case LibFunc_sqrtf: | |||
| 2401 | if (!APF.isNegative() && TLI->has(Func)) | |||
| 2402 | return ConstantFoldFP(sqrt, APF, Ty); | |||
| 2403 | break; | |||
| 2404 | case LibFunc_tan: | |||
| 2405 | case LibFunc_tanf: | |||
| 2406 | if (TLI->has(Func)) | |||
| 2407 | return ConstantFoldFP(tan, APF, Ty); | |||
| 2408 | break; | |||
| 2409 | case LibFunc_tanh: | |||
| 2410 | case LibFunc_tanhf: | |||
| 2411 | if (TLI->has(Func)) | |||
| 2412 | return ConstantFoldFP(tanh, APF, Ty); | |||
| 2413 | break; | |||
| 2414 | case LibFunc_trunc: | |||
| 2415 | case LibFunc_truncf: | |||
| 2416 | if (TLI->has(Func)) { | |||
| 2417 | U.roundToIntegral(APFloat::rmTowardZero); | |||
| 2418 | return ConstantFP::get(Ty->getContext(), U); | |||
| 2419 | } | |||
| 2420 | break; | |||
| 2421 | } | |||
| 2422 | return nullptr; | |||
| 2423 | } | |||
| 2424 | ||||
| 2425 | if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { | |||
| 2426 | switch (IntrinsicID) { | |||
| 2427 | case Intrinsic::bswap: | |||
| 2428 | return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); | |||
| 2429 | case Intrinsic::ctpop: | |||
| 2430 | return ConstantInt::get(Ty, Op->getValue().popcount()); | |||
| 2431 | case Intrinsic::bitreverse: | |||
| 2432 | return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); | |||
| 2433 | case Intrinsic::convert_from_fp16: { | |||
| 2434 | APFloat Val(APFloat::IEEEhalf(), Op->getValue()); | |||
| 2435 | ||||
| 2436 | bool lost = false; | |||
| 2437 | APFloat::opStatus status = Val.convert( | |||
| 2438 | Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); | |||
| 2439 | ||||
| 2440 | // Conversion is always precise. | |||
| 2441 | (void)status; | |||
| 2442 | assert(status != APFloat::opInexact && !lost && | |||
| 2443 | "Precision lost during fp16 constfolding"); | |||
| 2444 | ||||
| 2445 | return ConstantFP::get(Ty->getContext(), Val); | |||
| 2446 | } | |||
| 2447 | default: | |||
| 2448 | return nullptr; | |||
| 2449 | } | |||
| 2450 | } | |||
| 2451 | ||||
| 2452 | switch (IntrinsicID) { | |||
| 2453 | default: break; | |||
| 2454 | case Intrinsic::vector_reduce_add: | |||
| 2455 | case Intrinsic::vector_reduce_mul: | |||
| 2456 | case Intrinsic::vector_reduce_and: | |||
| 2457 | case Intrinsic::vector_reduce_or: | |||
| 2458 | case Intrinsic::vector_reduce_xor: | |||
| 2459 | case Intrinsic::vector_reduce_smin: | |||
| 2460 | case Intrinsic::vector_reduce_smax: | |||
| 2461 | case Intrinsic::vector_reduce_umin: | |||
| 2462 | case Intrinsic::vector_reduce_umax: | |||
| 2463 | if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0])) | |||
| 2464 | return C; | |||
| 2465 | break; | |||
| 2466 | } | |||
| 2467 | ||||
| 2468 | // Support ConstantVector in case one of the elements is an Undef. | |||
| 2469 | if (isa<ConstantVector>(Operands[0]) || | |||
| 2470 | isa<ConstantDataVector>(Operands[0])) { | |||
| 2471 | auto *Op = cast<Constant>(Operands[0]); | |||
| 2472 | switch (IntrinsicID) { | |||
| 2473 | default: break; | |||
| 2474 | case Intrinsic::x86_sse_cvtss2si: | |||
| 2475 | case Intrinsic::x86_sse_cvtss2si64: | |||
| 2476 | case Intrinsic::x86_sse2_cvtsd2si: | |||
| 2477 | case Intrinsic::x86_sse2_cvtsd2si64: | |||
| 2478 | if (ConstantFP *FPOp = | |||
| 2479 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2480 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2481 | /*roundTowardZero=*/false, Ty, | |||
| 2482 | /*IsSigned*/true); | |||
| 2483 | break; | |||
| 2484 | case Intrinsic::x86_sse_cvttss2si: | |||
| 2485 | case Intrinsic::x86_sse_cvttss2si64: | |||
| 2486 | case Intrinsic::x86_sse2_cvttsd2si: | |||
| 2487 | case Intrinsic::x86_sse2_cvttsd2si64: | |||
| 2488 | if (ConstantFP *FPOp = | |||
| 2489 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2490 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2491 | /*roundTowardZero=*/true, Ty, | |||
| 2492 | /*IsSigned*/true); | |||
| 2493 | break; | |||
| 2494 | } | |||
| 2495 | } | |||
| 2496 | ||||
| 2497 | return nullptr; | |||
| 2498 | } | |||
| 2499 | ||||
| 2500 | static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2, | |||
| 2501 | const ConstrainedFPIntrinsic *Call) { | |||
| 2502 | APFloat::opStatus St = APFloat::opOK; | |||
| 2503 | auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call); | |||
| 2504 | FCmpInst::Predicate Cond = FCmp->getPredicate(); | |||
| 2505 | if (FCmp->isSignaling()) { | |||
| 2506 | if (Op1.isNaN() || Op2.isNaN()) | |||
| 2507 | St = APFloat::opInvalidOp; | |||
| 2508 | } else { | |||
| 2509 | if (Op1.isSignaling() || Op2.isSignaling()) | |||
| 2510 | St = APFloat::opInvalidOp; | |||
| 2511 | } | |||
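| | // Net effect: a signaling compare (fcmps) reports opInvalidOp for any | |||
| | // NaN operand, while a quiet compare (fcmp) does so only for signaling | |||
| | // NaNs, so e.g. fcmp(oeq, 1.0, qnan) can still fold to false when FP | |||
| | // exceptions are honored. | |||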
| 2512 | bool Result = FCmpInst::compare(Op1, Op2, Cond); | |||
| 2513 | if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St)) | |||
| 2514 | return ConstantInt::get(Call->getType()->getScalarType(), Result); | |||
| 2515 | return nullptr; | |||
| 2516 | } | |||
| 2517 | ||||
| 2518 | static Constant *ConstantFoldScalarCall2(StringRef Name, | |||
| 2519 | Intrinsic::ID IntrinsicID, | |||
| 2520 | Type *Ty, | |||
| 2521 | ArrayRef<Constant *> Operands, | |||
| 2522 | const TargetLibraryInfo *TLI, | |||
| 2523 | const CallBase *Call) { | |||
| 2524 | assert(Operands.size() == 2 && "Wrong number of operands."); | |||
| 2525 | ||||
| 2526 | if (Ty->isFloatingPointTy()) { | |||
| 2527 | // TODO: We should have undef handling for all of the FP intrinsics | |||
| 2528 | // that this function attempts to fold. | |||
| 2529 | bool IsOp0Undef = isa<UndefValue>(Operands[0]); | |||
| 2530 | bool IsOp1Undef = isa<UndefValue>(Operands[1]); | |||
| 2531 | switch (IntrinsicID) { | |||
| 2532 | case Intrinsic::maxnum: | |||
| 2533 | case Intrinsic::minnum: | |||
| 2534 | case Intrinsic::maximum: | |||
| 2535 | case Intrinsic::minimum: | |||
| 2536 | // If one argument is undef, return the other argument. | |||
| 2537 | if (IsOp0Undef) | |||
| 2538 | return Operands[1]; | |||
| 2539 | if (IsOp1Undef) | |||
| 2540 | return Operands[0]; | |||
| 2541 | break; | |||
| 2542 | } | |||
| 2543 | } | |||
| 2544 | ||||
| 2545 | if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { | |||
| 2546 | const APFloat &Op1V = Op1->getValueAPF(); | |||
| 2547 | ||||
| 2548 | if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { | |||
| 2549 | if (Op2->getType() != Op1->getType()) | |||
| 2550 | return nullptr; | |||
| 2551 | const APFloat &Op2V = Op2->getValueAPF(); | |||
| 2552 | ||||
| 2553 | if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) { | |||
| 2554 | RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); | |||
| 2555 | APFloat Res = Op1V; | |||
| 2556 | APFloat::opStatus St; | |||
| 2557 | switch (IntrinsicID) { | |||
| 2558 | default: | |||
| 2559 | return nullptr; | |||
| 2560 | case Intrinsic::experimental_constrained_fadd: | |||
| 2561 | St = Res.add(Op2V, RM); | |||
| 2562 | break; | |||
| 2563 | case Intrinsic::experimental_constrained_fsub: | |||
| 2564 | St = Res.subtract(Op2V, RM); | |||
| 2565 | break; | |||
| 2566 | case Intrinsic::experimental_constrained_fmul: | |||
| 2567 | St = Res.multiply(Op2V, RM); | |||
| 2568 | break; | |||
| 2569 | case Intrinsic::experimental_constrained_fdiv: | |||
| 2570 | St = Res.divide(Op2V, RM); | |||
| 2571 | break; | |||
| 2572 | case Intrinsic::experimental_constrained_frem: | |||
| 2573 | St = Res.mod(Op2V); | |||
| 2574 | break; | |||
| 2575 | case Intrinsic::experimental_constrained_fcmp: | |||
| 2576 | case Intrinsic::experimental_constrained_fcmps: | |||
| 2577 | return evaluateCompare(Op1V, Op2V, ConstrIntr); | |||
| 2578 | } | |||
| 2579 | if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), | |||
| 2580 | St)) | |||
| 2581 | return ConstantFP::get(Ty->getContext(), Res); | |||
| 2582 | return nullptr; | |||
| 2583 | } | |||
| 2584 | ||||
| 2585 | switch (IntrinsicID) { | |||
| 2586 | default: | |||
| 2587 | break; | |||
| 2588 | case Intrinsic::copysign: | |||
| 2589 | return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V)); | |||
| 2590 | case Intrinsic::minnum: | |||
| 2591 | return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V)); | |||
| 2592 | case Intrinsic::maxnum: | |||
| 2593 | return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V)); | |||
| 2594 | case Intrinsic::minimum: | |||
| 2595 | return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V)); | |||
| 2596 | case Intrinsic::maximum: | |||
| 2597 | return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V)); | |||
| 2598 | } | |||
| 2599 | ||||
| 2600 | if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) | |||
| 2601 | return nullptr; | |||
| 2602 | ||||
| 2603 | switch (IntrinsicID) { | |||
| 2604 | default: | |||
| 2605 | break; | |||
| 2606 | case Intrinsic::pow: | |||
| 2607 | return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); | |||
| 2608 | case Intrinsic::amdgcn_fmul_legacy: | |||
| 2609 | // The legacy behaviour is that multiplying +/- 0.0 by anything, even | |||
| 2610 | // NaN or infinity, gives +0.0. | |||
| 2611 | if (Op1V.isZero() || Op2V.isZero()) | |||
| 2612 | return ConstantFP::getZero(Ty); | |||
| 2613 | return ConstantFP::get(Ty->getContext(), Op1V * Op2V); | |||
| 2614 | } | |||
| 2615 | ||||
| 2616 | if (!TLI) | |||
| 2617 | return nullptr; | |||
| 2618 | ||||
| 2619 | LibFunc Func = NotLibFunc; | |||
| 2620 | if (!TLI->getLibFunc(Name, Func)) | |||
| 2621 | return nullptr; | |||
| 2622 | ||||
| 2623 | switch (Func) { | |||
| 2624 | default: | |||
| 2625 | break; | |||
| 2626 | case LibFunc_pow: | |||
| 2627 | case LibFunc_powf: | |||
| 2628 | case LibFunc_pow_finite: | |||
| 2629 | case LibFunc_powf_finite: | |||
| 2630 | if (TLI->has(Func)) | |||
| 2631 | return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); | |||
| 2632 | break; | |||
| 2633 | case LibFunc_fmod: | |||
| 2634 | case LibFunc_fmodf: | |||
| 2635 | if (TLI->has(Func)) { | |||
| 2636 | APFloat V = Op1->getValueAPF(); | |||
| 2637 | if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) | |||
| 2638 | return ConstantFP::get(Ty->getContext(), V); | |||
| 2639 | } | |||
| 2640 | break; | |||
| 2641 | case LibFunc_remainder: | |||
| 2642 | case LibFunc_remainderf: | |||
| 2643 | if (TLI->has(Func)) { | |||
| 2644 | APFloat V = Op1->getValueAPF(); | |||
| 2645 | if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF())) | |||
| 2646 | return ConstantFP::get(Ty->getContext(), V); | |||
| 2647 | } | |||
| 2648 | break; | |||
| 2649 | case LibFunc_atan2: | |||
| 2650 | case LibFunc_atan2f: | |||
| 2651 | // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm | |||
| 2652 | // implementations (e.g. Solaris), so we do not assume a known result. | |||
| 2653 | if (Op1V.isZero() && Op2V.isZero()) | |||
| 2654 | return nullptr; | |||
| 2655 | [[fallthrough]]; | |||
| 2656 | case LibFunc_atan2_finite: | |||
| 2657 | case LibFunc_atan2f_finite: | |||
| 2658 | if (TLI->has(Func)) | |||
| 2659 | return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); | |||
| 2660 | break; | |||
| 2661 | } | |||
| 2662 | } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { | |||
| 2663 | switch (IntrinsicID) { | |||
| 2664 | case Intrinsic::is_fpclass: { | |||
| 2665 | FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue()); | |||
| 2666 | bool Result = | |||
| 2667 | ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) || | |||
| 2668 | ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) || | |||
| 2669 | ((Mask & fcNegInf) && Op1V.isNegInfinity()) || | |||
| 2670 | ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) || | |||
| 2671 | ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) || | |||
| 2672 | ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) || | |||
| 2673 | ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) || | |||
| 2674 | ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) || | |||
| 2675 | ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) || | |||
| 2676 | ((Mask & fcPosInf) && Op1V.isPosInfinity()); | |||
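| | // e.g. is.fpclass(-0.0, fcNegZero | fcPosZero) folds to true, while | |||
| | // is.fpclass(-0.0, fcPosZero) folds to false. | |||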
| 2677 | return ConstantInt::get(Ty, Result); | |||
| 2678 | } | |||
| 2679 | default: | |||
| 2680 | break; | |||
| 2681 | } | |||
| 2682 | ||||
| 2683 | if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) | |||
| 2684 | return nullptr; | |||
| 2685 | if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) | |||
| 2686 | return ConstantFP::get( | |||
| 2687 | Ty->getContext(), | |||
| 2688 | APFloat((float)std::pow((float)Op1V.convertToDouble(), | |||
| 2689 | (int)Op2C->getZExtValue()))); | |||
| 2690 | if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy()) | |||
| 2691 | return ConstantFP::get( | |||
| 2692 | Ty->getContext(), | |||
| 2693 | APFloat((float)std::pow((float)Op1V.convertToDouble(), | |||
| 2694 | (int)Op2C->getZExtValue()))); | |||
| 2695 | if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy()) | |||
| 2696 | return ConstantFP::get( | |||
| 2697 | Ty->getContext(), | |||
| 2698 | APFloat((double)std::pow(Op1V.convertToDouble(), | |||
| 2699 | (int)Op2C->getZExtValue()))); | |||
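| | // e.g. powi(2.0, 10) folds to 1024.0. Note that the half and float | |||
| | // cases above are both evaluated in host 'float' precision. | |||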
| 2700 | ||||
| 2701 | if (IntrinsicID == Intrinsic::amdgcn_ldexp) { | |||
| 2702 | // FIXME: Should flush denorms depending on FP mode, but that's ignored | |||
| 2703 | // everywhere else. | |||
| 2704 | ||||
| 2705 | // scalbn is equivalent to ldexp when FLT_RADIX is 2. | |||
| 2706 | APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(), | |||
| 2707 | APFloat::rmNearestTiesToEven); | |||
| 2708 | return ConstantFP::get(Ty->getContext(), Result); | |||
| 2709 | } | |||
| 2710 | } | |||
| 2711 | return nullptr; | |||
| 2712 | } | |||
| 2713 | ||||
| 2714 | if (Operands[0]->getType()->isIntegerTy() && | |||
| 2715 | Operands[1]->getType()->isIntegerTy()) { | |||
| 2716 | const APInt *C0, *C1; | |||
| 2717 | if (!getConstIntOrUndef(Operands[0], C0) || | |||
| 2718 | !getConstIntOrUndef(Operands[1], C1)) | |||
| 2719 | return nullptr; | |||
| 2720 | ||||
| 2721 | switch (IntrinsicID) { | |||
| 2722 | default: break; | |||
| 2723 | case Intrinsic::smax: | |||
| 2724 | case Intrinsic::smin: | |||
| 2725 | case Intrinsic::umax: | |||
| 2726 | case Intrinsic::umin: | |||
| 2727 | // This is the same as for binary ops - poison propagates. | |||
| 2728 | // TODO: Poison handling should be consolidated. | |||
| 2729 | if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) | |||
| 2730 | return PoisonValue::get(Ty); | |||
| 2731 | ||||
| 2732 | if (!C0 && !C1) | |||
| 2733 | return UndefValue::get(Ty); | |||
| 2734 | if (!C0 || !C1) | |||
| 2735 | return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty); | |||
| 2736 | return ConstantInt::get( | |||
| 2737 | Ty, ICmpInst::compare(*C0, *C1, | |||
| 2738 | MinMaxIntrinsic::getPredicate(IntrinsicID)) | |||
| 2739 | ? *C0 | |||
| 2740 | : *C1); | |||
| 2741 | ||||
| 2742 | case Intrinsic::usub_with_overflow: | |||
| 2743 | case Intrinsic::ssub_with_overflow: | |||
| 2744 | // X - undef -> { 0, false } | |||
| 2745 | // undef - X -> { 0, false } | |||
| 2746 | if (!C0 || !C1) | |||
| 2747 | return Constant::getNullValue(Ty); | |||
| 2748 | [[fallthrough]]; | |||
| 2749 | case Intrinsic::uadd_with_overflow: | |||
| 2750 | case Intrinsic::sadd_with_overflow: | |||
| 2751 | // X + undef -> { -1, false } | |||
| 2752 | // undef + X -> { -1, false } | |||
| 2753 | if (!C0 || !C1) { | |||
| 2754 | return ConstantStruct::get( | |||
| 2755 | cast<StructType>(Ty), | |||
| 2756 | {Constant::getAllOnesValue(Ty->getStructElementType(0)), | |||
| 2757 | Constant::getNullValue(Ty->getStructElementType(1))}); | |||
| 2758 | } | |||
| 2759 | [[fallthrough]]; | |||
| 2760 | case Intrinsic::smul_with_overflow: | |||
| 2761 | case Intrinsic::umul_with_overflow: { | |||
| 2762 | // undef * X -> { 0, false } | |||
| 2763 | // X * undef -> { 0, false } | |||
| 2764 | if (!C0 || !C1) | |||
| 2765 | return Constant::getNullValue(Ty); | |||
| 2766 | ||||
| 2767 | APInt Res; | |||
| 2768 | bool Overflow; | |||
| 2769 | switch (IntrinsicID) { | |||
| 2770 | default: llvm_unreachable("Invalid case")::llvm::llvm_unreachable_internal("Invalid case", "llvm/lib/Analysis/ConstantFolding.cpp" , 2770); | |||
| 2771 | case Intrinsic::sadd_with_overflow: | |||
| 2772 | Res = C0->sadd_ov(*C1, Overflow); | |||
| 2773 | break; | |||
| 2774 | case Intrinsic::uadd_with_overflow: | |||
| 2775 | Res = C0->uadd_ov(*C1, Overflow); | |||
| 2776 | break; | |||
| 2777 | case Intrinsic::ssub_with_overflow: | |||
| 2778 | Res = C0->ssub_ov(*C1, Overflow); | |||
| 2779 | break; | |||
| 2780 | case Intrinsic::usub_with_overflow: | |||
| 2781 | Res = C0->usub_ov(*C1, Overflow); | |||
| 2782 | break; | |||
| 2783 | case Intrinsic::smul_with_overflow: | |||
| 2784 | Res = C0->smul_ov(*C1, Overflow); | |||
| 2785 | break; | |||
| 2786 | case Intrinsic::umul_with_overflow: | |||
| 2787 | Res = C0->umul_ov(*C1, Overflow); | |||
| 2788 | break; | |||
| 2789 | } | |||
| 2790 | Constant *Ops[] = { | |||
| 2791 | ConstantInt::get(Ty->getContext(), Res), | |||
| 2792 | ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) | |||
| 2793 | }; | |||
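| | // Worked example: sadd.with.overflow(i8 100, i8 50) wraps to -106, so | |||
| | // the folded result is { i8 -106, i1 true }. | |||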
| 2794 | return ConstantStruct::get(cast<StructType>(Ty), Ops); | |||
| 2795 | } | |||
| 2796 | case Intrinsic::uadd_sat: | |||
| 2797 | case Intrinsic::sadd_sat: | |||
| 2798 | // This is the same as for binary ops - poison propagates. | |||
| 2799 | // TODO: Poison handling should be consolidated. | |||
| 2800 | if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) | |||
| 2801 | return PoisonValue::get(Ty); | |||
| 2802 | ||||
| 2803 | if (!C0 && !C1) | |||
| 2804 | return UndefValue::get(Ty); | |||
| 2805 | if (!C0 || !C1) | |||
| 2806 | return Constant::getAllOnesValue(Ty); | |||
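| | // Saturating folds clamp instead of wrapping: e.g. uadd.sat(i8 200, | |||
| | // i8 100) folds to 255 and sadd.sat(i8 100, i8 50) folds to 127. | |||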
| 2807 | if (IntrinsicID == Intrinsic::uadd_sat) | |||
| 2808 | return ConstantInt::get(Ty, C0->uadd_sat(*C1)); | |||
| 2809 | else | |||
| 2810 | return ConstantInt::get(Ty, C0->sadd_sat(*C1)); | |||
| 2811 | case Intrinsic::usub_sat: | |||
| 2812 | case Intrinsic::ssub_sat: | |||
| 2813 | // This is the same as for binary ops - poison propagates. | |||
| 2814 | // TODO: Poison handling should be consolidated. | |||
| 2815 | if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) | |||
| 2816 | return PoisonValue::get(Ty); | |||
| 2817 | ||||
| 2818 | if (!C0 && !C1) | |||
| 2819 | return UndefValue::get(Ty); | |||
| 2820 | if (!C0 || !C1) | |||
| 2821 | return Constant::getNullValue(Ty); | |||
| 2822 | if (IntrinsicID == Intrinsic::usub_sat) | |||
| 2823 | return ConstantInt::get(Ty, C0->usub_sat(*C1)); | |||
| 2824 | else | |||
| 2825 | return ConstantInt::get(Ty, C0->ssub_sat(*C1)); | |||
| 2826 | case Intrinsic::cttz: | |||
| 2827 | case Intrinsic::ctlz: | |||
| 2828 | assert(C1 && "Must be constant int")(static_cast <bool> (C1 && "Must be constant int" ) ? void (0) : __assert_fail ("C1 && \"Must be constant int\"" , "llvm/lib/Analysis/ConstantFolding.cpp", 2828, __extension__ __PRETTY_FUNCTION__)); | |||
| 2829 | ||||
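| | // e.g. cttz(i32 8, i1 0) folds to 3 and ctlz(i32 8, i1 0) folds to 28. | |||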
| 2830 | // cttz(0, 1) and ctlz(0, 1) are poison. | |||
| 2831 | if (C1->isOne() && (!C0 || C0->isZero())) | |||
| 2832 | return PoisonValue::get(Ty); | |||
| 2833 | if (!C0) | |||
| 2834 | return Constant::getNullValue(Ty); | |||
| 2835 | if (IntrinsicID == Intrinsic::cttz) | |||
| 2836 | return ConstantInt::get(Ty, C0->countr_zero()); | |||
| 2837 | else | |||
| 2838 | return ConstantInt::get(Ty, C0->countl_zero()); | |||
| 2839 | ||||
| 2840 | case Intrinsic::abs: | |||
| 2841 | assert(C1 && "Must be constant int")(static_cast <bool> (C1 && "Must be constant int" ) ? void (0) : __assert_fail ("C1 && \"Must be constant int\"" , "llvm/lib/Analysis/ConstantFolding.cpp", 2841, __extension__ __PRETTY_FUNCTION__)); | |||
| 2842 | assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1")(static_cast <bool> ((C1->isOne() || C1->isZero() ) && "Must be 0 or 1") ? void (0) : __assert_fail ("(C1->isOne() || C1->isZero()) && \"Must be 0 or 1\"" , "llvm/lib/Analysis/ConstantFolding.cpp", 2842, __extension__ __PRETTY_FUNCTION__)); | |||
| 2843 | ||||
| 2844 | // Undef or minimum val operand with poison min --> undef | |||
| 2845 | if (C1->isOne() && (!C0 || C0->isMinSignedValue())) | |||
| 2846 | return UndefValue::get(Ty); | |||
| 2847 | ||||
| 2848 | // Undef operand with no poison min --> 0 (sign bit must be clear) | |||
| 2849 | if (!C0) | |||
| 2850 | return Constant::getNullValue(Ty); | |||
| 2851 | ||||
| 2852 | return ConstantInt::get(Ty, C0->abs()); | |||
| 2853 | } | |||
| 2854 | ||||
| 2855 | return nullptr; | |||
| 2856 | } | |||
| 2857 | ||||
| 2858 | // Support ConstantVector in case one of the elements is an Undef. | |||
| 2859 | if ((isa<ConstantVector>(Operands[0]) || | |||
| 2860 | isa<ConstantDataVector>(Operands[0])) && | |||
| 2861 | // Check for default rounding mode. | |||
| 2862 | // FIXME: Support other rounding modes? | |||
| 2863 | isa<ConstantInt>(Operands[1]) && | |||
| 2864 | cast<ConstantInt>(Operands[1])->getValue() == 4) { | |||
| 2865 | auto *Op = cast<Constant>(Operands[0]); | |||
| 2866 | switch (IntrinsicID) { | |||
| 2867 | default: break; | |||
| 2868 | case Intrinsic::x86_avx512_vcvtss2si32: | |||
| 2869 | case Intrinsic::x86_avx512_vcvtss2si64: | |||
| 2870 | case Intrinsic::x86_avx512_vcvtsd2si32: | |||
| 2871 | case Intrinsic::x86_avx512_vcvtsd2si64: | |||
| 2872 | if (ConstantFP *FPOp = | |||
| 2873 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2874 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2875 | /*roundTowardZero=*/false, Ty, | |||
| 2876 | /*IsSigned*/true); | |||
| 2877 | break; | |||
| 2878 | case Intrinsic::x86_avx512_vcvtss2usi32: | |||
| 2879 | case Intrinsic::x86_avx512_vcvtss2usi64: | |||
| 2880 | case Intrinsic::x86_avx512_vcvtsd2usi32: | |||
| 2881 | case Intrinsic::x86_avx512_vcvtsd2usi64: | |||
| 2882 | if (ConstantFP *FPOp = | |||
| 2883 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2884 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2885 | /*roundTowardZero=*/false, Ty, | |||
| 2886 | /*IsSigned*/false); | |||
| 2887 | break; | |||
| 2888 | case Intrinsic::x86_avx512_cvttss2si: | |||
| 2889 | case Intrinsic::x86_avx512_cvttss2si64: | |||
| 2890 | case Intrinsic::x86_avx512_cvttsd2si: | |||
| 2891 | case Intrinsic::x86_avx512_cvttsd2si64: | |||
| 2892 | if (ConstantFP *FPOp = | |||
| 2893 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2894 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2895 | /*roundTowardZero=*/true, Ty, | |||
| 2896 | /*IsSigned*/true); | |||
| 2897 | break; | |||
| 2898 | case Intrinsic::x86_avx512_cvttss2usi: | |||
| 2899 | case Intrinsic::x86_avx512_cvttss2usi64: | |||
| 2900 | case Intrinsic::x86_avx512_cvttsd2usi: | |||
| 2901 | case Intrinsic::x86_avx512_cvttsd2usi64: | |||
| 2902 | if (ConstantFP *FPOp = | |||
| 2903 | dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U))) | |||
| 2904 | return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), | |||
| 2905 | /*roundTowardZero=*/true, Ty, | |||
| 2906 | /*IsSigned*/false); | |||
| 2907 | break; | |||
| 2908 | } | |||
| 2909 | } | |||
| 2910 | return nullptr; | |||
| 2911 | } | |||
| 2912 | ||||
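| | // Folds the amdgcn cube intrinsics: the coordinate with the largest | |||
| | // magnitude selects the face. As an illustrative fold, for | |||
| | // (S0, S1, S2) = (1, 0, 0) the code below yields cubeid = 0, | |||
| | // cubema = 2 * S0 = 2, cubesc = -S2 and cubetc = -S1. | |||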
| 2913 | static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID, | |||
| 2914 | const APFloat &S0, | |||
| 2915 | const APFloat &S1, | |||
| 2916 | const APFloat &S2) { | |||
| 2917 | unsigned ID; | |||
| 2918 | const fltSemantics &Sem = S0.getSemantics(); | |||
| 2919 | APFloat MA(Sem), SC(Sem), TC(Sem); | |||
| 2920 | if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) { | |||
| 2921 | if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) { | |||
| 2922 | // S2 < 0 | |||
| 2923 | ID = 5; | |||
| 2924 | SC = -S0; | |||
| 2925 | } else { | |||
| 2926 | ID = 4; | |||
| 2927 | SC = S0; | |||
| 2928 | } | |||
| 2929 | MA = S2; | |||
| 2930 | TC = -S1; | |||
| 2931 | } else if (abs(S1) >= abs(S0)) { | |||
| 2932 | if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) { | |||
| 2933 | // S1 < 0 | |||
| 2934 | ID = 3; | |||
| 2935 | TC = -S2; | |||
| 2936 | } else { | |||
| 2937 | ID = 2; | |||
| 2938 | TC = S2; | |||
| 2939 | } | |||
| 2940 | MA = S1; | |||
| 2941 | SC = S0; | |||
| 2942 | } else { | |||
| 2943 | if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) { | |||
| 2944 | // S0 < 0 | |||
| 2945 | ID = 1; | |||
| 2946 | SC = S2; | |||
| 2947 | } else { | |||
| 2948 | ID = 0; | |||
| 2949 | SC = -S2; | |||
| 2950 | } | |||
| 2951 | MA = S0; | |||
| 2952 | TC = -S1; | |||
| 2953 | } | |||
| 2954 | switch (IntrinsicID) { | |||
| 2955 | default: | |||
| 2956 | llvm_unreachable("unhandled amdgcn cube intrinsic")::llvm::llvm_unreachable_internal("unhandled amdgcn cube intrinsic" , "llvm/lib/Analysis/ConstantFolding.cpp", 2956); | |||
| 2957 | case Intrinsic::amdgcn_cubeid: | |||
| 2958 | return APFloat(Sem, ID); | |||
| 2959 | case Intrinsic::amdgcn_cubema: | |||
| 2960 | return MA + MA; | |||
| 2961 | case Intrinsic::amdgcn_cubesc: | |||
| 2962 | return SC; | |||
| 2963 | case Intrinsic::amdgcn_cubetc: | |||
| 2964 | return TC; | |||
| 2965 | } | |||
| 2966 | } | |||
| 2967 | ||||
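| | // A sketch of the byte-select decoding implemented below: selector | |||
| | // bytes 0-3 pick a byte of the second operand, 4-7 a byte of the | |||
| | // first, 8-11 broadcast bit 15 or 31 of one source, 12 yields 0x00 | |||
| | // and 13-15 yield 0xff. Illustrative fold: | |||
| | // amdgcn.perm(0xAABBCCDD, 0x11223344, 0x0C0D0401) == 0x00FFDD33. | |||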
| 2968 | static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands, | |||
| 2969 | Type *Ty) { | |||
| 2970 | const APInt *C0, *C1, *C2; | |||
| 2971 | if (!getConstIntOrUndef(Operands[0], C0) || | |||
| 2972 | !getConstIntOrUndef(Operands[1], C1) || | |||
| 2973 | !getConstIntOrUndef(Operands[2], C2)) | |||
| 2974 | return nullptr; | |||
| 2975 | ||||
| 2976 | if (!C2) | |||
| 2977 | return UndefValue::get(Ty); | |||
| 2978 | ||||
| 2979 | APInt Val(32, 0); | |||
| 2980 | unsigned NumUndefBytes = 0; | |||
| 2981 | for (unsigned I = 0; I < 32; I += 8) { | |||
| 2982 | unsigned Sel = C2->extractBitsAsZExtValue(8, I); | |||
| 2983 | unsigned B = 0; | |||
| 2984 | ||||
| 2985 | if (Sel >= 13) | |||
| 2986 | B = 0xff; | |||
| 2987 | else if (Sel == 12) | |||
| 2988 | B = 0x00; | |||
| 2989 | else { | |||
| 2990 | const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1; | |||
| 2991 | if (!Src) | |||
| 2992 | ++NumUndefBytes; | |||
| 2993 | else if (Sel < 8) | |||
| 2994 | B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8); | |||
| 2995 | else | |||
| 2996 | B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff; | |||
| 2997 | } | |||
| 2998 | ||||
| 2999 | Val.insertBits(B, I, 8); | |||
| 3000 | } | |||
| 3001 | ||||
| 3002 | if (NumUndefBytes == 4) | |||
| 3003 | return UndefValue::get(Ty); | |||
| 3004 | ||||
| 3005 | return ConstantInt::get(Ty, Val); | |||
| 3006 | } | |||
| 3007 | ||||
| 3008 | static Constant *ConstantFoldScalarCall3(StringRef Name, | |||
| 3009 | Intrinsic::ID IntrinsicID, | |||
| 3010 | Type *Ty, | |||
| 3011 | ArrayRef<Constant *> Operands, | |||
| 3012 | const TargetLibraryInfo *TLI, | |||
| 3013 | const CallBase *Call) { | |||
| 3014 | assert(Operands.size() == 3 && "Wrong number of operands."); | |||
| 3015 | ||||
| 3016 | if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) { | |||
| 3017 | if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) { | |||
| 3018 | if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) { | |||
| 3019 | const APFloat &C1 = Op1->getValueAPF(); | |||
| 3020 | const APFloat &C2 = Op2->getValueAPF(); | |||
| 3021 | const APFloat &C3 = Op3->getValueAPF(); | |||
| 3022 | ||||
| 3023 | if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) { | |||
| 3024 | RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); | |||
| 3025 | APFloat Res = C1; | |||
| 3026 | APFloat::opStatus St; | |||
| 3027 | switch (IntrinsicID) { | |||
| 3028 | default: | |||
| 3029 | return nullptr; | |||
| 3030 | case Intrinsic::experimental_constrained_fma: | |||
| 3031 | case Intrinsic::experimental_constrained_fmuladd: | |||
| 3032 | St = Res.fusedMultiplyAdd(C2, C3, RM); | |||
| 3033 | break; | |||
| 3034 | } | |||
| 3035 | if (mayFoldConstrained( | |||
| 3036 | const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St)) | |||
| 3037 | return ConstantFP::get(Ty->getContext(), Res); | |||
| 3038 | return nullptr; | |||
| 3039 | } | |||
| 3040 | ||||
| 3041 | switch (IntrinsicID) { | |||
| 3042 | default: break; | |||
| 3043 | case Intrinsic::amdgcn_fma_legacy: { | |||
| 3044 | // The legacy behaviour is that multiplying +/- 0.0 by anything, even | |||
| 3045 | // NaN or infinity, gives +0.0. | |||
| 3046 | if (C1.isZero() || C2.isZero()) { | |||
| 3047 | // It's tempting to just return C3 here, but that would give the | |||
| 3048 | // wrong result if C3 was -0.0. | |||
| 3049 | return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3); | |||
| 3050 | } | |||
| 3051 | [[fallthrough]]; | |||
| 3052 | } | |||
| 3053 | case Intrinsic::fma: | |||
| 3054 | case Intrinsic::fmuladd: { | |||
| 3055 | APFloat V = C1; | |||
| 3056 | V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven); | |||
| 3057 | return ConstantFP::get(Ty->getContext(), V); | |||
| 3058 | } | |||
| 3059 | case Intrinsic::amdgcn_cubeid: | |||
| 3060 | case Intrinsic::amdgcn_cubema: | |||
| 3061 | case Intrinsic::amdgcn_cubesc: | |||
| 3062 | case Intrinsic::amdgcn_cubetc: { | |||
| 3063 | APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3); | |||
| 3064 | return ConstantFP::get(Ty->getContext(), V); | |||
| 3065 | } | |||
| 3066 | } | |||
| 3067 | } | |||
| 3068 | } | |||
| 3069 | } | |||
| 3070 | ||||
| 3071 | if (IntrinsicID == Intrinsic::smul_fix || | |||
| 3072 | IntrinsicID == Intrinsic::smul_fix_sat) { | |||
| 3073 | // poison * C -> poison | |||
| 3074 | // C * poison -> poison | |||
| 3075 | if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1])) | |||
| 3076 | return PoisonValue::get(Ty); | |||
| 3077 | ||||
| 3078 | const APInt *C0, *C1; | |||
| 3079 | if (!getConstIntOrUndef(Operands[0], C0) || | |||
| 3080 | !getConstIntOrUndef(Operands[1], C1)) | |||
| 3081 | return nullptr; | |||
| 3082 | ||||
| 3083 | // undef * C -> 0 | |||
| 3084 | // C * undef -> 0 | |||
| 3085 | if (!C0 || !C1) | |||
| 3086 | return Constant::getNullValue(Ty); | |||
| 3087 | ||||
| 3088 | // This code performs rounding towards negative infinity in case the result | |||
| 3089 | // cannot be represented exactly for the given scale. Targets that do care | |||
| 3090 | // about rounding should use a target hook for specifying how rounding | |||
| 3091 | // should be done, and provide their own folding to be consistent with | |||
| 3092 | // rounding. This is the same approach as used by | |||
| 3093 | // DAGTypeLegalizer::ExpandIntRes_MULFIX. | |||
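| | // For example, with scale 1: smul.fix(i32 6, i32 3, 1) folds to | |||
| | // (6 * 3) >> 1 = 9, while smul.fix(i32 -7, i32 3, 1) folds to -11, | |||
| | // i.e. -21 / 2 rounded toward negative infinity. | |||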
| 3094 | unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue(); | |||
| 3095 | unsigned Width = C0->getBitWidth(); | |||
| 3096 | assert(Scale < Width && "Illegal scale."); | |||
| 3097 | unsigned ExtendedWidth = Width * 2; | |||
| 3098 | APInt Product = | |||
| 3099 | (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale); | |||
| 3100 | if (IntrinsicID == Intrinsic::smul_fix_sat) { | |||
| 3101 | APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth); | |||
| 3102 | APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth); | |||
| 3103 | Product = APIntOps::smin(Product, Max); | |||
| 3104 | Product = APIntOps::smax(Product, Min); | |||
| 3105 | } | |||
| 3106 | return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width)); | |||
| 3107 | } | |||
| 3108 | ||||
| 3109 | if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) { | |||
| 3110 | const APInt *C0, *C1, *C2; | |||
| 3111 | if (!getConstIntOrUndef(Operands[0], C0) || | |||
| 3112 | !getConstIntOrUndef(Operands[1], C1) || | |||
| 3113 | !getConstIntOrUndef(Operands[2], C2)) | |||
| 3114 | return nullptr; | |||
| 3115 | ||||
| 3116 | bool IsRight = IntrinsicID == Intrinsic::fshr; | |||
| 3117 | if (!C2) | |||
| 3118 | return Operands[IsRight ? 1 : 0]; | |||
| 3119 | if (!C0 && !C1) | |||
| 3120 | return UndefValue::get(Ty); | |||
| 3121 | ||||
| 3122 | // The shift amount is interpreted as modulo the bitwidth. If the shift | |||
| 3123 | // amount is effectively 0, avoid UB due to oversized inverse shift below. | |||
| 3124 | unsigned BitWidth = C2->getBitWidth(); | |||
| 3125 | unsigned ShAmt = C2->urem(BitWidth); | |||
| 3126 | if (!ShAmt) | |||
| 3127 | return Operands[IsRight ? 1 : 0]; | |||
| 3128 | ||||
| 3129 | // (C0 << ShlAmt) | (C1 >> LshrAmt) | |||
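| | // e.g. fshl(i8 0xAB, i8 0xCD, 4) folds to 0xBC: the concatenation | |||
| | // 0xABCD shifted left by 4 keeps 0xBC in the high byte. | |||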
| 3130 | unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt; | |||
| 3131 | unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt; | |||
| 3132 | if (!C0) | |||
| 3133 | return ConstantInt::get(Ty, C1->lshr(LshrAmt)); | |||
| 3134 | if (!C1) | |||
| 3135 | return ConstantInt::get(Ty, C0->shl(ShlAmt)); | |||
| 3136 | return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt)); | |||
| 3137 | } | |||
| 3138 | ||||
| 3139 | if (IntrinsicID == Intrinsic::amdgcn_perm) | |||
| 3140 | return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty); | |||
| 3141 | ||||
| 3142 | return nullptr; | |||
| 3143 | } | |||
| 3144 | ||||
| 3145 | static Constant *ConstantFoldScalarCall(StringRef Name, | |||
| 3146 | Intrinsic::ID IntrinsicID, | |||
| 3147 | Type *Ty, | |||
| 3148 | ArrayRef<Constant *> Operands, | |||
| 3149 | const TargetLibraryInfo *TLI, | |||
| 3150 | const CallBase *Call) { | |||
| 3151 | if (Operands.size() == 1) | |||
| 3152 | return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call); | |||
| 3153 | ||||
| 3154 | if (Operands.size() == 2) | |||
| 3155 | return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call); | |||
| 3156 | ||||
| 3157 | if (Operands.size() == 3) | |||
| 3158 | return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call); | |||
| 3159 | ||||
| 3160 | return nullptr; | |||
| 3161 | } | |||
| 3162 | ||||
| 3163 | static Constant *ConstantFoldFixedVectorCall( | |||
| 3164 | StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy, | |||
| 3165 | ArrayRef<Constant *> Operands, const DataLayout &DL, | |||
| 3166 | const TargetLibraryInfo *TLI, const CallBase *Call) { | |||
| 3167 | SmallVector<Constant *, 4> Result(FVTy->getNumElements()); | |||
| 3168 | SmallVector<Constant *, 4> Lane(Operands.size()); | |||
| 3169 | Type *Ty = FVTy->getElementType(); | |||
| 3170 | ||||
| 3171 | switch (IntrinsicID) { | |||
| 3172 | case Intrinsic::masked_load: { | |||
| 3173 | auto *SrcPtr = Operands[0]; | |||
| 3174 | auto *Mask = Operands[2]; | |||
| 3175 | auto *Passthru = Operands[3]; | |||
| 3176 | ||||
| 3177 | Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL); | |||
| 3178 | ||||
| 3179 | SmallVector<Constant *, 32> NewElements; | |||
| 3180 | for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { | |||
| 3181 | auto *MaskElt = Mask->getAggregateElement(I); | |||
| 3182 | if (!MaskElt) | |||
| 3183 | break; | |||
| 3184 | auto *PassthruElt = Passthru->getAggregateElement(I); | |||
| 3185 | auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr; | |||
| 3186 | if (isa<UndefValue>(MaskElt)) { | |||
| 3187 | if (PassthruElt) | |||
| 3188 | NewElements.push_back(PassthruElt); | |||
| 3189 | else if (VecElt) | |||
| 3190 | NewElements.push_back(VecElt); | |||
| 3191 | else | |||
| 3192 | return nullptr; | |||
| 3193 | } | |||
| 3194 | else if (MaskElt->isNullValue()) { | |||
| 3195 | if (!PassthruElt) | |||
| 3196 | return nullptr; | |||
| 3197 | NewElements.push_back(PassthruElt); | |||
| 3198 | } else if (MaskElt->isOneValue()) { | |||
| 3199 | if (!VecElt) | |||
| 3200 | return nullptr; | |||
| 3201 | NewElements.push_back(VecElt); | |||
| 3202 | } else { | |||
| 3203 | return nullptr; | |||
| 3204 | } | |||
| 3205 | } | |||
| 3206 | if (NewElements.size() != FVTy->getNumElements()) | |||
| 3207 | return nullptr; | |||
| 3208 | return ConstantVector::get(NewElements); | |||
| 3209 | } | |||
| 3210 | case Intrinsic::arm_mve_vctp8: | |||
| 3211 | case Intrinsic::arm_mve_vctp16: | |||
| 3212 | case Intrinsic::arm_mve_vctp32: | |||
| 3213 | case Intrinsic::arm_mve_vctp64: { | |||
| 3214 | if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) { | |||
| 3215 | unsigned Lanes = FVTy->getNumElements(); | |||
| 3216 | uint64_t Limit = Op->getZExtValue(); | |||
| 3217 | ||||
| 3218 | SmallVector<Constant *, 16> NCs; | |||
| 3219 | for (unsigned i = 0; i < Lanes; i++) { | |||
| 3220 | if (i < Limit) | |||
| 3221 | NCs.push_back(ConstantInt::getTrue(Ty)); | |||
| 3222 | else | |||
| 3223 | NCs.push_back(ConstantInt::getFalse(Ty)); | |||
| 3224 | } | |||
| 3225 | return ConstantVector::get(NCs); | |||
| 3226 | } | |||
| 3227 | return nullptr; | |||
| 3228 | } | |||
| 3229 | case Intrinsic::get_active_lane_mask: { | |||
| 3230 | auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); | |||
| 3231 | auto *Op1 = dyn_cast<ConstantInt>(Operands[1]); | |||
| 3232 | if (Op0 && Op1) { | |||
| 3233 | unsigned Lanes = FVTy->getNumElements(); | |||
| 3234 | uint64_t Base = Op0->getZExtValue(); | |||
| 3235 | uint64_t Limit = Op1->getZExtValue(); | |||
| 3236 | ||||
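| | // e.g. get.active.lane.mask(i32 2, i32 5) with a <4 x i1> result | |||
| | // folds to <1, 1, 1, 0>: lane i is true iff Base + i < Limit. | |||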
| 3237 | SmallVector<Constant *, 16> NCs; | |||
| 3238 | for (unsigned i = 0; i < Lanes; i++) { | |||
| 3239 | if (Base + i < Limit) | |||
| 3240 | NCs.push_back(ConstantInt::getTrue(Ty)); | |||
| 3241 | else | |||
| 3242 | NCs.push_back(ConstantInt::getFalse(Ty)); | |||
| 3243 | } | |||
| 3244 | return ConstantVector::get(NCs); | |||
| 3245 | } | |||
| 3246 | return nullptr; | |||
| 3247 | } | |||
| 3248 | default: | |||
| 3249 | break; | |||
| 3250 | } | |||
| 3251 | ||||
| 3252 | for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { | |||
| 3253 | // Gather a column of constants. | |||
| 3254 | for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { | |||
| 3255 | // Some intrinsics use a scalar type for certain arguments. | |||
| 3256 | if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) { | |||
| 3257 | Lane[J] = Operands[J]; | |||
| 3258 | continue; | |||
| 3259 | } | |||
| 3260 | ||||
| 3261 | Constant *Agg = Operands[J]->getAggregateElement(I); | |||
| 3262 | if (!Agg) | |||
| 3263 | return nullptr; | |||
| 3264 | ||||
| 3265 | Lane[J] = Agg; | |||
| 3266 | } | |||
| 3267 | ||||
| 3268 | // Use the regular scalar folding to simplify this column. | |||
| 3269 | Constant *Folded = | |||
| 3270 | ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call); | |||
| 3271 | if (!Folded) | |||
| 3272 | return nullptr; | |||
| 3273 | Result[I] = Folded; | |||
| 3274 | } | |||
| 3275 | ||||
| 3276 | return ConstantVector::get(Result); | |||
| 3277 | } | |||
| 3278 | ||||
| 3279 | static Constant *ConstantFoldScalableVectorCall( | |||
| 3280 | StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy, | |||
| 3281 | ArrayRef<Constant *> Operands, const DataLayout &DL, | |||
| 3282 | const TargetLibraryInfo *TLI, const CallBase *Call) { | |||
| 3283 | switch (IntrinsicID) { | |||
| 3284 | case Intrinsic::aarch64_sve_convert_from_svbool: { | |||
| 3285 | auto *Src = dyn_cast<Constant>(Operands[0]); | |||
| 3286 | if (!Src || !Src->isNullValue()) | |||
| 3287 | break; | |||
| 3288 | ||||
| 3289 | return ConstantInt::getFalse(SVTy); | |||
| 3290 | } | |||
| 3291 | default: | |||
| 3292 | break; | |||
| 3293 | } | |||
| 3294 | return nullptr; | |||
| 3295 | } | |||
| 3296 | ||||
| 3297 | } // end anonymous namespace | |||
| 3298 | ||||
| 3299 | Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, | |||
| 3300 | ArrayRef<Constant *> Operands, | |||
| 3301 | const TargetLibraryInfo *TLI) { | |||
| 3302 | if (Call->isNoBuiltin()) | |||
| 3303 | return nullptr; | |||
| 3304 | if (!F->hasName()) | |||
| 3305 | return nullptr; | |||
| 3306 | ||||
| 3307 | // If this is not an intrinsic and not recognized as a library call, bail out. | |||
| 3308 | if (F->getIntrinsicID() == Intrinsic::not_intrinsic) { | |||
| 3309 | if (!TLI) | |||
| 3310 | return nullptr; | |||
| 3311 | LibFunc LibF; | |||
| 3312 | if (!TLI->getLibFunc(*F, LibF)) | |||
| 3313 | return nullptr; | |||
| 3314 | } | |||
| 3315 | ||||
| 3316 | StringRef Name = F->getName(); | |||
| 3317 | Type *Ty = F->getReturnType(); | |||
| 3318 | if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) | |||
| 3319 | return ConstantFoldFixedVectorCall( | |||
| 3320 | Name, F->getIntrinsicID(), FVTy, Operands, | |||
| 3321 | F->getParent()->getDataLayout(), TLI, Call); | |||
| 3322 | ||||
| 3323 | if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty)) | |||
| 3324 | return ConstantFoldScalableVectorCall( | |||
| 3325 | Name, F->getIntrinsicID(), SVTy, Operands, | |||
| 3326 | F->getParent()->getDataLayout(), TLI, Call); | |||
| 3327 | ||||
| 3328 | // TODO: If this is a library function, we already discovered that above, | |||
| 3329 | // so we should pass the LibFunc, not the name (and it might be better | |||
| 3330 | // still to separate intrinsic handling from libcalls). | |||
| 3331 | return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, | |||
| 3332 | Call); | |||
| 3333 | } | |||
| 3334 | ||||
| 3335 | bool llvm::isMathLibCallNoop(const CallBase *Call, | |||
| 3336 | const TargetLibraryInfo *TLI) { | |||
| 3337 | // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap | |||
| 3338 | // (and to some extent ConstantFoldScalarCall). | |||
| 3339 | if (Call->isNoBuiltin() || Call->isStrictFP()) | |||
| 3340 | return false; | |||
| 3341 | Function *F = Call->getCalledFunction(); | |||
| 3342 | if (!F) | |||
| 3343 | return false; | |||
| 3344 | ||||
| 3345 | LibFunc Func; | |||
| 3346 | if (!TLI || !TLI->getLibFunc(*F, Func)) | |||
| 3347 | return false; | |||
| 3348 | ||||
| 3349 | if (Call->arg_size() == 1) { | |||
| 3350 | if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) { | |||
| 3351 | const APFloat &Op = OpC->getValueAPF(); | |||
| 3352 | switch (Func) { | |||
| 3353 | case LibFunc_logl: | |||
| 3354 | case LibFunc_log: | |||
| 3355 | case LibFunc_logf: | |||
| 3356 | case LibFunc_log2l: | |||
| 3357 | case LibFunc_log2: | |||
| 3358 | case LibFunc_log2f: | |||
| 3359 | case LibFunc_log10l: | |||
| 3360 | case LibFunc_log10: | |||
| 3361 | case LibFunc_log10f: | |||
| 3362 | return Op.isNaN() || (!Op.isZero() && !Op.isNegative()); | |||
| 3363 | ||||
| 3364 | case LibFunc_expl: | |||
| 3365 | case LibFunc_exp: | |||
| 3366 | case LibFunc_expf: | |||
| 3367 | // FIXME: These boundaries are slightly conservative. | |||
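| | // (exp(709.0) ~= 8.2e307 still fits in a double, DBL_MAX ~= 1.8e308, | |||
| | // while exp(710.0) overflows; below roughly -745, exp() underflows | |||
| | // past the smallest denormal.) | |||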
| 3368 | if (OpC->getType()->isDoubleTy()) | |||
| 3369 | return !(Op < APFloat(-745.0) || Op > APFloat(709.0)); | |||
| 3370 | if (OpC->getType()->isFloatTy()) | |||
| 3371 | return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f)); | |||
| 3372 | break; | |||
| 3373 | ||||
| 3374 | case LibFunc_exp2l: | |||
| 3375 | case LibFunc_exp2: | |||
| 3376 | case LibFunc_exp2f: | |||
| 3377 | // FIXME: These boundaries are slightly conservative. | |||
| 3378 | if (OpC->getType()->isDoubleTy()) | |||
| 3379 | return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0)); | |||
| 3380 | if (OpC->getType()->isFloatTy()) | |||
| 3381 | return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f)); | |||
| 3382 | break; | |||
| 3383 | ||||
| 3384 | case LibFunc_sinl: | |||
| 3385 | case LibFunc_sin: | |||
| 3386 | case LibFunc_sinf: | |||
| 3387 | case LibFunc_cosl: | |||
| 3388 | case LibFunc_cos: | |||
| 3389 | case LibFunc_cosf: | |||
| 3390 | return !Op.isInfinity(); | |||
| 3391 | ||||
| 3392 | case LibFunc_tanl: | |||
| 3393 | case LibFunc_tan: | |||
| 3394 | case LibFunc_tanf: { | |||
| 3395 | // FIXME: Stop using the host math library. | |||
| 3396 | // FIXME: The computation isn't done in the right precision. | |||
| 3397 | Type *Ty = OpC->getType(); | |||
| 3398 | if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) | |||
| 3399 | return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr; | |||
| 3400 | break; | |||
| 3401 | } | |||
| 3402 | ||||
| 3403 | case LibFunc_atan: | |||
| 3404 | case LibFunc_atanf: | |||
| 3405 | case LibFunc_atanl: | |||
| 3406 | // Per POSIX, this MAY fail if Op is denormal. We choose not to fail. | |||
| 3407 | return true; | |||
| 3408 | ||||
| 3410 | case LibFunc_asinl: | |||
| 3411 | case LibFunc_asin: | |||
| 3412 | case LibFunc_asinf: | |||
| 3413 | case LibFunc_acosl: | |||
| 3414 | case LibFunc_acos: | |||
| 3415 | case LibFunc_acosf: | |||
| 3416 | return !(Op < APFloat(Op.getSemantics(), "-1") || | |||
| 3417 | Op > APFloat(Op.getSemantics(), "1")); | |||
| 3418 | ||||
| 3419 | case LibFunc_sinh: | |||
| 3420 | case LibFunc_cosh: | |||
| 3421 | case LibFunc_sinhf: | |||
| 3422 | case LibFunc_coshf: | |||
| 3423 | case LibFunc_sinhl: | |||
| 3424 | case LibFunc_coshl: | |||
| 3425 | // FIXME: These boundaries are slightly conservative. | |||
| 3426 | if (OpC->getType()->isDoubleTy()) | |||
| 3427 | return !(Op < APFloat(-710.0) || Op > APFloat(710.0)); | |||
| 3428 | if (OpC->getType()->isFloatTy()) | |||
| 3429 | return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f)); | |||
| 3430 | break; | |||
| 3431 | ||||
| 3432 | case LibFunc_sqrtl: | |||
| 3433 | case LibFunc_sqrt: | |||
| 3434 | case LibFunc_sqrtf: | |||
| 3435 | return Op.isNaN() || Op.isZero() || !Op.isNegative(); | |||
| 3436 | ||||
| 3437 | // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p, | |||
| 3438 | // maybe others? | |||
| 3439 | default: | |||
| 3440 | break; | |||
| 3441 | } | |||
| 3442 | } | |||
| 3443 | } | |||
| 3444 | ||||
| 3445 | if (Call->arg_size() == 2) { | |||
| 3446 | ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0)); | |||
| 3447 | ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1)); | |||
| 3448 | if (Op0C && Op1C) { | |||
| 3449 | const APFloat &Op0 = Op0C->getValueAPF(); | |||
| 3450 | const APFloat &Op1 = Op1C->getValueAPF(); | |||
| 3451 | ||||
| 3452 | switch (Func) { | |||
| 3453 | case LibFunc_powl: | |||
| 3454 | case LibFunc_pow: | |||
| 3455 | case LibFunc_powf: { | |||
| 3456 | // FIXME: Stop using the host math library. | |||
| 3457 | // FIXME: The computation isn't done in the right precision. | |||
| 3458 | Type *Ty = Op0C->getType(); | |||
| 3459 | if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) { | |||
| 3460 | if (Ty == Op1C->getType()) | |||
| 3461 | return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr; | |||
| 3462 | } | |||
| 3463 | break; | |||
| 3464 | } | |||
| 3465 | ||||
| 3466 | case LibFunc_fmodl: | |||
| 3467 | case LibFunc_fmod: | |||
| 3468 | case LibFunc_fmodf: | |||
| 3469 | case LibFunc_remainderl: | |||
| 3470 | case LibFunc_remainder: | |||
| 3471 | case LibFunc_remainderf: | |||
| 3472 | return Op0.isNaN() || Op1.isNaN() || | |||
| 3473 | (!Op0.isInfinity() && !Op1.isZero()); | |||
| 3474 | ||||
| 3475 | case LibFunc_atan2: | |||
| 3476 | case LibFunc_atan2f: | |||
| 3477 | case LibFunc_atan2l: | |||
| 3478 | // Although IEEE-754 says atan2(+/-0.0, +/-0.0) is well-defined, and | |||
| 3479 | // GLIBC and MSVC do not appear to raise an error for those inputs, we | |||
| 3480 | // cannot rely on that behavior. POSIX and C11 say that a domain error | |||
| 3481 | // may occur, so allow for that possibility. | |||
| 3482 | return !Op0.isZero() || !Op1.isZero(); | |||
| 3483 | ||||
| 3484 | default: | |||
| 3485 | break; | |||
| 3486 | } | |||
| 3487 | } | |||
| 3488 | } | |||
| 3489 | ||||
| 3490 | return false; | |||
| 3491 | } | |||
| 3492 | ||||
| 3493 | void TargetFolder::anchor() {} |