LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Bitset.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/Loads.h"
33#include "llvm/IR/Attributes.h"
34#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/DataLayout.h"
39#include "llvm/IR/DebugInfo.h"
41#include "llvm/IR/Function.h"
43#include "llvm/IR/InlineAsm.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/IR/IntrinsicsAArch64.h"
50#include "llvm/IR/IntrinsicsAMDGPU.h"
51#include "llvm/IR/IntrinsicsARM.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Statepoint.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/User.h"
60#include "llvm/IR/Value.h"
61#include "llvm/IR/ValueHandle.h"
66#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82#include <vector>
83
84#define DEBUG_TYPE "instcombine"
86
87using namespace llvm;
88using namespace PatternMatch;
89
90STATISTIC(NumSimplified, "Number of library calls simplified");
91
93 "instcombine-guard-widening-window",
94 cl::init(3),
95 cl::desc("How wide an instruction window to bypass looking for "
96 "another guard"));
97
98/// Return the specified type promoted as it would be to pass through a va_arg
99/// area.
101 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
102 if (ITy->getBitWidth() < 32)
103 return Type::getInt32Ty(Ty->getContext());
104 }
105 return Ty;
106}
107
108/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
109/// TODO: This should probably be integrated with visitAllocSites, but that
110/// requires a deeper change to allow either unread or unwritten objects.
112 auto *Src = MI->getRawSource();
113 while (isa<GetElementPtrInst>(Src)) {
114 if (!Src->hasOneUse())
115 return false;
116 Src = cast<Instruction>(Src)->getOperand(0);
117 }
118 return isa<AllocaInst>(Src) && Src->hasOneUse();
119}
120
122 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
123 MaybeAlign CopyDstAlign = MI->getDestAlign();
124 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
125 MI->setDestAlignment(DstAlign);
126 return MI;
127 }
128
129 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
130 MaybeAlign CopySrcAlign = MI->getSourceAlign();
131 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
132 MI->setSourceAlignment(SrcAlign);
133 return MI;
134 }
135
136 // If we have a store to a location which is known constant, we can conclude
137 // that the store must be storing the constant value (else the memory
138 // wouldn't be constant), and this must be a noop.
139 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
140 // Set the size of the copy to 0, it will be deleted on the next iteration.
141 MI->setLength((uint64_t)0);
142 return MI;
143 }
144
145 // If the source is provably undef, the memcpy/memmove doesn't do anything
146 // (unless the transfer is volatile).
147 if (hasUndefSource(MI) && !MI->isVolatile()) {
148 // Set the size of the copy to 0, it will be deleted on the next iteration.
149 MI->setLength((uint64_t)0);
150 return MI;
151 }
152
153 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
154 // load/store.
155 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
156 if (!MemOpLength) return nullptr;
157
158 // Source and destination pointer types are always "i8*" for intrinsic. See
159 // if the size is something we can handle with a single primitive load/store.
160 // A single load+store correctly handles overlapping memory in the memmove
161 // case.
162 uint64_t Size = MemOpLength->getLimitedValue();
163 assert(Size && "0-sized memory transferring should be removed already.");
164
165 if (Size > 8 || (Size&(Size-1)))
166 return nullptr; // If not 1/2/4/8 bytes, exit.
167
168 // If it is an atomic and alignment is less than the size then we will
169 // introduce the unaligned memory access which will be later transformed
170 // into libcall in CodeGen. This is not evident performance gain so disable
171 // it now.
172 if (MI->isAtomic())
173 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
174 return nullptr;
175
176 // Use an integer load+store unless we can find something better.
177 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
178
179 // If the memcpy has metadata describing the members, see if we can get the
180 // TBAA, scope and noalias tags describing our copy.
181 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
182
183 Value *Src = MI->getArgOperand(1);
184 Value *Dest = MI->getArgOperand(0);
185 LoadInst *L = Builder.CreateLoad(IntType, Src);
186 // Alignment from the mem intrinsic will be better, so use it.
187 L->setAlignment(*CopySrcAlign);
188 L->setAAMetadata(AACopyMD);
189 MDNode *LoopMemParallelMD =
190 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
191 if (LoopMemParallelMD)
192 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
193 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
194 if (AccessGroupMD)
195 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
196
197 StoreInst *S = Builder.CreateStore(L, Dest);
198 // Alignment from the mem intrinsic will be better, so use it.
199 S->setAlignment(*CopyDstAlign);
200 S->setAAMetadata(AACopyMD);
201 if (LoopMemParallelMD)
202 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
203 if (AccessGroupMD)
204 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
205 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
206
207 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
208 // non-atomics can be volatile
209 L->setVolatile(MT->isVolatile());
210 S->setVolatile(MT->isVolatile());
211 }
212 if (MI->isAtomic()) {
213 // atomics have to be unordered
214 L->setOrdering(AtomicOrdering::Unordered);
216 }
217
218 // Set the size of the copy to 0, it will be deleted on the next iteration.
219 MI->setLength((uint64_t)0);
220 return MI;
221}
222
224 const Align KnownAlignment =
225 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
226 MaybeAlign MemSetAlign = MI->getDestAlign();
227 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
228 MI->setDestAlignment(KnownAlignment);
229 return MI;
230 }
231
232 // If we have a store to a location which is known constant, we can conclude
233 // that the store must be storing the constant value (else the memory
234 // wouldn't be constant), and this must be a noop.
235 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
236 // Set the size of the copy to 0, it will be deleted on the next iteration.
237 MI->setLength((uint64_t)0);
238 return MI;
239 }
240
241 // Remove memset with an undef value.
242 // FIXME: This is technically incorrect because it might overwrite a poison
243 // value. Change to PoisonValue once #52930 is resolved.
244 if (isa<UndefValue>(MI->getValue())) {
245 // Set the size of the copy to 0, it will be deleted on the next iteration.
246 MI->setLength((uint64_t)0);
247 return MI;
248 }
249
250 // Extract the length and alignment and fill if they are constant.
251 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
252 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
253 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
254 return nullptr;
255 const uint64_t Len = LenC->getLimitedValue();
256 assert(Len && "0-sized memory setting should be removed already.");
257 const Align Alignment = MI->getDestAlign().valueOrOne();
258
259 // If it is an atomic and alignment is less than the size then we will
260 // introduce the unaligned memory access which will be later transformed
261 // into libcall in CodeGen. This is not evident performance gain so disable
262 // it now.
263 if (MI->isAtomic() && Alignment < Len)
264 return nullptr;
265
266 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
267 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
268 Value *Dest = MI->getDest();
269
270 // Extract the fill value and store.
271 Constant *FillVal = ConstantInt::get(
272 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
273 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
274 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
275 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
276 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
277 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
278 }
279
280 S->setAlignment(Alignment);
281 if (MI->isAtomic())
283
284 // Set the size of the copy to 0, it will be deleted on the next iteration.
285 MI->setLength((uint64_t)0);
286 return MI;
287 }
288
289 return nullptr;
290}
291
292// TODO, Obvious Missing Transforms:
293// * Narrow width by halfs excluding zero/undef lanes
294Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
295 Value *LoadPtr = II.getArgOperand(0);
296 const Align Alignment = II.getParamAlign(0).valueOrOne();
297 Value *Mask = II.getArgOperand(1);
298
299 // If the mask is all ones or poison, this is a plain vector load of the 1st
300 // argument.
301 if (match(Mask, m_AllOnesOrPoison())) {
302 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
303 "unmaskedload");
304 L->copyMetadata(II);
305 return L;
306 }
307
308 // If we can unconditionally load from this address, replace with a
309 // load/select idiom.
310 if (isDereferenceablePointer(LoadPtr, II.getType(),
312 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
313 "unmaskedload");
314 LI->copyMetadata(II);
315 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
316 }
317
318 return nullptr;
319}
320
321// TODO, Obvious Missing Transforms:
322// * Single constant active lane -> store
323// * Narrow width by halfs excluding zero/undef lanes
324Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
325 Value *StorePtr = II.getArgOperand(1);
326 Align Alignment = II.getParamAlign(1).valueOrOne();
327 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
328 if (!ConstMask)
329 return nullptr;
330
331 // If the mask is all zeros or poison, this instruction does nothing.
332 if (match(ConstMask, m_ZeroOrPoison()))
334
335 // If the mask is all ones or poison, this is a plain vector store of the 1st
336 // argument.
337 if (match(ConstMask, m_AllOnesOrPoison())) {
338 StoreInst *S =
339 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
340 S->copyMetadata(II);
341 return S;
342 }
343
344 if (isa<ScalableVectorType>(ConstMask->getType()))
345 return nullptr;
346
347 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
348 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
349 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
350 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
351 PoisonElts))
352 return replaceOperand(II, 0, V);
353
354 return nullptr;
355}
356
357// TODO, Obvious Missing Transforms:
358// * Single constant active lane load -> load
359// * Dereferenceable address & few lanes -> scalarize speculative load/selects
360// * Adjacent vector addresses -> masked.load
361// * Narrow width by halfs excluding zero/undef lanes
362// * Vector incrementing address -> vector masked load
363Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
364 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
365 if (!ConstMask)
366 return nullptr;
367
368 // Vector splat address w/known mask -> scalar load
369 // Fold the gather to load the source vector first lane
370 // because it is reloading the same value each time
371 if (ConstMask->isAllOnesValue())
372 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
373 auto *VecTy = cast<VectorType>(II.getType());
374 const Align Alignment = II.getParamAlign(0).valueOrOne();
375 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
376 Alignment, "load.scalar");
377 Value *Shuf =
378 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
380 }
381
382 return nullptr;
383}
384
385// TODO, Obvious Missing Transforms:
386// * Single constant active lane -> store
387// * Adjacent vector addresses -> masked.store
388// * Narrow store width by halfs excluding zero/undef lanes
389// * Vector incrementing address -> vector masked store
390Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
391 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
392 if (!ConstMask)
393 return nullptr;
394
395 // If the mask is all zeros or poison, a scatter does nothing.
396 if (match(ConstMask, m_ZeroOrPoison()))
398
399 // Vector splat address -> scalar store
400 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
401 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
402 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
403 if (maskContainsAllOneOrUndef(ConstMask)) {
404 Align Alignment = II.getParamAlign(1).valueOrOne();
405 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
406 Alignment);
407 S->copyMetadata(II);
408 return S;
409 }
410 }
411 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
412 // lastlane), ptr
413 if (ConstMask->isAllOnesValue()) {
414 Align Alignment = II.getParamAlign(1).valueOrOne();
415 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
416 ElementCount VF = WideLoadTy->getElementCount();
417 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
418 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
419 Value *Extract =
420 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
421 StoreInst *S =
422 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
423 S->copyMetadata(II);
424 return S;
425 }
426 }
427 if (isa<ScalableVectorType>(ConstMask->getType()))
428 return nullptr;
429
430 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
431 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
432 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
433 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
434 PoisonElts))
435 return replaceOperand(II, 0, V);
436 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
437 PoisonElts))
438 return replaceOperand(II, 1, V);
439
440 return nullptr;
441}
442
443/// This function transforms launder.invariant.group and strip.invariant.group
444/// like:
445/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
446/// launder(strip(%x)) -> launder(%x)
447/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
448/// strip(launder(%x)) -> strip(%x)
449/// This is legal because it preserves the most recent information about
450/// the presence or absence of invariant.group.
452 InstCombinerImpl &IC) {
453 auto *Arg = II.getArgOperand(0);
454 auto *StrippedArg = Arg->stripPointerCasts();
455 auto *StrippedInvariantGroupsArg = StrippedArg;
456 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
457 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
458 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
459 break;
460 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
461 }
462 if (StrippedArg == StrippedInvariantGroupsArg)
463 return nullptr; // No launders/strips to remove.
464
465 Value *Result = nullptr;
466
467 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
468 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
469 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
470 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
471 else
473 "simplifyInvariantGroupIntrinsic only handles launder and strip");
474 if (Result->getType()->getPointerAddressSpace() !=
475 II.getType()->getPointerAddressSpace())
476 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
477
478 return cast<Instruction>(Result);
479}
480
482 assert((II.getIntrinsicID() == Intrinsic::cttz ||
483 II.getIntrinsicID() == Intrinsic::ctlz) &&
484 "Expected cttz or ctlz intrinsic");
485 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
486 Value *Op0 = II.getArgOperand(0);
487 Value *Op1 = II.getArgOperand(1);
488 Value *X;
489 // ctlz(bitreverse(x)) -> cttz(x)
490 // cttz(bitreverse(x)) -> ctlz(x)
491 if (match(Op0, m_BitReverse(m_Value(X)))) {
492 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
493 Function *F =
494 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
495 return CallInst::Create(F, {X, II.getArgOperand(1)});
496 }
497
498 if (II.getType()->isIntOrIntVectorTy(1)) {
499 // ctlz/cttz i1 Op0 --> not Op0
500 if (match(Op1, m_Zero()))
501 return BinaryOperator::CreateNot(Op0);
502 // If zero is poison, then the input can be assumed to be "true", so the
503 // instruction simplifies to "false".
504 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
505 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
506 }
507
508 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
509 if (II.hasOneUse() && match(Op1, m_Zero()) &&
510 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
511 II.dropUBImplyingAttrsAndMetadata();
512 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
513 }
514
515 Constant *C;
516
517 if (IsTZ) {
518 // cttz(-x) -> cttz(x)
519 if (match(Op0, m_Neg(m_Value(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(-x & x) -> cttz(x)
523 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
524 return IC.replaceOperand(II, 0, X);
525
526 // cttz(sext(x)) -> cttz(zext(x))
527 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
528 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
529 auto *CttzZext =
530 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
531 return IC.replaceInstUsesWith(II, CttzZext);
532 }
533
534 // Zext doesn't change the number of trailing zeros, so narrow:
535 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
536 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
537 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
538 IC.Builder.getTrue());
539 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
540 return IC.replaceInstUsesWith(II, ZextCttz);
541 }
542
543 // cttz(abs(x)) -> cttz(x)
544 // cttz(nabs(x)) -> cttz(x)
545 Value *Y;
547 if (SPF == SPF_ABS || SPF == SPF_NABS)
548 return IC.replaceOperand(II, 0, X);
549
551 return IC.replaceOperand(II, 0, X);
552
553 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
554 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
555 match(Op1, m_One())) {
556 Value *ConstCttz =
557 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
558 return BinaryOperator::CreateAdd(ConstCttz, X);
559 }
560
561 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
562 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
563 match(Op1, m_One())) {
564 Value *ConstCttz =
565 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
566 return BinaryOperator::CreateSub(ConstCttz, X);
567 }
568
569 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
570 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
571 Value *Width =
572 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
573 return BinaryOperator::CreateSub(Width, X);
574 }
575 } else {
576 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
577 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
578 match(Op1, m_One())) {
579 Value *ConstCtlz =
580 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
581 return BinaryOperator::CreateAdd(ConstCtlz, X);
582 }
583
584 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
585 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
586 match(Op1, m_One())) {
587 Value *ConstCtlz =
588 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
589 return BinaryOperator::CreateSub(ConstCtlz, X);
590 }
591
592 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
593 if (Op0->hasOneUse() &&
594 match(Op0,
596 Type *Ty = II.getType();
597 unsigned BitWidth = Ty->getScalarSizeInBits();
598 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
599 {X, IC.Builder.getFalse()});
600 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
601 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
602 }
603 }
604
605 // cttz(Pow2) -> Log2(Pow2)
606 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
607 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
608 if (IsTZ)
609 return IC.replaceInstUsesWith(II, R);
610 BinaryOperator *BO = BinaryOperator::CreateSub(
611 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
612 R);
613 BO->setHasNoSignedWrap();
615 return BO;
616 }
617
618 KnownBits Known = IC.computeKnownBits(Op0, &II);
619
620 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
621 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
622 : Known.countMaxLeadingZeros();
623 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
624 : Known.countMinLeadingZeros();
625
626 // If all bits above (ctlz) or below (cttz) the first known one are known
627 // zero, this value is constant.
628 // FIXME: This should be in InstSimplify because we're replacing an
629 // instruction with a constant.
630 if (PossibleZeros == DefiniteZeros) {
631 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
632 return IC.replaceInstUsesWith(II, C);
633 }
634
635 // If the input to cttz/ctlz is known to be non-zero,
636 // then change the 'ZeroIsPoison' parameter to 'true'
637 // because we know the zero behavior can't affect the result.
638 if (!Known.One.isZero() ||
640 if (!match(II.getArgOperand(1), m_One()))
641 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
642 }
643
644 // Add range attribute since known bits can't completely reflect what we know.
645 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
646 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
647 !II.getMetadata(LLVMContext::MD_range)) {
648 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
649 APInt(BitWidth, PossibleZeros + 1));
650 II.addRangeRetAttr(Range);
651 return &II;
652 }
653
654 return nullptr;
655}
656
658 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
659 "Expected ctpop intrinsic");
660 Type *Ty = II.getType();
661 unsigned BitWidth = Ty->getScalarSizeInBits();
662 Value *Op0 = II.getArgOperand(0);
663 Value *X, *Y;
664
665 // ctpop(bitreverse(x)) -> ctpop(x)
666 // ctpop(bswap(x)) -> ctpop(x)
667 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
668 return IC.replaceOperand(II, 0, X);
669
670 // ctpop(rot(x)) -> ctpop(x)
671 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
672 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
673 X == Y)
674 return IC.replaceOperand(II, 0, X);
675
676 // ctpop(x | -x) -> bitwidth - cttz(x, false)
677 if (Op0->hasOneUse() &&
678 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
679 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
680 {X, IC.Builder.getFalse()});
681 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
682 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
683 }
684
685 // ctpop(~x & (x - 1)) -> cttz(x, false)
686 if (match(Op0,
688 Function *F =
689 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
690 return CallInst::Create(F, {X, IC.Builder.getFalse()});
691 }
692
693 // Zext doesn't change the number of set bits, so narrow:
694 // ctpop (zext X) --> zext (ctpop X)
695 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
696 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
697 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
698 }
699
700 KnownBits Known(BitWidth);
701 IC.computeKnownBits(Op0, Known, &II);
702
703 // If all bits are zero except for exactly one fixed bit, then the result
704 // must be 0 or 1, and we can get that answer by shifting to LSB:
705 // ctpop (X & 32) --> (X & 32) >> 5
706 // TODO: Investigate removing this as its likely unnecessary given the below
707 // `isKnownToBeAPowerOfTwo` check.
708 if ((~Known.Zero).isPowerOf2())
709 return BinaryOperator::CreateLShr(
710 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
711
712 // More generally we can also handle non-constant power of 2 patterns such as
713 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
714 // ctpop(Pow2OrZero) --> icmp ne X, 0
715 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
716 return CastInst::Create(Instruction::ZExt,
719 Ty);
720
721 // Add range attribute since known bits can't completely reflect what we know.
722 if (BitWidth != 1) {
723 ConstantRange OldRange =
724 II.getRange().value_or(ConstantRange::getFull(BitWidth));
725
726 unsigned Lower = Known.countMinPopulation();
727 unsigned Upper = Known.countMaxPopulation() + 1;
728
729 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
731 Lower = 1;
732
734 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
735
736 if (Range != OldRange) {
737 II.addRangeRetAttr(Range);
738 return &II;
739 }
740 }
741
742 return nullptr;
743}
744
745/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
746/// at most two source operands are actually referenced.
748 bool IsExtension) {
749 // Bail out if the mask is not a constant.
750 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
751 if (!C)
752 return nullptr;
753
754 auto *RetTy = cast<FixedVectorType>(II.getType());
755 unsigned NumIndexes = RetTy->getNumElements();
756
757 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
758 if (!RetTy->getElementType()->isIntegerTy(8) ||
759 (NumIndexes != 8 && NumIndexes != 16))
760 return nullptr;
761
762 // For tbx instructions, the first argument is the "fallback" vector, which
763 // has the same length as the mask and return type.
764 unsigned int StartIndex = (unsigned)IsExtension;
765 auto *SourceTy =
766 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
767 // Note that the element count of each source vector does *not* need to be the
768 // same as the element count of the return type and mask! All source vectors
769 // must have the same element count as each other, though.
770 unsigned NumElementsPerSource = SourceTy->getNumElements();
771
772 // There are no tbl/tbx intrinsics for which the destination size exceeds the
773 // source size. However, our definitions of the intrinsics, at least in
774 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
775 // *could* technically happen.
776 if (NumIndexes > NumElementsPerSource)
777 return nullptr;
778
779 // The tbl/tbx intrinsics take several source operands followed by a mask
780 // operand.
781 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
782
783 // Map input operands to shuffle indices. This also helpfully deduplicates the
784 // input arguments, in case the same value is passed as an argument multiple
785 // times.
786 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
787 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
788 PoisonValue::get(SourceTy)};
789
790 int Indexes[16];
791 for (unsigned I = 0; I < NumIndexes; ++I) {
792 Constant *COp = C->getAggregateElement(I);
793
794 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
795 return nullptr;
796
797 if (isa<UndefValue>(COp)) {
798 Indexes[I] = -1;
799 continue;
800 }
801
802 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
803 // The index of the input argument that this index references (0 = first
804 // source argument, etc).
805 unsigned SourceOperandIndex = Index / NumElementsPerSource;
806 // The index of the element at that source operand.
807 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
808
809 Value *SourceOperand;
810 if (SourceOperandIndex >= NumSourceOperands) {
811 // This index is out of bounds. Map it to index into either the fallback
812 // vector (tbx) or vector of zeroes (tbl).
813 SourceOperandIndex = NumSourceOperands;
814 if (IsExtension) {
815 // For out-of-bounds indices in tbx, choose the `I`th element of the
816 // fallback.
817 SourceOperand = II.getArgOperand(0);
818 SourceOperandElementIndex = I;
819 } else {
820 // Otherwise, choose some element from the dummy vector of zeroes (we'll
821 // always choose the first).
822 SourceOperand = Constant::getNullValue(SourceTy);
823 SourceOperandElementIndex = 0;
824 }
825 } else {
826 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
827 }
828
829 // The source operand may be the fallback vector, which may not have the
830 // same number of elements as the source vector. In that case, we *could*
831 // choose to extend its length with another shufflevector, but it's simpler
832 // to just bail instead.
833 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
834 NumElementsPerSource)
835 return nullptr;
836
837 // We now know the source operand referenced by this index. Make it a
838 // shufflevector operand, if it isn't already.
839 unsigned NumSlots = ValueToShuffleSlot.size();
840 // This shuffle references more than two sources, and hence cannot be
841 // represented as a shufflevector.
842 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
843 return nullptr;
844
845 auto [It, Inserted] =
846 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
847 if (Inserted)
848 ShuffleOperands[It->getSecond()] = SourceOperand;
849
850 unsigned RemappedIndex =
851 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
852 Indexes[I] = RemappedIndex;
853 }
854
856 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
857 return IC.replaceInstUsesWith(II, Shuf);
858}
859
860// Returns true iff the 2 intrinsics have the same operands, limiting the
861// comparison to the first NumOperands.
862static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
863 unsigned NumOperands) {
864 assert(I.arg_size() >= NumOperands && "Not enough operands");
865 assert(E.arg_size() >= NumOperands && "Not enough operands");
866 for (unsigned i = 0; i < NumOperands; i++)
867 if (I.getArgOperand(i) != E.getArgOperand(i))
868 return false;
869 return true;
870}
871
872// Remove trivially empty start/end intrinsic ranges, i.e. a start
873// immediately followed by an end (ignoring debuginfo or other
874// start/end intrinsics in between). As this handles only the most trivial
875// cases, tracking the nesting level is not needed:
876//
877// call @llvm.foo.start(i1 0)
878// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
879// call @llvm.foo.end(i1 0)
880// call @llvm.foo.end(i1 0) ; &I
881static bool
883 std::function<bool(const IntrinsicInst &)> IsStart) {
884 // We start from the end intrinsic and scan backwards, so that InstCombine
885 // has already processed (and potentially removed) all the instructions
886 // before the end intrinsic.
887 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
888 for (; BI != BE; ++BI) {
889 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
890 if (I->isDebugOrPseudoInst() ||
891 I->getIntrinsicID() == EndI.getIntrinsicID())
892 continue;
893 if (IsStart(*I)) {
894 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
896 IC.eraseInstFromFunction(EndI);
897 return true;
898 }
899 // Skip start intrinsics that don't pair with this end intrinsic.
900 continue;
901 }
902 }
903 break;
904 }
905
906 return false;
907}
908
910 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
911 // Bail out on the case where the source va_list of a va_copy is destroyed
912 // immediately by a follow-up va_end.
913 return II.getIntrinsicID() == Intrinsic::vastart ||
914 (II.getIntrinsicID() == Intrinsic::vacopy &&
915 I.getArgOperand(0) != II.getArgOperand(1));
916 });
917 return nullptr;
918}
919
921 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
922 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
923 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
924 Call.setArgOperand(0, Arg1);
925 Call.setArgOperand(1, Arg0);
926 return &Call;
927 }
928 return nullptr;
929}
930
931/// Creates a result tuple for an overflow intrinsic \p II with a given
932/// \p Result and a constant \p Overflow value.
934 Constant *Overflow) {
935 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
936 StructType *ST = cast<StructType>(II->getType());
937 Constant *Struct = ConstantStruct::get(ST, V);
938 return InsertValueInst::Create(Struct, Result, 0);
939}
940
942InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
943 WithOverflowInst *WO = cast<WithOverflowInst>(II);
944 Value *OperationResult = nullptr;
945 Constant *OverflowResult = nullptr;
946 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
947 WO->getRHS(), *WO, OperationResult, OverflowResult))
948 return createOverflowTuple(WO, OperationResult, OverflowResult);
949
950 // See whether we can optimize the overflow check with assumption information.
951 for (User *U : WO->users()) {
952 if (!match(U, m_ExtractValue<1>(m_Value())))
953 continue;
954
955 for (auto &AssumeVH : AC.assumptionsFor(U)) {
956 if (!AssumeVH)
957 continue;
958 CallInst *I = cast<CallInst>(AssumeVH);
959 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
960 continue;
961 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
962 /*AllowEphemerals=*/true))
963 continue;
964 Value *Result =
965 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
966 Result->takeName(WO);
967 if (auto *Inst = dyn_cast<Instruction>(Result)) {
968 if (WO->isSigned())
969 Inst->setHasNoSignedWrap();
970 else
971 Inst->setHasNoUnsignedWrap();
972 }
973 return createOverflowTuple(WO, Result,
974 ConstantInt::getFalse(U->getType()));
975 }
976 }
977
978 return nullptr;
979}
980
981static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
982 Ty = Ty->getScalarType();
983 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
984}
985
986static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
987 Ty = Ty->getScalarType();
988 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
989}
990
991/// \returns the compare predicate type if the test performed by
992/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
993/// floating-point environment assumed for \p F for type \p Ty
995 const Function &F, Type *Ty) {
996 switch (static_cast<unsigned>(Mask)) {
997 case fcZero:
998 if (inputDenormalIsIEEE(F, Ty))
999 return FCmpInst::FCMP_OEQ;
1000 break;
1001 case fcZero | fcSubnormal:
1002 if (inputDenormalIsDAZ(F, Ty))
1003 return FCmpInst::FCMP_OEQ;
1004 break;
1005 case fcPositive | fcNegZero:
1006 if (inputDenormalIsIEEE(F, Ty))
1007 return FCmpInst::FCMP_OGE;
1008 break;
1010 if (inputDenormalIsDAZ(F, Ty))
1011 return FCmpInst::FCMP_OGE;
1012 break;
1014 if (inputDenormalIsIEEE(F, Ty))
1015 return FCmpInst::FCMP_OGT;
1016 break;
1017 case fcNegative | fcPosZero:
1018 if (inputDenormalIsIEEE(F, Ty))
1019 return FCmpInst::FCMP_OLE;
1020 break;
1022 if (inputDenormalIsDAZ(F, Ty))
1023 return FCmpInst::FCMP_OLE;
1024 break;
1026 if (inputDenormalIsIEEE(F, Ty))
1027 return FCmpInst::FCMP_OLT;
1028 break;
1029 case fcPosNormal | fcPosInf:
1030 if (inputDenormalIsDAZ(F, Ty))
1031 return FCmpInst::FCMP_OGT;
1032 break;
1033 case fcNegNormal | fcNegInf:
1034 if (inputDenormalIsDAZ(F, Ty))
1035 return FCmpInst::FCMP_OLT;
1036 break;
1037 case ~fcZero & ~fcNan:
1038 if (inputDenormalIsIEEE(F, Ty))
1039 return FCmpInst::FCMP_ONE;
1040 break;
1041 case ~(fcZero | fcSubnormal) & ~fcNan:
1042 if (inputDenormalIsDAZ(F, Ty))
1043 return FCmpInst::FCMP_ONE;
1044 break;
1045 default:
1046 break;
1047 }
1048
1050}
1051
1052Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1053 Value *Src0 = II.getArgOperand(0);
1054 Value *Src1 = II.getArgOperand(1);
1055 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1056 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1057 const bool IsUnordered = (Mask & fcNan) == fcNan;
1058 const bool IsOrdered = (Mask & fcNan) == fcNone;
1059 const FPClassTest OrderedMask = Mask & ~fcNan;
1060 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1061
1062 const bool IsStrict =
1063 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1064
1065 Value *FNegSrc;
1066 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1067 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1068
1069 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1070 return replaceOperand(II, 0, FNegSrc);
1071 }
1072
1073 Value *FAbsSrc;
1074 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1075 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1076 return replaceOperand(II, 0, FAbsSrc);
1077 }
1078
1079 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1080 (IsOrdered || IsUnordered) && !IsStrict) {
1081 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1082 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1083 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1084 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1086 FCmpInst::Predicate Pred =
1087 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1088 if (OrderedInvertedMask == fcInf)
1089 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1090
1091 Value *Fabs = Builder.CreateFAbs(Src0);
1092 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1093 CmpInf->takeName(&II);
1094 return replaceInstUsesWith(II, CmpInf);
1095 }
1096
1097 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1098 (IsOrdered || IsUnordered) && !IsStrict) {
1099 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1100 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1101 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1102 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1103 Constant *Inf =
1104 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1105 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1106 : Builder.CreateFCmpOEQ(Src0, Inf);
1107
1108 EqInf->takeName(&II);
1109 return replaceInstUsesWith(II, EqInf);
1110 }
1111
1112 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1113 (IsOrdered || IsUnordered) && !IsStrict) {
1114 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1115 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1116 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1117 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1119 OrderedInvertedMask == fcNegInf);
1120 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1121 : Builder.CreateFCmpONE(Src0, Inf);
1122 NeInf->takeName(&II);
1123 return replaceInstUsesWith(II, NeInf);
1124 }
1125
1126 if (Mask == fcNan && !IsStrict) {
1127 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1128 // exceptions.
1129 Value *IsNan =
1130 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1131 IsNan->takeName(&II);
1132 return replaceInstUsesWith(II, IsNan);
1133 }
1134
1135 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1136 // Equivalent of !isnan. Replace with standard fcmp.
1137 Value *FCmp =
1138 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1139 FCmp->takeName(&II);
1140 return replaceInstUsesWith(II, FCmp);
1141 }
1142
1144
1145 // Try to replace with an fcmp with 0
1146 //
1147 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1148 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1149 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1150 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1151 //
1152 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1153 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1154 //
1155 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1156 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1157 //
1158 if (!IsStrict && (IsOrdered || IsUnordered) &&
1159 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1160 Src0->getType())) !=
1163 // Equivalent of == 0.
1164 Value *FCmp = Builder.CreateFCmp(
1165 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1166 Src0, Zero);
1167
1168 FCmp->takeName(&II);
1169 return replaceInstUsesWith(II, FCmp);
1170 }
1171
1172 KnownFPClass Known =
1173 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1174
1175 // Clear test bits we know must be false from the source value.
1176 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1177 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1178 if ((Mask & Known.KnownFPClasses) != Mask) {
1179 II.setArgOperand(
1180 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1181 return &II;
1182 }
1183
1184 // If none of the tests which can return false are possible, fold to true.
1185 // fp_class (nnan x), ~(qnan|snan) -> true
1186 // fp_class (ninf x), ~(ninf|pinf) -> true
1187 if (Mask == Known.KnownFPClasses)
1188 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1189
1190 return nullptr;
1191}
1192
1193static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1194 KnownBits Known = computeKnownBits(Op, SQ);
1195 if (Known.isNonNegative())
1196 return false;
1197 if (Known.isNegative())
1198 return true;
1199
1200 Value *X, *Y;
1201 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1203
1204 return std::nullopt;
1205}
1206
1207static std::optional<bool> getKnownSignOrZero(Value *Op,
1208 const SimplifyQuery &SQ) {
1209 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1210 return Sign;
1211
1212 Value *X, *Y;
1213 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1215
1216 return std::nullopt;
1217}
1218
1219/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1220static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1221 const SimplifyQuery &SQ) {
1222 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1223 if (!Known1)
1224 return false;
1225 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1226 if (!Known0)
1227 return false;
1228 return *Known0 == *Known1;
1229}
1230
1231// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1232//
1233// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1234// produce 0 or inf.
//
// FpTy is the floating-point type being scaled and ExpTy the integer type of
// the exponent operand; only their scalar types are inspected.
1235static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1236 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
// Formats without an infinity can never saturate to inf, so give up.
1237 if (!APFloat::semanticsHasInf(FltSem))
1238 return false;
1239
1240 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1241 // reasonable fp type (for example, `double` only has 11 exponent bits).
1242 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1243 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1244 int SignedMin = static_cast<int>(minIntN(ExpBits));
// Extreme cases: the smallest value scaled by the largest exponent, and the
// largest value scaled by the smallest exponent.
// NOTE(review): this listing is missing the line that follows each of the two
// scalbn calls below (its final argument and closing parenthesis) - restore
// them from the upstream file before building.
1245 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
1247 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
1249 return ScaledUp.isInfinity() && ScaledDown.isZero();
1250}
1251
1252/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1253/// can trigger other combines.
1255 InstCombiner::BuilderTy &Builder) {
1256 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1257 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1258 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1259 "Expected a min or max intrinsic");
1260
1261 // TODO: Match vectors with undef elements, but undef may not propagate.
1262 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1263 Value *X;
1264 const APInt *C0, *C1;
1265 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1266 !match(Op1, m_APInt(C1)))
1267 return nullptr;
1268
1269 // Check for necessary no-wrap and overflow constraints.
1270 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1271 auto *Add = cast<BinaryOperator>(Op0);
1272 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1273 (!IsSigned && !Add->hasNoUnsignedWrap()))
1274 return nullptr;
1275
1276 // If the constant difference overflows, then instsimplify should reduce the
1277 // min/max to the add or C1.
1278 bool Overflow;
1279 APInt CDiff =
1280 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1281 assert(!Overflow && "Expected simplify of min/max");
1282
1283 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1284 // Note: the "mismatched" no-overflow setting does not propagate.
1285 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1286 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1287 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1288 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1289}
1290/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1291Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1292 Type *Ty = MinMax1.getType();
1293
1294 // We are looking for a tree of:
1295 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1296 // Where the min and max could be reversed
1297 Instruction *MinMax2;
1298 BinaryOperator *AddSub;
1299 const APInt *MinValue, *MaxValue;
1300 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1301 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1302 return nullptr;
1303 } else if (match(&MinMax1,
1304 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1305 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1306 return nullptr;
1307 } else
1308 return nullptr;
1309
1310 // Check that the constants clamp a saturate, and that the new type would be
1311 // sensible to convert to.
1312 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1313 return nullptr;
1314 // In what bitwidth can this be treated as saturating arithmetics?
1315 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1316 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1317 // good first approximation for what should be done there.
1318 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1319 return nullptr;
1320
1321 // Also make sure that the inner min/max and the add/sub have one use.
1322 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1323 return nullptr;
1324
1325 // Create the new type (which can be a vector type)
1326 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1327
1328 Intrinsic::ID IntrinsicID;
1329 if (AddSub->getOpcode() == Instruction::Add)
1330 IntrinsicID = Intrinsic::sadd_sat;
1331 else if (AddSub->getOpcode() == Instruction::Sub)
1332 IntrinsicID = Intrinsic::ssub_sat;
1333 else
1334 return nullptr;
1335
1336 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1337 // is usually achieved via a sext from a smaller type.
1338 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1339 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1340 return nullptr;
1341
1342 // Finally create and return the sat intrinsic, truncated to the new type
1343 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1344 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1345 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1346 return CastInst::Create(Instruction::SExt, Sat, Ty);
1347}
1348
1349
1350/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1351/// can only be one of two possible constant values -- turn that into a select
1352/// of constants.
1354 InstCombiner::BuilderTy &Builder) {
1355 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1356 Value *X;
1357 const APInt *C0, *C1;
1358 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1359 return nullptr;
1360
1362 switch (II->getIntrinsicID()) {
1363 case Intrinsic::smax:
1364 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1365 Pred = ICmpInst::ICMP_SGT;
1366 break;
1367 case Intrinsic::smin:
1368 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1369 Pred = ICmpInst::ICMP_SLT;
1370 break;
1371 case Intrinsic::umax:
1372 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1373 Pred = ICmpInst::ICMP_UGT;
1374 break;
1375 case Intrinsic::umin:
1376 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1377 Pred = ICmpInst::ICMP_ULT;
1378 break;
1379 default:
1380 llvm_unreachable("Expected min/max intrinsic");
1381 }
1382 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1383 return nullptr;
1384
1385 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1386 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1387 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1388 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1389}
1390
1391/// If this min/max has a constant operand and an operand that is a matching
1392/// min/max with a constant operand, constant-fold the 2 constant operands.
1394 IRBuilderBase &Builder,
1395 const SimplifyQuery &SQ) {
1396 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1397 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1398 if (!LHS)
1399 return nullptr;
1400
1401 Constant *C0, *C1;
1402 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1403 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1404 return nullptr;
1405
1406 // max (max X, C0), C1 --> max X, (max C0, C1)
1407 // min (min X, C0), C1 --> min X, (min C0, C1)
1408 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1409 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1410 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1411 if (InnerMinMaxID != MinMaxID &&
1412 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1413 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1414 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1415 return nullptr;
1416
1418 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1419 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1420 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1421 {LHS->getArgOperand(0), NewC});
1422}
1423
1424/// If this min/max has a matching min/max operand with a constant, try to push
1425/// the constant operand into this instruction. This can enable more folds.
1426static Instruction *
1428 InstCombiner::BuilderTy &Builder) {
1429 // Match and capture a min/max operand candidate.
1430 Value *X, *Y;
1431 Constant *C;
1432 Instruction *Inner;
1434 m_Instruction(Inner),
1436 m_Value(Y))))
1437 return nullptr;
1438
1439 // The inner op must match. Check for constants to avoid infinite loops.
1440 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1441 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1442 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1444 return nullptr;
1445
1446 // max (max X, C), Y --> max (max X, Y), C
1448 MinMaxID, II->getType());
1449 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1450 NewInner->takeName(Inner);
1451 return CallInst::Create(MinMax, {NewInner, C});
1452}
1453
1454/// Reduce a sequence of min/max intrinsics with a common operand.
1456 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1457 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1458 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1459 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1460 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1461 RHS->getIntrinsicID() != MinMaxID ||
1462 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1463 return nullptr;
1464
1465 Value *A = LHS->getArgOperand(0);
1466 Value *B = LHS->getArgOperand(1);
1467 Value *C = RHS->getArgOperand(0);
1468 Value *D = RHS->getArgOperand(1);
1469
1470 // Look for a common operand.
1471 Value *MinMaxOp = nullptr;
1472 Value *ThirdOp = nullptr;
1473 if (LHS->hasOneUse()) {
1474 // If the LHS is only used in this chain and the RHS is used outside of it,
1475 // reuse the RHS min/max because that will eliminate the LHS.
1476 if (D == A || C == A) {
1477 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1478 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1479 MinMaxOp = RHS;
1480 ThirdOp = B;
1481 } else if (D == B || C == B) {
1482 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1483 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1484 MinMaxOp = RHS;
1485 ThirdOp = A;
1486 }
1487 } else {
1488 assert(RHS->hasOneUse() && "Expected one-use operand");
1489 // Reuse the LHS. This will eliminate the RHS.
1490 if (D == A || D == B) {
1491 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1492 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1493 MinMaxOp = LHS;
1494 ThirdOp = C;
1495 } else if (C == A || C == B) {
1496 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1497 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1498 MinMaxOp = LHS;
1499 ThirdOp = D;
1500 }
1501 }
1502
1503 if (!MinMaxOp || !ThirdOp)
1504 return nullptr;
1505
1506 Module *Mod = II->getModule();
1507 Function *MinMax =
1508 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1509 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1510}
1511
1512/// If all arguments of the intrinsic are unary shuffles with the same mask,
1513/// try to shuffle after the intrinsic.
1516 if (!II->getType()->isVectorTy() ||
1517 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1518 !II->getCalledFunction()->isSpeculatable())
1519 return nullptr;
1520
1521 Value *X;
1522 Constant *C;
1523 ArrayRef<int> Mask;
1524 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1525 return isa<Constant>(Arg.get()) ||
1526 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1527 Arg.getOperandNo(), nullptr);
1528 });
1529 if (!NonConstArg ||
1530 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1531 return nullptr;
1532
1533 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1534 // instructions.
1535 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1536 return nullptr;
1537
1538 // See if all arguments are shuffled with the same mask.
1540 Type *SrcTy = X->getType();
1541 for (Use &Arg : II->args()) {
1542 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1543 Arg.getOperandNo(), nullptr))
1544 NewArgs.push_back(Arg);
1545 else if (match(&Arg,
1546 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1547 X->getType() == SrcTy)
1548 NewArgs.push_back(X);
1549 else if (match(&Arg, m_ImmConstant(C))) {
1550 // If it's a constant, try find the constant that would be shuffled to C.
1551 if (Constant *ShuffledC =
1552 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1553 NewArgs.push_back(ShuffledC);
1554 else
1555 return nullptr;
1556 } else
1557 return nullptr;
1558 }
1559
1560 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1561 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1562 // Result type might be a different vector width.
1563 // TODO: Check that the result type isn't widened?
1564 VectorType *ResTy =
1565 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1566 Value *NewIntrinsic =
1567 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1568 return new ShuffleVectorInst(NewIntrinsic, Mask);
1569}
1570
1571/// If all arguments of the intrinsic are reverses, try to pull the reverse
1572/// after the intrinsic.
1574 if (!II->getType()->isVectorTy() ||
1575 !isTriviallyVectorizable(II->getIntrinsicID()))
1576 return nullptr;
1577
1578 // At least 1 operand must be a reverse with 1 use because we are creating 2
1579 // instructions.
1580 if (none_of(II->args(), [](Value *V) {
1581 return match(V, m_OneUse(m_VecReverse(m_Value())));
1582 }))
1583 return nullptr;
1584
1585 Value *X;
1586 Constant *C;
1587 SmallVector<Value *> NewArgs;
1588 for (Use &Arg : II->args()) {
1589 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1590 Arg.getOperandNo(), nullptr))
1591 NewArgs.push_back(Arg);
1592 else if (match(&Arg, m_VecReverse(m_Value(X))))
1593 NewArgs.push_back(X);
1594 else if (isSplatValue(Arg))
1595 NewArgs.push_back(Arg);
1596 else if (match(&Arg, m_ImmConstant(C)))
1597 NewArgs.push_back(Builder.CreateVectorReverse(C));
1598 else
1599 return nullptr;
1600 }
1601
1602 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1603 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1604 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1605 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1606 return Builder.CreateVectorReverse(NewIntrinsic);
1607}
1608
1609/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1610/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1611/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1612template <Intrinsic::ID IntrID>
1614 InstCombiner::BuilderTy &Builder) {
1615 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1616 "This helper only supports BSWAP and BITREVERSE intrinsics");
1617
1618 Value *X, *Y;
1619 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1620 // don't match ConstantExpr that aren't meaningful for this transform.
1623 Value *OldReorderX, *OldReorderY;
1625
1626 // If both X and Y are bswap/bitreverse, the transform reduces the number
1627 // of instructions even if there's multiuse.
1628 // If only one operand is bswap/bitreverse, we need to ensure the operand
1629 // have only one use.
1630 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1631 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1632 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1633 }
1634
1635 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1636 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1637 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1638 }
1639
1640 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1641 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1642 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1643 }
1644 }
1645 return nullptr;
1646}
1647
1648/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1649/// `f(f(x, y), y) == f(x, y)` holds.
1651 switch (IID) {
1652 case Intrinsic::smax:
1653 case Intrinsic::smin:
1654 case Intrinsic::umax:
1655 case Intrinsic::umin:
1656 case Intrinsic::maximum:
1657 case Intrinsic::minimum:
1658 case Intrinsic::maximumnum:
1659 case Intrinsic::minimumnum:
1660 case Intrinsic::maxnum:
1661 case Intrinsic::minnum:
1662 return true;
1663 default:
1664 return false;
1665 }
1666}
1667
1668/// Attempt to simplify value-accumulating recurrences of kind:
1669/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1670/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1671/// And let the idempotent binary intrinsic be hoisted, when the operands are
1672/// known to be loop-invariant.
1674 IntrinsicInst *II) {
1675 PHINode *PN;
1676 Value *Init, *OtherOp;
1677
1678 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1679 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1680 auto IID = II->getIntrinsicID();
1681 if (!isIdempotentBinaryIntrinsic(IID) ||
1683 !IC.getDominatorTree().dominates(OtherOp, PN))
1684 return nullptr;
1685
1686 auto *InvariantBinaryInst =
1687 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1688 if (isa<FPMathOperator>(InvariantBinaryInst))
1689 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1690 return InvariantBinaryInst;
1691}
1692
1693static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1694 if (!CanReorderLanes)
1695 return nullptr;
1696
1697 Value *V;
1698 if (match(Arg, m_VecReverse(m_Value(V))))
1699 return V;
1700
1701 ArrayRef<int> Mask;
1702 if (!isa<FixedVectorType>(Arg->getType()) ||
1703 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1704 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1705 return nullptr;
1706
1707 int Sz = Mask.size();
1708 SmallBitVector UsedIndices(Sz);
1709 for (int Idx : Mask) {
1710 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1711 return nullptr;
1712 UsedIndices.set(Idx);
1713 }
1714
1715 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1716 // other changes.
1717 return UsedIndices.all() ? V : nullptr;
1718}
1719
1720/// Fold an unsigned minimum of trailing or leading zero bits counts:
1721/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1722/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1723/// >> ConstOp))
1724/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1725/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
1726template <Intrinsic::ID IntrID>
1727static Value *
1729 const DataLayout &DL,
1730 InstCombiner::BuilderTy &Builder) {
1731 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1732 "This helper only supports cttz and ctlz intrinsics");
1733
1734 Value *CtOp1, *CtOp2;
1735 Value *ZeroUndef1, *ZeroUndef2;
1736 if (!match(I0, m_OneUse(
1737 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1738 return nullptr;
1739
1740 if (match(I1,
1741 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1742 return Builder.CreateBinaryIntrinsic(
1743 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1744 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1745
1746 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1747 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1748 if (!match(I1, m_CheckedInt(LessBitWidth)))
1749 // We have a constant >= BitWidth (which can be handled by CVP)
1750 // or a non-splat vector with elements < and >= BitWidth
1751 return nullptr;
1752
1753 Type *Ty = I1->getType();
1755 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1756 IntrID == Intrinsic::cttz
1757 ? ConstantInt::get(Ty, 1)
1758 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1759 cast<Constant>(I1), DL);
1760 return Builder.CreateBinaryIntrinsic(
1761 IntrID, Builder.CreateOr(CtOp1, NewConst),
1762 ConstantInt::getTrue(ZeroUndef1->getType()));
1763}
1764
1765/// Return whether "X LOp (Y ROp Z)" is always equal to
1766/// "(X LOp Y) ROp (X LOp Z)".
1768 bool HasNSW, Intrinsic::ID ROp) {
1769 switch (ROp) {
1770 case Intrinsic::umax:
1771 case Intrinsic::umin:
1772 if (HasNUW && LOp == Instruction::Add)
1773 return true;
1774 if (HasNUW && LOp == Instruction::Shl)
1775 return true;
1776 return false;
1777 case Intrinsic::smax:
1778 case Intrinsic::smin:
1779 return HasNSW && LOp == Instruction::Add;
1780 default:
1781 return false;
1782 }
1783}
1784
1785/// Return whether "(X ROp Y) LOp Z" is always equal to
1786/// "(X LOp Z) ROp (Y LOp Z)".
1788 bool HasNSW, Intrinsic::ID ROp) {
1789 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1790 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1791 switch (ROp) {
1792 case Intrinsic::umax:
1793 case Intrinsic::umin:
1794 return HasNUW && LOp == Instruction::Sub;
1795 case Intrinsic::smax:
1796 case Intrinsic::smin:
1797 return HasNSW && LOp == Instruction::Sub;
1798 default:
1799 return false;
1800 }
1801}
1802
1803// Attempts to factorise a common term
1804// in an instruction that has the form "(A op' B) op (C op' D)
1805// where op is an intrinsic and op' is a binop
1806static Value *
1808 InstCombiner::BuilderTy &Builder) {
1809 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1810 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1811
1814
1815 if (!Op0 || !Op1)
1816 return nullptr;
1817
1818 if (Op0->getOpcode() != Op1->getOpcode())
1819 return nullptr;
1820
1821 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1822 return nullptr;
1823
1824 Instruction::BinaryOps InnerOpcode =
1825 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1826 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1827 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1828
1829 Value *A = Op0->getOperand(0);
1830 Value *B = Op0->getOperand(1);
1831 Value *C = Op1->getOperand(0);
1832 Value *D = Op1->getOperand(1);
1833
1834 // Attempts to swap variables such that A equals C or B equals D,
1835 // if the inner operation is commutative.
1836 if (Op0->isCommutative() && A != C && B != D) {
1837 if (A == D || B == C)
1838 std::swap(C, D);
1839 else
1840 return nullptr;
1841 }
1842
1843 BinaryOperator *NewBinop;
1844 if (A == C &&
1845 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1846 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1847 NewBinop =
1848 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1849 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1850 TopLevelOpcode)) {
1851 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1852 NewBinop =
1853 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1854 } else {
1855 return nullptr;
1856 }
1857
1858 NewBinop->setHasNoUnsignedWrap(HasNUW);
1859 NewBinop->setHasNoSignedWrap(HasNSW);
1860
1861 return NewBinop;
1862}
1863
// Rewrites an intrinsic call `II` whose second argument is a constant shift
// amount into a plain IR shift of its first argument:
//   - every amount non-negative and below the element width  --> shl
//   - every amount negative and above -(element width)       --> ashr or lshr
//     by the negated amount (ashr for arm_neon_vshifts / aarch64_neon_sshl,
//     lshr for any other intrinsic ID).
// Returns nullptr when the amount is not such a constant.
// NOTE(review): the declaration line of this function is missing from this
// extract. The body uses `II` as the intrinsic call and `IC` as the combiner
// (providing Builder and replaceInstUsesWith) -- confirm the exact signature
// against the real file.
1865 Value *Arg0 = II->getArgOperand(0);
1866 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1867 if (!ShiftConst)
1868 return nullptr;
1869
1870 int ElemBits = Arg0->getType()->getScalarSizeInBits();
// Track the sign shared by all shift amounts seen so far. Zero is counted
// on the non-negative ("positive") side.
1871 bool AllPositive = true;
1872 bool AllNegative = true;
1873
// Accepts one shift-amount element. It must be a ConstantInt, have the same
// sign class as all earlier elements, and be in range for the element width.
// A null or non-ConstantInt element is rejected.
1874 auto Check = [&](Constant *C) -> bool {
1875 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1876 const APInt &V = CI->getValue();
1877 if (V.isNonNegative()) {
1878 AllNegative = false;
1879 return AllPositive && V.ult(ElemBits);
1880 }
1881 AllPositive = false;
1882 return AllNegative && V.sgt(-ElemBits);
1883 }
1884 return false;
1885 };
1886
// Fixed vectors are checked element by element; anything else is checked as
// a single constant.
1887 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1888 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1889 if (!Check(ShiftConst->getAggregateElement(I)))
1890 return nullptr;
1891 }
1892
1893 } else if (!Check(ShiftConst))
1894 return nullptr;
1895
1896 IRBuilderBase &B = IC.Builder;
// All amounts are non-negative: this is a left shift.
1897 if (AllPositive)
1898 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1899
// Otherwise every amount is negative: shift right by the negated amount,
// arithmetically for the signed intrinsics and logically otherwise.
1900 Value *NegAmt = B.CreateNeg(ShiftConst);
1901 Intrinsic::ID IID = II->getIntrinsicID();
1902 const bool IsSigned =
1903 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1904 Value *Result =
1905 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1906 return IC.replaceInstUsesWith(*II, Result);
1907}
1908
1909/// CallInst simplification. This mostly only handles folding of intrinsic
1910/// instructions. For normal calls, it allows visitCallBase to do the heavy
1911/// lifting.
1913 // Don't try to simplify calls without uses. It will not do anything useful,
1914 // but will result in the following folds being skipped.
1915 if (!CI.use_empty()) {
1916 SmallVector<Value *, 8> Args(CI.args());
1917 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1918 SQ.getWithInstruction(&CI)))
1919 return replaceInstUsesWith(CI, V);
1920 }
1921
1922 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1923 return visitFree(CI, FreedOp);
1924
1925 // If the caller function (i.e. us, the function that contains this CallInst)
1926 // is nounwind, mark the call as nounwind, even if the callee isn't.
1927 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1928 CI.setDoesNotThrow();
1929 return &CI;
1930 }
1931
1933 if (!II)
1934 return visitCallBase(CI);
1935
1936 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1937 // instead of in visitCallBase.
1938 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1939 if (auto NumBytes = MI->getLengthInBytes()) {
1940 // memmove/cpy/set of zero bytes is a noop.
1941 if (NumBytes->isZero())
1942 return eraseInstFromFunction(CI);
1943
1944 // For atomic unordered mem intrinsics, if len is not positive or is
1945 // not a multiple of the element size, then behavior is undefined.
1946 if (MI->isAtomic() &&
1947 (NumBytes->isNegative() ||
1948 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1950 assert(MI->getType()->isVoidTy() &&
1951 "non void atomic unordered mem intrinsic");
1952 return eraseInstFromFunction(*MI);
1953 }
1954 }
1955
1956 // No other transformations apply to volatile transfers.
1957 if (MI->isVolatile())
1958 return nullptr;
1959
1961 // memmove(x,x,size) -> noop.
1962 if (MTI->getSource() == MTI->getDest())
1963 return eraseInstFromFunction(CI);
1964 }
1965
1966 auto IsPointerUndefined = [MI](Value *Ptr) {
1967 return isa<ConstantPointerNull>(Ptr) &&
1969 MI->getFunction(),
1970 cast<PointerType>(Ptr->getType())->getAddressSpace());
1971 };
1972 bool SrcIsUndefined = false;
1973 // If we can determine a pointer alignment that is bigger than currently
1974 // set, update the alignment.
1975 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1977 return I;
1978 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1979 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1980 if (Instruction *I = SimplifyAnyMemSet(MSI))
1981 return I;
1982 }
1983
1984 // If src/dest is null, this memory intrinsic must be a noop.
1985 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1986 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1987 return eraseInstFromFunction(CI);
1988 }
1989
1990 // If we have a memmove and the source operation is a constant global,
1991 // then the source and dest pointers can't alias, so we can change this
1992 // into a call to memcpy.
1993 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1994 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1995 if (GVSrc->isConstant()) {
1996 Module *M = CI.getModule();
1997 Intrinsic::ID MemCpyID =
1998 MMI->isAtomic()
1999 ? Intrinsic::memcpy_element_unordered_atomic
2000 : Intrinsic::memcpy;
2001 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2002 CI.getArgOperand(1)->getType(),
2003 CI.getArgOperand(2)->getType() };
2005 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2006 return II;
2007 }
2008 }
2009 }
2010
2011 // For fixed width vector result intrinsics, use the generic demanded vector
2012 // support.
2013 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2014 auto VWidth = IIFVTy->getNumElements();
2015 APInt PoisonElts(VWidth, 0);
2016 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2017 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2018 if (V != II)
2019 return replaceInstUsesWith(*II, V);
2020 return II;
2021 }
2022 }
2023
2024 if (II->isCommutative()) {
2025 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2026 replaceOperand(*II, 0, Pair->first);
2027 replaceOperand(*II, 1, Pair->second);
2028 return II;
2029 }
2030
2031 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2032 return NewCall;
2033 }
2034
2035 // Unused constrained FP intrinsic calls may have declared side effect, which
2036 // prevents it from being removed. In some cases however the side effect is
2037 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2038 // returns a replacement, the call may be removed.
2039 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2040 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2041 return eraseInstFromFunction(CI);
2042 }
2043
2044 Intrinsic::ID IID = II->getIntrinsicID();
2045 switch (IID) {
2046 case Intrinsic::objectsize: {
2047 SmallVector<Instruction *> InsertedInstructions;
2048 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2049 &InsertedInstructions)) {
2050 for (Instruction *Inserted : InsertedInstructions)
2051 Worklist.add(Inserted);
2052 return replaceInstUsesWith(CI, V);
2053 }
2054 return nullptr;
2055 }
2056 case Intrinsic::abs: {
2057 Value *IIOperand = II->getArgOperand(0);
2058 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2059
2060 // abs(-x) -> abs(x)
2061 Value *X;
2062 if (match(IIOperand, m_Neg(m_Value(X)))) {
2063 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2064 replaceOperand(*II, 1, Builder.getTrue());
2065 return replaceOperand(*II, 0, X);
2066 }
2067 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2068 return replaceOperand(*II, 0, X);
2069
2070 Value *Y;
2071 // abs(a * abs(b)) -> abs(a * b)
2072 if (match(IIOperand,
2075 bool NSW =
2076 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2077 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2078 return replaceOperand(*II, 0, XY);
2079 }
2080
2081 if (std::optional<bool> Known =
2082 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2083 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2084 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2085 if (!*Known)
2086 return replaceInstUsesWith(*II, IIOperand);
2087
2088 // abs(x) -> -x if x < 0
2089 // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
2090 if (IntMinIsPoison)
2091 return BinaryOperator::CreateNSWNeg(IIOperand);
2092 return BinaryOperator::CreateNeg(IIOperand);
2093 }
2094
2095 // abs (sext X) --> zext (abs X*)
2096 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2097 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2098 Value *NarrowAbs =
2099 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2100 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2101 }
2102
2103 // Match a complicated way to check if a number is odd/even:
2104 // abs (srem X, 2) --> and X, 1
2105 const APInt *C;
2106 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2107 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2108
2109 break;
2110 }
2111 case Intrinsic::umin: {
2112 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2113 // umin(x, 1) == zext(x != 0)
2114 if (match(I1, m_One())) {
2115 assert(II->getType()->getScalarSizeInBits() != 1 &&
2116 "Expected simplify of umin with max constant");
2117 Value *Zero = Constant::getNullValue(I0->getType());
2118 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2119 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2120 }
2121 // umin(cttz(x), const) --> cttz(x | (1 << const))
2122 if (Value *FoldedCttz =
2124 I0, I1, DL, Builder))
2125 return replaceInstUsesWith(*II, FoldedCttz);
2126 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2127 if (Value *FoldedCtlz =
2129 I0, I1, DL, Builder))
2130 return replaceInstUsesWith(*II, FoldedCtlz);
2131 [[fallthrough]];
2132 }
2133 case Intrinsic::umax: {
2134 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2135 Value *X, *Y;
2136 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2137 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2138 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2139 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2140 }
2141 Constant *C;
2142 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2143 I0->hasOneUse()) {
2144 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2145 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2146 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2147 }
2148 }
2149 // If C is not 0:
2150 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2151 // If C is not 0 or 1:
2152 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2153 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2154 const APInt *C;
2155 Value *X;
2156 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2157 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2158 return nullptr;
2159 if (C->isZero())
2160 return nullptr;
2161 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2162 return nullptr;
2163
2164 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2165 Value *NewSelect = nullptr;
2166 NewSelect = Builder.CreateSelectWithUnknownProfile(
2167 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2168 return replaceInstUsesWith(*II, NewSelect);
2169 };
2170
2171 if (IID == Intrinsic::umax) {
2172 if (Instruction *I = foldMaxMulShift(I0, I1))
2173 return I;
2174 if (Instruction *I = foldMaxMulShift(I1, I0))
2175 return I;
2176 }
2177
2178 // If both operands of unsigned min/max are sign-extended, it is still ok
2179 // to narrow the operation.
2180 [[fallthrough]];
2181 }
2182 case Intrinsic::smax:
2183 case Intrinsic::smin: {
2184 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2185 Value *X, *Y;
2186 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2187 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2188 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2189 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2190 }
2191
2192 Constant *C;
2193 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2194 I0->hasOneUse()) {
2195 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2196 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2197 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2198 }
2199 }
2200
2201 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2202 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2203 const APInt *MinC, *MaxC;
2204 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2205 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2206 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2207 Value *NewMax = Builder.CreateBinaryIntrinsic(
2208 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2209 return replaceInstUsesWith(
2210 *II, Builder.CreateBinaryIntrinsic(
2211 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2212 };
2213 if (IID == Intrinsic::smax &&
2215 m_APInt(MinC)))) &&
2216 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2217 return CreateCanonicalClampForm(true);
2218 if (IID == Intrinsic::umax &&
2220 m_APInt(MinC)))) &&
2221 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2222 return CreateCanonicalClampForm(false);
2223
2224 // umin(i1 X, i1 Y) -> and i1 X, Y
2225 // smax(i1 X, i1 Y) -> and i1 X, Y
2226 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2227 II->getType()->isIntOrIntVectorTy(1)) {
2228 return BinaryOperator::CreateAnd(I0, I1);
2229 }
2230
2231 // umax(i1 X, i1 Y) -> or i1 X, Y
2232 // smin(i1 X, i1 Y) -> or i1 X, Y
2233 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2234 II->getType()->isIntOrIntVectorTy(1)) {
2235 return BinaryOperator::CreateOr(I0, I1);
2236 }
2237
2238 // smin(smax(X, -1), 1) -> scmp(X, 0)
2239 // smax(smin(X, 1), -1) -> scmp(X, 0)
2240 // At this point, smax(smin(X, 1), -1) has become smin(smax(X, -1), 1)
2241 // and i1 min/max have been changed to and/or,
2242 // so we only need to check for smin.
2243 if (IID == Intrinsic::smin) {
2244 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2245 match(I1, m_One())) {
2246 Value *Zero = ConstantInt::get(X->getType(), 0);
2247 return replaceInstUsesWith(
2248 CI,
2249 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2250 }
2251 }
2252
2253 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2254 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2255 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2256 // TODO: Canonicalize neg after min/max if I1 is constant.
2257 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2258 (I0->hasOneUse() || I1->hasOneUse())) {
2260 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2261 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2262 }
2263 }
2264
2265 // (umax X, (xor X, Pow2))
2266 // -> (or X, Pow2)
2267 // (umin X, (xor X, Pow2))
2268 // -> (and X, ~Pow2)
2269 // (smax X, (xor X, Pos_Pow2))
2270 // -> (or X, Pos_Pow2)
2271 // (smin X, (xor X, Pos_Pow2))
2272 // -> (and X, ~Pos_Pow2)
2273 // (smax X, (xor X, Neg_Pow2))
2274 // -> (and X, ~Neg_Pow2)
2275 // (smin X, (xor X, Neg_Pow2))
2276 // -> (or X, Neg_Pow2)
2277 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2278 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2279 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2280 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2281 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2282
2283 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2284 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2285 if (KnownSign == std::nullopt) {
2286 UseOr = false;
2287 UseAndN = false;
2288 } else if (*KnownSign /* true is Signed. */) {
2289 UseOr ^= true;
2290 UseAndN ^= true;
2291 Type *Ty = I0->getType();
2292 // Negative power of 2 must be IntMin. It's possible to be able to
2293 // prove negative / power of 2 without actually having known bits, so
2294 // just get the value by hand.
2296 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2297 }
2298 }
2299 if (UseOr)
2300 return BinaryOperator::CreateOr(I0, X);
2301 else if (UseAndN)
2302 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2303 }
2304
2305 // If we can eliminate ~A and Y is free to invert:
2306 // max ~A, Y --> ~(min A, ~Y)
2307 //
2308 // Examples:
2309 // max ~A, ~Y --> ~(min A, Y)
2310 // max ~A, C --> ~(min A, ~C)
2311 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2312 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2313 Value *A;
2314 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2315 !isFreeToInvert(A, A->hasOneUse())) {
2316 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2318 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2319 return BinaryOperator::CreateNot(InvMaxMin);
2320 }
2321 }
2322 return nullptr;
2323 };
2324
2325 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2326 return I;
2327 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2328 return I;
2329
2331 return I;
2332
2333 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2334 const APInt *RHSC;
2335 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2336 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2337 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2338 ConstantInt::get(II->getType(), *RHSC));
2339
2340 // smax(X, -X) --> abs(X)
2341 // smin(X, -X) --> -abs(X)
2342 // umax(X, -X) --> -abs(X)
2343 // umin(X, -X) --> abs(X)
2344 if (isKnownNegation(I0, I1)) {
2345 // We can choose either operand as the input to abs(), but if we can
2346 // eliminate the only use of a value, that's better for subsequent
2347 // transforms/analysis.
2348 if (I0->hasOneUse() && !I1->hasOneUse())
2349 std::swap(I0, I1);
2350
2351 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2352 // operation and potentially its negation.
2353 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2354 Value *Abs = Builder.CreateBinaryIntrinsic(
2355 Intrinsic::abs, I0,
2356 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2357
2358 // We don't have a "nabs" intrinsic, so negate if needed based on the
2359 // max/min operation.
2360 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2361 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2362 return replaceInstUsesWith(CI, Abs);
2363 }
2364
2366 return Sel;
2367
2368 if (Instruction *SAdd = matchSAddSubSat(*II))
2369 return SAdd;
2370
2371 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2372 return replaceInstUsesWith(*II, NewMinMax);
2373
2375 return R;
2376
2377 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2378 return NewMinMax;
2379
2380 // Try to fold minmax with constant RHS based on range information
2381 if (match(I1, m_APIntAllowPoison(RHSC))) {
2382 ICmpInst::Predicate Pred =
2384 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2386 I0, IsSigned, SQ.getWithInstruction(II));
2387 if (!LHS_CR.isFullSet()) {
2388 if (LHS_CR.icmp(Pred, *RHSC))
2389 return replaceInstUsesWith(*II, I0);
2390 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2391 return replaceInstUsesWith(*II,
2392 ConstantInt::get(II->getType(), *RHSC));
2393 }
2394 }
2395
2397 return replaceInstUsesWith(*II, V);
2398
2399 break;
2400 }
2401 case Intrinsic::scmp: {
2402 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2403 Value *LHS, *RHS;
2404 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2405 return replaceInstUsesWith(
2406 CI,
2407 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2408 break;
2409 }
2410 case Intrinsic::bitreverse: {
2411 Value *IIOperand = II->getArgOperand(0);
2412 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2413 Value *X;
2414 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2415 X->getType()->isIntOrIntVectorTy(1)) {
2416 Type *Ty = II->getType();
2417 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2418 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2420 }
2421
2422 if (Instruction *crossLogicOpFold =
2424 return crossLogicOpFold;
2425
2426 break;
2427 }
2428 case Intrinsic::bswap: {
2429 Value *IIOperand = II->getArgOperand(0);
2430
2431 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2432 // inverse-shift-of-bswap:
2433 // bswap (shl X, Y) --> lshr (bswap X), Y
2434 // bswap (lshr X, Y) --> shl (bswap X), Y
2435 Value *X, *Y;
2436 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2437 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2439 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2440 BinaryOperator::BinaryOps InverseShift =
2441 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2442 ? Instruction::LShr
2443 : Instruction::Shl;
2444 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2445 }
2446 }
2447
2448 KnownBits Known = computeKnownBits(IIOperand, II);
2449 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2450 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2451 unsigned BW = Known.getBitWidth();
2452
2453 // bswap(x) -> shift(x) if x has exactly one "active byte"
2454 if (BW - LZ - TZ == 8) {
2455 assert(LZ != TZ && "active byte cannot be in the middle");
2456 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2457 return BinaryOperator::CreateNUWShl(
2458 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2459 // -> lshr(x) if the "active byte" is in the high part of x
2460 return BinaryOperator::CreateExactLShr(
2461 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2462 }
2463
2464 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2465 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2466 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2467 Value *CV = ConstantInt::get(X->getType(), C);
2468 Value *V = Builder.CreateLShr(X, CV);
2469 return new TruncInst(V, IIOperand->getType());
2470 }
2471
2472 if (Instruction *crossLogicOpFold =
2474 return crossLogicOpFold;
2475 }
2476
2477 // Try to fold into bitreverse if bswap is the root of the expression tree.
2478 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2479 /*MatchBitReversals*/ true))
2480 return BitOp;
2481 break;
2482 }
2483 case Intrinsic::masked_load:
2484 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2485 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2486 break;
2487 case Intrinsic::masked_store:
2488 return simplifyMaskedStore(*II);
2489 case Intrinsic::masked_gather:
2490 return simplifyMaskedGather(*II);
2491 case Intrinsic::masked_scatter:
2492 return simplifyMaskedScatter(*II);
2493 case Intrinsic::launder_invariant_group:
2494 case Intrinsic::strip_invariant_group:
2495 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2496 return replaceInstUsesWith(*II, SkippedBarrier);
2497 break;
2498 case Intrinsic::powi: {
2499 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2500 // 0 and 1 are handled in instsimplify
2501 // powi(x, -1) -> 1/x
2502 if (Power->isMinusOne())
2503 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2504 II->getArgOperand(0), II);
2505 // powi(x, 2) -> x*x
2506 if (Power->equalsInt(2))
2507 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2508 II->getArgOperand(0), II);
2509
2510 if (!Power->getValue()[0]) {
2511 Value *X;
2512 // If power is even:
2513 // powi(-x, p) -> powi(x, p)
2514 // powi(fabs(x), p) -> powi(x, p)
2515 // powi(copysign(x, y), p) -> powi(x, p)
2516 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2517 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2518 match(II->getArgOperand(0),
2520 return replaceOperand(*II, 0, X);
2521 }
2522 }
2523 if (ConstantFP *Base = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
2524 Value *Exp = II->getArgOperand(1);
2525 Type *Ty = Base->getType();
2526 // powi(2.0, p) -> ldexp(1.0, p)
2527 if (II->hasApproxFunc() && Base->isExactlyValue(2.0)) {
2528 ConstantFP *One = ConstantFP::get(Ty, 1.0);
2529 if (auto *VTy = dyn_cast<VectorType>(Ty))
2530 Exp = Builder.CreateVectorSplat(VTy->getElementCount(), Exp);
2531 Value *Ldexp = Builder.CreateLdexp(One, Exp, II);
2532 return replaceInstUsesWith(*II, Ldexp);
2533 }
2534 }
2535 break;
2536 }
2537
2538 case Intrinsic::cttz:
2539 case Intrinsic::ctlz:
2540 if (auto *I = foldCttzCtlz(*II, *this))
2541 return I;
2542 break;
2543
2544 case Intrinsic::ctpop:
2545 if (auto *I = foldCtpop(*II, *this))
2546 return I;
2547 break;
2548
2549 case Intrinsic::fshl:
2550 case Intrinsic::fshr: {
2551 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2552 Type *Ty = II->getType();
2553 unsigned BitWidth = Ty->getScalarSizeInBits();
2554 Constant *ShAmtC;
2555 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2556 // Canonicalize a shift amount constant operand to modulo the bit-width.
2557 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2558 Constant *ModuloC =
2559 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2560 if (!ModuloC)
2561 return nullptr;
2562 if (ModuloC != ShAmtC)
2563 return replaceOperand(*II, 2, ModuloC);
2564
2566 ShAmtC, DL),
2567 m_One()) &&
2568 "Shift amount expected to be modulo bitwidth");
2569
2570 // Canonicalize funnel shift right by constant to funnel shift left. This
2571 // is not entirely arbitrary. For historical reasons, the backend may
2572 // recognize rotate left patterns but miss rotate right patterns.
2573 if (IID == Intrinsic::fshr) {
2574 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2575 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2576 return nullptr;
2577
2578 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2579 Module *Mod = II->getModule();
2580 Function *Fshl =
2581 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2582 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2583 }
2584 assert(IID == Intrinsic::fshl &&
2585 "All funnel shifts by simple constants should go left");
2586
2587 // fshl(X, 0, C) --> shl X, C
2588 // fshl(X, undef, C) --> shl X, C
2589 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2590 return BinaryOperator::CreateShl(Op0, ShAmtC);
2591
2592 // fshl(0, X, C) --> lshr X, (BW-C)
2593 // fshl(undef, X, C) --> lshr X, (BW-C)
2594 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2595 return BinaryOperator::CreateLShr(Op1,
2596 ConstantExpr::getSub(WidthC, ShAmtC));
2597
2598 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2599 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2600 Module *Mod = II->getModule();
2601 Function *Bswap =
2602 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2603 return CallInst::Create(Bswap, { Op0 });
2604 }
2605 if (Instruction *BitOp =
2606 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2607 /*MatchBitReversals*/ true))
2608 return BitOp;
2609
2610 // R = fshl(X, X, C2)
2611 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2612 Value *InnerOp;
2613 const APInt *ShAmtInnerC, *ShAmtOuterC;
2614 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2615 m_APInt(ShAmtInnerC))) &&
2616 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2617 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2618 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2619 if (Modulo.isZero())
2620 return replaceInstUsesWith(*II, InnerOp);
2621 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2623 {InnerOp, InnerOp, ModuloC});
2624 }
2625 }
2626
2627 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2628 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2629 // if BitWidth is a power-of-2
2630 Value *Y;
2631 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2632 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2633 Module *Mod = II->getModule();
2635 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2636 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2637 }
2638
2639 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2640 // power-of-2
2641 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2642 match(Op1, m_ZeroInt())) {
2643 Value *Op2 = II->getArgOperand(2);
2644 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2645 return BinaryOperator::CreateShl(Op0, And);
2646 }
2647
2648 // Left or right might be masked.
2650 return &CI;
2651
2652 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2653 // so only the low bits of the shift amount are demanded if the bitwidth is
2654 // a power-of-2.
2655 if (!isPowerOf2_32(BitWidth))
2656 break;
2658 KnownBits Op2Known(BitWidth);
2659 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2660 return &CI;
2661 break;
2662 }
2663 case Intrinsic::ptrmask: {
2664 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2665 KnownBits Known(BitWidth);
2667 return II;
2668
2669 Value *InnerPtr, *InnerMask;
2670 bool Changed = false;
2671 // Combine:
2672 // (ptrmask (ptrmask p, A), B)
2673 // -> (ptrmask p, (and A, B))
2674 if (match(II->getArgOperand(0),
2676 m_Value(InnerMask))))) {
2677 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2678 "Mask types must match");
2679 // TODO: If InnerMask == Op1, we could copy attributes from inner
2680 // callsite -> outer callsite.
2681 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2682 replaceOperand(CI, 0, InnerPtr);
2683 replaceOperand(CI, 1, NewMask);
2684 Changed = true;
2685 }
2686
2687 // See if we can deduce non-null.
2688 if (!CI.hasRetAttr(Attribute::NonNull) &&
2689 (Known.isNonZero() ||
2690 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2691 CI.addRetAttr(Attribute::NonNull);
2692 Changed = true;
2693 }
2694
2695 unsigned NewAlignmentLog =
2697 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2698 // Known bits will capture if we had alignment information associated with
2699 // the pointer argument.
2700 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2702 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2703 Changed = true;
2704 }
2705 if (Changed)
2706 return &CI;
2707 break;
2708 }
2709 case Intrinsic::uadd_with_overflow:
2710 case Intrinsic::sadd_with_overflow: {
2711 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2712 return I;
2713
2714 // Given 2 constant operands whose sum does not overflow:
2715 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2716 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2717 Value *X;
2718 const APInt *C0, *C1;
2719 Value *Arg0 = II->getArgOperand(0);
2720 Value *Arg1 = II->getArgOperand(1);
2721 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2722 bool HasNWAdd = IsSigned
2723 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2724 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2725 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2726 bool Overflow;
2727 APInt NewC =
2728 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2729 if (!Overflow)
2730 return replaceInstUsesWith(
2731 *II, Builder.CreateBinaryIntrinsic(
2732 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2733 }
2734 break;
2735 }
2736
2737 case Intrinsic::umul_with_overflow:
2738 case Intrinsic::smul_with_overflow:
2739 case Intrinsic::usub_with_overflow:
2740 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2741 return I;
2742 break;
2743
2744 case Intrinsic::ssub_with_overflow: {
2745 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2746 return I;
2747
2748 Constant *C;
2749 Value *Arg0 = II->getArgOperand(0);
2750 Value *Arg1 = II->getArgOperand(1);
2751 // Given a constant C that is not the minimum signed value
2752 // for an integer of a given bit width:
2753 //
2754 // ssubo X, C -> saddo X, -C
2755 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2756 Value *NegVal = ConstantExpr::getNeg(C);
2757 // Build a saddo call that is equivalent to the discovered
2758 // ssubo call.
2759 return replaceInstUsesWith(
2760 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2761 Arg0, NegVal));
2762 }
2763
2764 break;
2765 }
2766
2767 case Intrinsic::uadd_sat:
2768 case Intrinsic::sadd_sat:
2769 case Intrinsic::usub_sat:
2770 case Intrinsic::ssub_sat: {
2772 Type *Ty = SI->getType();
2773 Value *Arg0 = SI->getLHS();
2774 Value *Arg1 = SI->getRHS();
2775
2776 // Make use of known overflow information.
2777 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2778 Arg0, Arg1, SI);
2779 switch (OR) {
2781 break;
2783 if (SI->isSigned())
2784 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2785 else
2786 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2788 unsigned BitWidth = Ty->getScalarSizeInBits();
2789 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2790 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2791 }
2793 unsigned BitWidth = Ty->getScalarSizeInBits();
2794 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2795 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2796 }
2797 }
2798
2799 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2800 // which after that:
2801 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2802 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2803 Constant *C, *C1;
2804 Value *A;
2805 if (IID == Intrinsic::usub_sat &&
2806 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2807 match(Arg1, m_ImmConstant(C1))) {
2808 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2809 auto *NewSub =
2810 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2811 return replaceInstUsesWith(*SI, NewSub);
2812 }
2813
2814 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2815 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2816 C->isNotMinSignedValue()) {
2817 Value *NegVal = ConstantExpr::getNeg(C);
2818 return replaceInstUsesWith(
2819 *II, Builder.CreateBinaryIntrinsic(
2820 Intrinsic::sadd_sat, Arg0, NegVal));
2821 }
2822
2823 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2824 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2825 // if Val and Val2 have the same sign
2826 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2827 Value *X;
2828 const APInt *Val, *Val2;
2829 APInt NewVal;
2830 bool IsUnsigned =
2831 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2832 if (Other->getIntrinsicID() == IID &&
2833 match(Arg1, m_APInt(Val)) &&
2834 match(Other->getArgOperand(0), m_Value(X)) &&
2835 match(Other->getArgOperand(1), m_APInt(Val2))) {
2836 if (IsUnsigned)
2837 NewVal = Val->uadd_sat(*Val2);
2838 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2839 bool Overflow;
2840 NewVal = Val->sadd_ov(*Val2, Overflow);
2841 if (Overflow) {
2842 // Both adds together may add more than SignedMaxValue
2843 // without saturating the final result.
2844 break;
2845 }
2846 } else {
2847 // Cannot fold saturated addition with different signs.
2848 break;
2849 }
2850
2851 return replaceInstUsesWith(
2852 *II, Builder.CreateBinaryIntrinsic(
2853 IID, X, ConstantInt::get(II->getType(), NewVal)));
2854 }
2855 }
2856 break;
2857 }
2858
2859 case Intrinsic::minnum:
2860 case Intrinsic::maxnum:
2861 case Intrinsic::minimumnum:
2862 case Intrinsic::maximumnum:
2863 case Intrinsic::minimum:
2864 case Intrinsic::maximum: {
2865 Value *Arg0 = II->getArgOperand(0);
2866 Value *Arg1 = II->getArgOperand(1);
2867 Value *X, *Y;
2868 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2869 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2870 // If both operands are negated, invert the call and negate the result:
2871 // min(-X, -Y) --> -(max(X, Y))
2872 // max(-X, -Y) --> -(min(X, Y))
2873 Intrinsic::ID NewIID;
2874 switch (IID) {
2875 case Intrinsic::maxnum:
2876 NewIID = Intrinsic::minnum;
2877 break;
2878 case Intrinsic::minnum:
2879 NewIID = Intrinsic::maxnum;
2880 break;
2881 case Intrinsic::maximumnum:
2882 NewIID = Intrinsic::minimumnum;
2883 break;
2884 case Intrinsic::minimumnum:
2885 NewIID = Intrinsic::maximumnum;
2886 break;
2887 case Intrinsic::maximum:
2888 NewIID = Intrinsic::minimum;
2889 break;
2890 case Intrinsic::minimum:
2891 NewIID = Intrinsic::maximum;
2892 break;
2893 default:
2894 llvm_unreachable("unexpected intrinsic ID");
2895 }
2896 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2897 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2898 FNeg->copyIRFlags(II);
2899 return FNeg;
2900 }
2901
2902 // m(m(X, C2), C1) -> m(X, C)
2903 const APFloat *C1, *C2;
2904 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2905 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2906 ((match(M->getArgOperand(0), m_Value(X)) &&
2907 match(M->getArgOperand(1), m_APFloat(C2))) ||
2908 (match(M->getArgOperand(1), m_Value(X)) &&
2909 match(M->getArgOperand(0), m_APFloat(C2))))) {
2910 APFloat Res(0.0);
2911 switch (IID) {
2912 case Intrinsic::maxnum:
2913 Res = maxnum(*C1, *C2);
2914 break;
2915 case Intrinsic::minnum:
2916 Res = minnum(*C1, *C2);
2917 break;
2918 case Intrinsic::maximumnum:
2919 Res = maximumnum(*C1, *C2);
2920 break;
2921 case Intrinsic::minimumnum:
2922 Res = minimumnum(*C1, *C2);
2923 break;
2924 case Intrinsic::maximum:
2925 Res = maximum(*C1, *C2);
2926 break;
2927 case Intrinsic::minimum:
2928 Res = minimum(*C1, *C2);
2929 break;
2930 default:
2931 llvm_unreachable("unexpected intrinsic ID");
2932 }
2933 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2934 // was a simplification (so Arg0 and its original flags could
2935 // propagate?)
2936 Value *V = Builder.CreateBinaryIntrinsic(
2937 IID, X, ConstantFP::get(Arg0->getType(), Res),
2939 return replaceInstUsesWith(*II, V);
2940 }
2941 }
2942
2943 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2944 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2945 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2946 X->getType() == Y->getType()) {
2947 Value *NewCall =
2948 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2949 return new FPExtInst(NewCall, II->getType());
2950 }
2951
2952 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2953 Constant *C;
2954 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2955 match(Arg1, m_ImmConstant(C))) {
2956 if (Constant *TruncC =
2957 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2958 Value *NewCall =
2959 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2960 return new FPExtInst(NewCall, II->getType());
2961 }
2962 }
2963
2964 // max X, -X --> fabs X
2965 // min X, -X --> -(fabs X)
2966 // TODO: Remove one-use limitation? That is obviously better for max,
2967 // hence why we don't check for one-use for that. However,
2968 // it would be an extra instruction for min (fnabs), but
2969 // that is still likely better for analysis and codegen.
// Helper for the "max X, -X" / "min X, -X" folds: returns true when Op0 is an
// fneg of Op1 and the use-count condition below holds. X is captured by
// reference, so after a successful call X holds the un-negated value for the
// code that follows the lambda.
2970 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2971 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
// The fneg only needs to be single-use when IID is minimum, minnum or
// minimumnum; for every other IID the pattern match alone is sufficient.
2972 return Op0->hasOneUse() ||
2973 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2974 IID != Intrinsic::minimumnum);
// Op0 is not an fneg of Op1.
2975 return false;
2976 };
2977
2978 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2979 Value *R = Builder.CreateFAbs(X, II);
2980 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2981 IID == Intrinsic::minimumnum)
2982 R = Builder.CreateFNegFMF(R, II);
2983 return replaceInstUsesWith(*II, R);
2984 }
2985
2986 break;
2987 }
2988 case Intrinsic::matrix_multiply: {
2989 // Optimize negation in matrix multiplication.
2990
2991 // -A * -B -> A * B
2992 Value *A, *B;
2993 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2994 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2995 replaceOperand(*II, 0, A);
2996 replaceOperand(*II, 1, B);
2997 return II;
2998 }
2999
3000 Value *Op0 = II->getOperand(0);
3001 Value *Op1 = II->getOperand(1);
3002 Value *OpNotNeg, *NegatedOp;
3003 unsigned NegatedOpArg, OtherOpArg;
3004 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
3005 NegatedOp = Op0;
3006 NegatedOpArg = 0;
3007 OtherOpArg = 1;
3008 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
3009 NegatedOp = Op1;
3010 NegatedOpArg = 1;
3011 OtherOpArg = 0;
3012 } else
3013 // Multiplication doesn't have a negated operand.
3014 break;
3015
3016 // Only optimize if the negated operand has only one use.
3017 if (!NegatedOp->hasOneUse())
3018 break;
3019
3020 Value *OtherOp = II->getOperand(OtherOpArg);
3021 VectorType *RetTy = cast<VectorType>(II->getType());
3022 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3023 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3024 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3025 ElementCount OtherCount = OtherOpTy->getElementCount();
3026 ElementCount RetCount = RetTy->getElementCount();
3027 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3028 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3029 ElementCount::isKnownLT(OtherCount, RetCount)) {
3030 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3031 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3032 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3033 return II;
3034 }
3035 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3036 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3037 SmallVector<Value *, 5> NewArgs(II->args());
3038 NewArgs[NegatedOpArg] = OpNotNeg;
3039 Instruction *NewMul =
3040 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3041 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3042 }
3043 break;
3044 }
3045 case Intrinsic::fmuladd: {
3046 // Try to simplify the underlying FMul.
3047 if (Value *V =
3048 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3049 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3050 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3051 II->getFastMathFlags());
3052
3053 [[fallthrough]];
3054 }
3055 case Intrinsic::fma: {
3056 // fma fneg(x), fneg(y), z -> fma x, y, z
3057 Value *Src0 = II->getArgOperand(0);
3058 Value *Src1 = II->getArgOperand(1);
3059 Value *Src2 = II->getArgOperand(2);
3060 Value *X, *Y;
3061 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3062 replaceOperand(*II, 0, X);
3063 replaceOperand(*II, 1, Y);
3064 return II;
3065 }
3066
3067 // fma fabs(x), fabs(x), z -> fma x, x, z
3068 if (match(Src0, m_FAbs(m_Value(X))) &&
3069 match(Src1, m_FAbs(m_Specific(X)))) {
3070 replaceOperand(*II, 0, X);
3071 replaceOperand(*II, 1, X);
3072 return II;
3073 }
3074
3075 // Try to simplify the underlying FMul. We can only apply simplifications
3076 // that do not require rounding.
3077 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3078 SQ.getWithInstruction(II)))
3079 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3080
3081 // fma x, y, 0 -> fmul x, y
3082 // This is always valid for -0.0, but requires nsz for +0.0 as
3083 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3084 if (match(Src2, m_NegZeroFP()) ||
3085 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3086 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3087
3088 // fma x, -1.0, y -> fsub y, x
3089 if (match(Src1, m_SpecificFP(-1.0)))
3090 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3091
3092 break;
3093 }
3094 case Intrinsic::copysign: {
3095 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3096 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3097 Sign, getSimplifyQuery().getWithInstruction(II))) {
3098 if (*KnownSignBit) {
3099 // If we know that the sign argument is negative, reduce to FNABS:
3100 // copysign Mag, -Sign --> fneg (fabs Mag)
3101 Value *Fabs = Builder.CreateFAbs(Mag, II);
3102 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3103 }
3104
3105 // If we know that the sign argument is positive, reduce to FABS:
3106 // copysign Mag, +Sign --> fabs Mag
3107 Value *Fabs = Builder.CreateFAbs(Mag, II);
3108 return replaceInstUsesWith(*II, Fabs);
3109 }
3110
3111 // Propagate sign argument through nested calls:
3112 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3113 Value *X;
3115 Value *CopySign =
3116 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3117 return replaceInstUsesWith(*II, CopySign);
3118 }
3119
3120 // Clear sign-bit of constant magnitude:
3121 // copysign -MagC, X --> copysign MagC, X
3122 // TODO: Support constant folding for fabs
3123 const APFloat *MagC;
3124 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3125 APFloat PosMagC = *MagC;
3126 PosMagC.clearSign();
3127 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3128 }
3129
3130 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3131 // copysign (fabs X), Sign --> copysign X, Sign
3132 // copysign (fneg X), Sign --> copysign X, Sign
3133 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3134 return replaceOperand(*II, 0, X);
3135
3136 // copysign(floor(fabs(X)), X) --> copysign(trunc(X), X)
3137 // copysign ignores the sign bit of its magnitude argument (implicit fabs),
3138 // so replacing floor(fabs(X)) with trunc(X) is correct for all inputs
3139 // including NaN without requiring nnan. The m_FAbs match also ensures
3140 // the floor argument is non-negative, so floor == trunc.
3141 Value *FAbsArg;
3142 if (match(Mag, m_Intrinsic<Intrinsic::floor>(m_FAbs(m_Value(FAbsArg)))) &&
3143 FAbsArg == Sign) {
3144 Value *Trunc = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, Sign, II);
3145 return replaceOperand(*II, 0, Trunc);
3146 }
3147
3148 Type *SignEltTy = Sign->getType()->getScalarType();
3149
3150 Value *CastSrc;
3151 if (match(Sign,
3153 CastSrc->getType()->isIntOrIntVectorTy() &&
3155 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3157 APInt::getSignMask(Known.getBitWidth()), Known,
3158 SQ))
3159 return II;
3160 }
3161
3162 break;
3163 }
3164 case Intrinsic::fabs: {
3165 Value *Cond, *TVal, *FVal;
3166 Value *Arg = II->getArgOperand(0);
3167 Value *X;
3168 // fabs (-X) --> fabs (X)
3169 if (match(Arg, m_FNeg(m_Value(X)))) {
3170 Value *Fabs = Builder.CreateFAbs(X, II);
3171 return replaceInstUsesWith(CI, Fabs);
3172 }
3173
3174 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3175 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3176 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3177 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3178 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3179 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3180 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3181 SI->setFastMathFlags(II->getFastMathFlags() |
3182 cast<SelectInst>(Arg)->getFastMathFlags());
3183 // Can't copy nsz to select, as even with the nsz flag the fabs result
3184 // always has the sign bit unset.
3185 SI->setHasNoSignedZeros(false);
3186 return SI;
3187 }
3188 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3189 if (match(TVal, m_FNeg(m_Specific(FVal))))
3190 return replaceOperand(*II, 0, FVal);
3191 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3192 if (match(FVal, m_FNeg(m_Specific(TVal))))
3193 return replaceOperand(*II, 0, TVal);
3194 }
3195
3196 Value *Magnitude, *Sign;
3197 if (match(II->getArgOperand(0),
3198 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3199 // fabs (copysign x, y) -> (fabs x)
3200 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3201 return replaceInstUsesWith(*II, AbsSign);
3202 }
3203
3204 [[fallthrough]];
3205 }
3206 case Intrinsic::ceil:
3207 case Intrinsic::floor:
3208 case Intrinsic::round:
3209 case Intrinsic::roundeven:
3210 case Intrinsic::nearbyint:
3211 case Intrinsic::rint:
3212 case Intrinsic::trunc: {
3213 Value *ExtSrc;
3214 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3215 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3216 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3217 return new FPExtInst(NarrowII, II->getType());
3218 }
3219 break;
3220 }
3221 case Intrinsic::cos:
3222 case Intrinsic::amdgcn_cos:
3223 case Intrinsic::cosh: {
3224 Value *X, *Sign;
3225 Value *Src = II->getArgOperand(0);
3226 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3227 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3228 // f(-x) --> f(x)
3229 // f(fabs(x)) --> f(x)
3230 // f(copysign(x, y)) --> f(x)
3231 // for f in {cos, cosh}
3232 return replaceOperand(*II, 0, X);
3233 }
3234 break;
3235 }
3236 case Intrinsic::sin:
3237 case Intrinsic::amdgcn_sin:
3238 case Intrinsic::sinh:
3239 case Intrinsic::tan:
3240 case Intrinsic::tanh: {
3241 Value *X;
3242 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3243 // f(-x) --> -f(x)
3244 // for f in {sin, sinh, tan, tanh}
3245 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3246 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3247 }
3248 break;
3249 }
3250 case Intrinsic::ldexp: {
3251 Value *Src = II->getArgOperand(0);
3252 Value *Exp = II->getArgOperand(1);
3253
3254 // ldexp(x, K) -> fmul x, 2^K
3255 uint64_t ConstExp;
3256 if (match(Exp, m_ConstantInt(ConstExp))) {
3257 const fltSemantics &FPTy =
3258 Src->getType()->getScalarType()->getFltSemantics();
3259
3260 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3262 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3263 // Skip overflow and underflow cases.
3264 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3265 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3266 }
3267 }
3268
3269 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3270 //
3271 // A danger is if the first ldexp would overflow to infinity or underflow to
3272 // zero, but the combined exponent avoids it.
3273 //
3274 // We ignore this with reassoc, or if we know both exponents have the same
3275 // sign (since then we'd just double down on the over/underflow which would
3276 // occur anyway).
3277 //
3278 // ldexp can take arbitrary integer types, so we also need to ensure that
3279 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3280 // then ldexp at the saturated exponent saturates to inf or zero as well.
3281 //
3282 // TODO: Could do better if we had range tracking for the input value
3283 // exponent. Also could broaden sign check to cover == 0 case.
3284 Value *InnerSrc;
3285 Value *InnerExp;
3287 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3288 Exp->getType() == InnerExp->getType()) {
3289 FastMathFlags FMF = II->getFastMathFlags();
3290 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3291
3292 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3293 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3294 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3295 Value *NewExp =
3296 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3297 II->setArgOperand(1, NewExp);
3298 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3299 return replaceOperand(*II, 0, InnerSrc);
3300 }
3301 }
3302
3303 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3304 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3305 Value *ExtSrc;
3306 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3307 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3308 Value *Select =
3309 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3310 ConstantFP::get(II->getType(), 1.0));
3312 }
3313 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3314 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3315 Value *Select =
3316 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3317 ConstantFP::get(II->getType(), 1.0));
3319 }
3320
3321 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3322 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3323 //
3324 // TODO: If we cared, should insert a canonicalize for x
3325 Value *SelectCond, *SelectLHS, *SelectRHS;
3326 if (match(II->getArgOperand(1),
3327 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3328 m_Value(SelectRHS))))) {
3329 Value *NewLdexp = nullptr;
3330 Value *Select = nullptr;
3331 if (match(SelectRHS, m_ZeroInt())) {
3332 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3333 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3334 } else if (match(SelectLHS, m_ZeroInt())) {
3335 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3336 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3337 }
3338
3339 if (NewLdexp) {
3340 Select->takeName(II);
3341 return replaceInstUsesWith(*II, Select);
3342 }
3343 }
3344
3345 break;
3346 }
3347 case Intrinsic::ptrauth_auth:
3348 case Intrinsic::ptrauth_resign: {
3349 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3350 // sign+auth component if the key and discriminator match.
3351 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3352 Value *Ptr = II->getArgOperand(0);
3353 Value *Key = II->getArgOperand(1);
3354 Value *Disc = II->getArgOperand(2);
3355 Value *DS = nullptr;
3356 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3357 DS = Bundle->Inputs[0];
3358
3359 // AuthKey will be the key we need to end up authenticating against in
3360 // whatever we replace this sequence with.
3361 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3362 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3363 Value *OtherDS = nullptr;
3364 if (auto Bundle =
3366 OtherDS = Bundle->Inputs[0];
3367 if (DS != OtherDS)
3368 break;
3369
3370 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3371 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3372 break;
3373 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3374 // The resign intrinsic does not support deactivation symbols.
3375 assert(!DS);
3376 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3377 break;
3378 AuthKey = CI->getArgOperand(1);
3379 AuthDisc = CI->getArgOperand(2);
3380 } else
3381 break;
3382 BasePtr = CI->getArgOperand(0);
3383 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3384 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3385 // our purposes, so check for that too.
3386 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3387 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3388 break;
3389
3390 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3391 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3392 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3393 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3394 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3395 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3396 SignDisc, /*AddrDisc=*/Null,
3397 /*DeactivationSymbol=*/Null);
3399 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3400 return eraseInstFromFunction(*II);
3401 }
3402
3403 // auth(ptrauth(p,k,d),k,d) -> p
3404 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3405 } else
3406 break;
3407
3408 unsigned NewIntrin;
3409 if (AuthKey && NeedSign) {
3410 // resign(0,1) + resign(1,2) = resign(0, 2)
3411 NewIntrin = Intrinsic::ptrauth_resign;
3412 } else if (AuthKey) {
3413 // resign(0,1) + auth(1) = auth(0)
3414 NewIntrin = Intrinsic::ptrauth_auth;
3415 } else if (NeedSign) {
3416 // sign(0) + resign(0, 1) = sign(1)
3417 NewIntrin = Intrinsic::ptrauth_sign;
3418 } else {
3419 // sign(0) + auth(0) = nop
3420 replaceInstUsesWith(*II, BasePtr);
3421 return eraseInstFromFunction(*II);
3422 }
3423
3424 SmallVector<Value *, 4> CallArgs;
3425 CallArgs.push_back(BasePtr);
3426 if (AuthKey) {
3427 CallArgs.push_back(AuthKey);
3428 CallArgs.push_back(AuthDisc);
3429 }
3430
3431 if (NeedSign) {
3432 CallArgs.push_back(II->getArgOperand(3));
3433 CallArgs.push_back(II->getArgOperand(4));
3434 }
3435
3436 std::vector<OperandBundleDef> Bundles;
3437 if (DS)
3438 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3439
3440 Function *NewFn =
3441 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3442 return CallInst::Create(NewFn, CallArgs, Bundles);
3443 }
3444 case Intrinsic::arm_neon_vtbl1:
3445 case Intrinsic::arm_neon_vtbl2:
3446 case Intrinsic::arm_neon_vtbl3:
3447 case Intrinsic::arm_neon_vtbl4:
3448 case Intrinsic::aarch64_neon_tbl1:
3449 case Intrinsic::aarch64_neon_tbl2:
3450 case Intrinsic::aarch64_neon_tbl3:
3451 case Intrinsic::aarch64_neon_tbl4:
3452 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3453 case Intrinsic::arm_neon_vtbx1:
3454 case Intrinsic::arm_neon_vtbx2:
3455 case Intrinsic::arm_neon_vtbx3:
3456 case Intrinsic::arm_neon_vtbx4:
3457 case Intrinsic::aarch64_neon_tbx1:
3458 case Intrinsic::aarch64_neon_tbx2:
3459 case Intrinsic::aarch64_neon_tbx3:
3460 case Intrinsic::aarch64_neon_tbx4:
3461 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3462
3463 case Intrinsic::arm_neon_vmulls:
3464 case Intrinsic::arm_neon_vmullu:
3465 case Intrinsic::aarch64_neon_smull:
3466 case Intrinsic::aarch64_neon_umull: {
3467 Value *Arg0 = II->getArgOperand(0);
3468 Value *Arg1 = II->getArgOperand(1);
3469
3470 // Handle mul by zero first:
3472 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3473 }
3474
3475 // Check for constant LHS & RHS - in this case we just simplify.
3476 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3477 IID == Intrinsic::aarch64_neon_umull);
3478 VectorType *NewVT = cast<VectorType>(II->getType());
3479 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3480 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3481 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3482 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3483 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3484 }
3485
3486 // Couldn't simplify - canonicalize constant to the RHS.
3487 std::swap(Arg0, Arg1);
3488 }
3489
3490 // Handle mul by one:
3491 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3492 if (ConstantInt *Splat =
3493 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3494 if (Splat->isOne())
3495 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3496 /*isSigned=*/!Zext);
3497
3498 break;
3499 }
3500 case Intrinsic::arm_neon_aesd:
3501 case Intrinsic::arm_neon_aese:
3502 case Intrinsic::aarch64_crypto_aesd:
3503 case Intrinsic::aarch64_crypto_aese:
3504 case Intrinsic::aarch64_sve_aesd:
3505 case Intrinsic::aarch64_sve_aese: {
3506 Value *DataArg = II->getArgOperand(0);
3507 Value *KeyArg = II->getArgOperand(1);
3508
3509 // Accept zero on either operand.
3510 if (!match(KeyArg, m_ZeroInt()))
3511 std::swap(KeyArg, DataArg);
3512
3513 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3514 Value *Data, *Key;
3515 if (match(KeyArg, m_ZeroInt()) &&
3516 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3517 replaceOperand(*II, 0, Data);
3518 replaceOperand(*II, 1, Key);
3519 return II;
3520 }
3521 break;
3522 }
3523 case Intrinsic::arm_neon_vshifts:
3524 case Intrinsic::arm_neon_vshiftu:
3525 case Intrinsic::aarch64_neon_sshl:
3526 case Intrinsic::aarch64_neon_ushl:
3527 return foldNeonShift(II, *this);
3528 case Intrinsic::hexagon_V6_vandvrt:
3529 case Intrinsic::hexagon_V6_vandvrt_128B: {
3530 // Simplify Q -> V -> Q conversion.
3531 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3532 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3533 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3534 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3535 break;
3536 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3537 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3538 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3539 // Check if every byte has common bits in Bytes and Mask.
3540 uint64_t C = Bytes1 & Mask1;
3541 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3542 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3543 }
3544 break;
3545 }
3546 case Intrinsic::stackrestore: {
// Categories that the Classify lambda assigns to a single instruction while
// the code scans the instructions around a stackrestore.
3547 enum class ClassifyResult {
3548 None, // Anything else, including intrinsic calls that report no side effects.
3549 Alloca, // The instruction is an AllocaInst.
3550 StackRestore, // The instruction is a call to the stackrestore intrinsic.
3551 CallWithSideEffects, // A non-intrinsic call, or an intrinsic call that may have side effects.
3552 };
// Maps one instruction to a ClassifyResult. The capture list is empty, so the
// CI and II declared inside are locals of this lambda and are unrelated to the
// identically named variables used by the enclosing code.
3553 auto Classify = [](const Instruction *I) {
3554 if (isa<AllocaInst>(I))
3555 return ClassifyResult::Alloca;
3556
3557 if (auto *CI = dyn_cast<CallInst>(I)) {
3558 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3559 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3560 return ClassifyResult::StackRestore;
3561
3562 if (II->mayHaveSideEffects())
3563 return ClassifyResult::CallWithSideEffects;
// An intrinsic that is neither a stackrestore nor side-effecting falls
// through to the final return below and is classified as None.
3564 } else {
3565 // Consider all non-intrinsic calls to be side effects
3566 return ClassifyResult::CallWithSideEffects;
3567 }
3568 }
3569
// Not an alloca and not a call that matters for the scan.
3570 return ClassifyResult::None;
3571 };
3572
3573 // If the stacksave and the stackrestore are in the same BB, and there is
3574 // no intervening call, alloca, or stackrestore of a different stacksave,
3575 // remove the restore. This can happen when variable allocas are DCE'd.
3576 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3577 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3578 SS->getParent() == II->getParent()) {
3579 BasicBlock::iterator BI(SS);
3580 bool CannotRemove = false;
3581 for (++BI; &*BI != II; ++BI) {
3582 switch (Classify(&*BI)) {
3583 case ClassifyResult::None:
3584 // So far so good, look at next instructions.
3585 break;
3586
3587 case ClassifyResult::StackRestore:
3588 // If we found an intervening stackrestore for a different
3589 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3590 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3591 CannotRemove = true;
3592 break;
3593
3594 case ClassifyResult::Alloca:
3595 case ClassifyResult::CallWithSideEffects:
3596 // If we found an alloca, a non-intrinsic call, or an intrinsic
3597 // call with side effects, we can't remove the stackrestore.
3598 CannotRemove = true;
3599 break;
3600 }
3601 if (CannotRemove)
3602 break;
3603 }
3604
3605 if (!CannotRemove)
3606 return eraseInstFromFunction(CI);
3607 }
3608 }
3609
3610 // Scan down this block to see if there is another stack restore in the
3611 // same block without an intervening call/alloca.
3613 Instruction *TI = II->getParent()->getTerminator();
3614 bool CannotRemove = false;
3615 for (++BI; &*BI != TI; ++BI) {
3616 switch (Classify(&*BI)) {
3617 case ClassifyResult::None:
3618 // So far so good, look at next instructions.
3619 break;
3620
3621 case ClassifyResult::StackRestore:
3622 // If there is a stackrestore below this one, remove this one.
3623 return eraseInstFromFunction(CI);
3624
3625 case ClassifyResult::Alloca:
3626 case ClassifyResult::CallWithSideEffects:
3627 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3628 // with side effects (such as llvm.stacksave and llvm.read_register),
3629 // we can't remove the stack restore.
3630 CannotRemove = true;
3631 break;
3632 }
3633 if (CannotRemove)
3634 break;
3635 }
3636
3637 // If the stack restore is in a return, resume, or unwind block and if there
3638 // are no allocas or calls between the restore and the return, nuke the
3639 // restore.
3640 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3641 return eraseInstFromFunction(CI);
3642 break;
3643 }
3644 case Intrinsic::lifetime_end:
3645 // Asan needs to poison memory to detect invalid access which is possible
3646 // even for empty lifetime range.
3647 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3648 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3649 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3650 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3651 break;
3652
3653 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3654 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3655 }))
3656 return nullptr;
3657 break;
3658 case Intrinsic::assume: {
3659 for (auto [Idx, OBU] : llvm::enumerate(II->operand_bundles())) {
3660 auto RemoveBundle = [&, Idx = Idx]() -> Instruction * {
3661 if (II->getNumOperandBundles() == 1)
3662 return eraseInstFromFunction(*II);
3664 };
3665
3666 switch (getBundleAttrFromOBU(OBU)) {
3667 case BundleAttr::None:
3668 llvm_unreachable("Unexpected Attribute");
3669 case BundleAttr::Align: {
3670 // Try to remove redundant alignment assumptions.
3671 auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU);
3672
3673 if (!Alignment || !Offset || *Offset != 0)
3674 break;
3675
3676 // Remove align 1 and non-power-of-two bundles; they don't add any
3677 // useful information.
3678 if (*Alignment == 1 || !isPowerOf2_64(*Alignment))
3679 return RemoveBundle();
3680
3681 // Don't try to remove align assumptions for pointers derived from
3682 // arguments. We might lose information if the function gets inline and
3683 // the align argument attribute disappears.
3684 Value *UO = getUnderlyingObject(Ptr);
3685 if (!UO || isa<Argument>(UO))
3686 break;
3687
3688 // Compute known bits for the pointer and drop the assume if the
3689 // known alignment isn't increased by it.
3690 if (computeKnownBits(Ptr, II).countMinTrailingZeros() <
3691 Log2_64(*Alignment))
3692 continue;
3693 return RemoveBundle();
3694 }
3695
3696 case BundleAttr::Dereferenceable: {
3697 auto [Ptr, _, Count] = getAssumeDereferenceableInfo(OBU);
3698
3699 if (!Count)
3700 break;
3701
3703 Ptr, Align(1), APInt(64, *Count),
3704 getSimplifyQuery().getWithInstruction(II)))
3705 return RemoveBundle();
3706
3707 break;
3708 }
3709
3710 case BundleAttr::Ignore:
3711 return RemoveBundle();
3712
3713 case BundleAttr::NonNull: {
3714 auto [Ptr] = llvm::getAssumeNonNullInfo(OBU);
3715
3716 // Drop assume if we can prove nonnull without it
3717 if (isKnownNonZero(Ptr, getSimplifyQuery().getWithInstruction(II)))
3718 return RemoveBundle();
3719
3720 // Fold the assume into metadata if it's valid at the load
3721 if (auto *LI = dyn_cast<LoadInst>(Ptr);
3722 LI &&
3723 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3724 MDNode *MD = MDNode::get(II->getContext(), {});
3725 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3726 LI->setMetadata(LLVMContext::MD_noundef, MD);
3727 return RemoveBundle();
3728 }
3729
3730 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3731 GEP && GEP->isInBounds() &&
3732 !NullPointerIsDefined(II->getFunction(),
3733 Ptr->getType()->getPointerAddressSpace())) {
3734 Builder.CreateNonnullAssumption(GEP->stripInBoundsOffsets());
3735 return RemoveBundle();
3736 }
3737
3738 // TODO: apply nonnull return attributes to calls and invokes
3739 break;
3740 }
3741
3742 case BundleAttr::NoUndef: {
3743 auto [Val] = getAssumeNoUndefInfo(OBU);
3744
3746 return RemoveBundle();
3747
3748 if (auto *LI = dyn_cast<LoadInst>(Val);
3749 LI &&
3750 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3751 LI->setMetadata(LLVMContext::MD_noundef,
3752 MDNode::get(II->getContext(), {}));
3753 return RemoveBundle();
3754 }
3755
3756 } break;
3757
3758 case BundleAttr::SeparateStorage: {
3759 auto [Ptr1, Ptr2] = getAssumeSeparateStorageInfo(OBU);
3760 // Separate storage assumptions apply to the underlying allocations, not
3761 // any particular pointer within them. When evaluating the hints for AA
3762 // purposes we getUnderlyingObject them; by precomputing the answers
3763 // here we can avoid having to do so repeatedly there.
3764 auto MaybeSimplifyHint = [&](const Use &U) {
3765 Value *Hint = U.get();
3766 // Not having a limit is safe because InstCombine removes unreachable
3767 // code.
3768 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3769 if (Hint != UnderlyingObject)
3770 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3771 };
3772 MaybeSimplifyHint(Ptr1);
3773 MaybeSimplifyHint(Ptr2);
3774 } break;
3775
3776 // TODO: Drop these assumes when they are redundant
3777 case BundleAttr::DereferenceableOrNull:
3778 break;
3779
3780 // This cannot be simplified
3781 case BundleAttr::Cold:
3782 break;
3783 }
3784 }
3785
3786 // If the assume has operand bundles, the folds below will never work, so
3787 // don't bother trying.
3788 if (II->hasOperandBundles())
3789 break;
3790
3791 Value *IIOperand = II->getArgOperand(0);
3792
3793 // Canonicalize assume(a && b) -> assume(a); assume(b);
3794 // Note: New assumption intrinsics created here are registered by
3795 // the InstCombineIRInserter object.
3796 Value *A, *B;
3797 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3798 Builder.CreateAssumption(A);
3799 Builder.CreateAssumption(B);
3800 return eraseInstFromFunction(*II);
3801 }
3802 // assume(!(a || b)) -> assume(!a); assume(!b);
3803 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3804 Builder.CreateAssumption(Builder.CreateNot(A));
3805 Builder.CreateAssumption(Builder.CreateNot(B));
3806 return eraseInstFromFunction(*II);
3807 }
3808
3809 // Convert nonnull assume like:
3810 // %A = icmp ne i32* %PTR, null
3811 // call void @llvm.assume(i1 %A)
3812 // into
3813 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3814 if (match(IIOperand,
3816 A->getType()->isPointerTy()) {
3817 Builder.CreateNonnullAssumption(A);
3818 return eraseInstFromFunction(*II);
3819 }
3820
3821 // Convert alignment assume like:
3822 // %B = ptrtoint i32* %A to i64
3823 // %C = and i64 %B, Constant
3824 // %D = icmp eq i64 %C, 0
3825 // call void @llvm.assume(i1 %D)
3826 // into
3827 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3828 uint64_t AlignMask = 1;
3829 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3830 match(IIOperand,
3832 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3833 m_Zero())))) {
3834 if (isPowerOf2_64(AlignMask + 1)) {
3835 uint64_t Offset = 0;
3837 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3838 /// Note: this doesn't preserve the offset information but merges
3839 /// offset and alignment.
3840 /// TODO: we can generate a GEP instead of merging the alignment with
3841 /// the offset.
3842 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3843 MinAlign(Offset, AlignMask + 1));
3844 return eraseInstFromFunction(*II);
3845 }
3846 }
3847 }
3848
3849 // If there is a dominating assume with the same condition as this one,
3850 // then this one is redundant, and should be removed.
3851 KnownBits Known(1);
3852 computeKnownBits(IIOperand, Known, II);
3853 if (Known.isAllOnes())
3854 return eraseInstFromFunction(*II);
3855
3856 // assume(false) is unreachable.
3857 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3859 return eraseInstFromFunction(*II);
3860 }
3861
3862 // Update the cache of affected values for this assumption (we might be
3863 // here because we just simplified the condition).
3864 AC.updateAffectedValues(cast<AssumeInst>(II));
3865 break;
3866 }
3867 case Intrinsic::experimental_guard: {
3868 // Is this guard followed by another guard? We scan forward over a small
3869 // fixed window of instructions to handle common cases with conditions
3870 // computed between guards.
3871 Instruction *NextInst = II->getNextNode();
3872 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3873 // Note: Using context-free form to avoid compile time blow up
3874 if (!isSafeToSpeculativelyExecute(NextInst))
3875 break;
3876 NextInst = NextInst->getNextNode();
3877 }
3878 Value *NextCond = nullptr;
3879 if (match(NextInst,
3881 Value *CurrCond = II->getArgOperand(0);
3882
3883 // Remove a guard that it is immediately preceded by an identical guard.
3884 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3885 if (CurrCond != NextCond) {
3886 Instruction *MoveI = II->getNextNode();
3887 while (MoveI != NextInst) {
3888 auto *Temp = MoveI;
3889 MoveI = MoveI->getNextNode();
3890 Temp->moveBefore(II->getIterator());
3891 }
3892 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3893 }
3894 eraseInstFromFunction(*NextInst);
3895 return II;
3896 }
3897 break;
3898 }
3899 case Intrinsic::vector_insert: {
3900 Value *Vec = II->getArgOperand(0);
3901 Value *SubVec = II->getArgOperand(1);
3902 Value *Idx = II->getArgOperand(2);
3903 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3904 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3905 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3906
3907 // Only canonicalize if the destination vector, Vec, and SubVec are all
3908 // fixed vectors.
3909 if (DstTy && VecTy && SubVecTy) {
3910 unsigned DstNumElts = DstTy->getNumElements();
3911 unsigned VecNumElts = VecTy->getNumElements();
3912 unsigned SubVecNumElts = SubVecTy->getNumElements();
3913 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3914
3915 // An insert that entirely overwrites Vec with SubVec is a nop.
3916 if (VecNumElts == SubVecNumElts)
3917 return replaceInstUsesWith(CI, SubVec);
3918
3919 // Widen SubVec into a vector of the same width as Vec, since
3920 // shufflevector requires the two input vectors to be the same width.
3921 // Elements beyond the bounds of SubVec within the widened vector are
3922 // undefined.
3923 SmallVector<int, 8> WidenMask;
3924 unsigned i;
3925 for (i = 0; i != SubVecNumElts; ++i)
3926 WidenMask.push_back(i);
3927 for (; i != VecNumElts; ++i)
3928 WidenMask.push_back(PoisonMaskElem);
3929
3930 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3931
3933 for (unsigned i = 0; i != IdxN; ++i)
3934 Mask.push_back(i);
3935 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3936 Mask.push_back(i);
3937 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3938 Mask.push_back(i);
3939
3940 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3941 return replaceInstUsesWith(CI, Shuffle);
3942 }
3943 break;
3944 }
3945 case Intrinsic::vector_extract: {
3946 Value *Vec = II->getArgOperand(0);
3947 Value *Idx = II->getArgOperand(1);
3948
3949 Type *ReturnType = II->getType();
3950 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3951 // ExtractIdx)
3952 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3953 Value *InsertTuple, *InsertIdx, *InsertValue;
3955 m_Value(InsertValue),
3956 m_Value(InsertIdx))) &&
3957 InsertValue->getType() == ReturnType) {
3958 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3959 // Case where we get the same index right after setting it.
3960 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3961 // InsertValue
3962 if (ExtractIdx == Index)
3963 return replaceInstUsesWith(CI, InsertValue);
3964 // If we are getting a different index than what was set in the
3965 // insert.vector intrinsic. We can just set the input tuple to the one up
3966 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3967 // InsertIndex), ExtractIndex)
3968 // --> extract.vector(InsertTuple, ExtractIndex)
3969 else
3970 return replaceOperand(CI, 0, InsertTuple);
3971 }
3972
3973 ConstantInt *ALMUpperBound;
3975 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3976 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3977 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3978 unsigned ScaleFactor =
3979 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
3980 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3981 return replaceInstUsesWith(CI,
3982 ConstantVector::getNullValue(ReturnType));
3983 }
3984
3985 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3986 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3987
3988 if (DstTy && VecTy) {
3989 auto DstEltCnt = DstTy->getElementCount();
3990 auto VecEltCnt = VecTy->getElementCount();
3991 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3992
3993 // Extracting the entirety of Vec is a nop.
3994 if (DstEltCnt == VecTy->getElementCount()) {
3995 replaceInstUsesWith(CI, Vec);
3996 return eraseInstFromFunction(CI);
3997 }
3998
3999 // Only canonicalize to shufflevector if the destination vector and
4000 // Vec are fixed vectors.
4001 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
4002 break;
4003
4005 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
4006 Mask.push_back(IdxN + i);
4007
4008 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
4009 return replaceInstUsesWith(CI, Shuffle);
4010 }
4011 break;
4012 }
4013 case Intrinsic::experimental_vp_reverse: {
4014 Value *X;
4015 Value *Vec = II->getArgOperand(0);
4016 Value *Mask = II->getArgOperand(1);
4017 if (!match(Mask, m_AllOnes()))
4018 break;
4019 Value *EVL = II->getArgOperand(2);
4020 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
4021 // rev(unop rev(X)) --> unop X
4022 if (match(Vec,
4024 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
4025 auto *OldUnOp = cast<UnaryOperator>(Vec);
4027 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
4028 II->getIterator());
4029 return replaceInstUsesWith(CI, NewUnOp);
4030 }
4031 break;
4032 }
4033 case Intrinsic::vector_reduce_or:
4034 case Intrinsic::vector_reduce_and: {
4035 // Canonicalize logical or/and reductions:
4036 // Or reduction for i1 is represented as:
4037 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4038 // %res = cmp ne iReduxWidth %val, 0
4039 // And reduction for i1 is represented as:
4040 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4041 // %res = cmp eq iReduxWidth %val, 11111
4042 Value *Arg = II->getArgOperand(0);
4043 Value *Vect;
4044
4045 if (Value *NewOp =
4046 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4047 replaceUse(II->getOperandUse(0), NewOp);
4048 return II;
4049 }
4050
4051 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4052 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4053 if (FTy->getElementType() == Builder.getInt1Ty()) {
4054 Value *Res = Builder.CreateBitCast(
4055 Vect, Builder.getIntNTy(FTy->getNumElements()));
4056 if (IID == Intrinsic::vector_reduce_and) {
4057 Res = Builder.CreateICmpEQ(
4059 } else {
4060 assert(IID == Intrinsic::vector_reduce_or &&
4061 "Expected or reduction.");
4062 Res = Builder.CreateIsNotNull(Res);
4063 }
4064 if (Arg != Vect)
4065 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4066 II->getType());
4067 return replaceInstUsesWith(CI, Res);
4068 }
4069 }
4070 [[fallthrough]];
4071 }
4072 case Intrinsic::vector_reduce_add: {
4073 if (IID == Intrinsic::vector_reduce_add) {
4074 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4075 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4076 // Convert vector_reduce_add(SExt(<n x i1>)) to
4077 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4078 // Convert vector_reduce_add(<n x i1>) to
4079 // Trunc(ctpop(bitcast <n x i1> to in)).
4080 Value *Arg = II->getArgOperand(0);
4081 Value *Vect;
4082
4083 if (Value *NewOp =
4084 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4085 replaceUse(II->getOperandUse(0), NewOp);
4086 return II;
4087 }
4088
4089 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4090 if (Value *Splat = getSplatValue(Arg)) {
4091 ElementCount VecToReduceCount =
4092 cast<VectorType>(Arg->getType())->getElementCount();
4093 if (VecToReduceCount.isFixed()) {
4094 unsigned VectorSize = VecToReduceCount.getFixedValue();
4095 return BinaryOperator::CreateMul(
4096 Splat,
4097 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4098 /*ImplicitTrunc=*/true));
4099 }
4100 }
4101
4102 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4103 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4104 if (FTy->getElementType() == Builder.getInt1Ty()) {
4105 Value *V = Builder.CreateBitCast(
4106 Vect, Builder.getIntNTy(FTy->getNumElements()));
4107 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4108 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4109 if (Arg != Vect &&
4110 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4111 Res = Builder.CreateNeg(Res);
4112 return replaceInstUsesWith(CI, Res);
4113 }
4114 }
4115 }
4116 [[fallthrough]];
4117 }
4118 case Intrinsic::vector_reduce_xor: {
4119 if (IID == Intrinsic::vector_reduce_xor) {
4120 // Exclusive disjunction reduction over the vector with
4121 // (potentially-extended) i1 element type is actually a
4122 // (potentially-extended) arithmetic `add` reduction over the original
4123 // non-extended value:
4124 // vector_reduce_xor(?ext(<n x i1>))
4125 // -->
4126 // ?ext(vector_reduce_add(<n x i1>))
4127 Value *Arg = II->getArgOperand(0);
4128 Value *Vect;
4129
4130 if (Value *NewOp =
4131 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4132 replaceUse(II->getOperandUse(0), NewOp);
4133 return II;
4134 }
4135
4136 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4137 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4138 if (VTy->getElementType() == Builder.getInt1Ty()) {
4139 Value *Res = Builder.CreateAddReduce(Vect);
4140 if (Arg != Vect)
4141 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4142 II->getType());
4143 return replaceInstUsesWith(CI, Res);
4144 }
4145 }
4146 }
4147 [[fallthrough]];
4148 }
4149 case Intrinsic::vector_reduce_mul: {
4150 if (IID == Intrinsic::vector_reduce_mul) {
4151 Value *Arg = II->getArgOperand(0);
4152
4153 if (Value *NewOp =
4154 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4155 replaceUse(II->getOperandUse(0), NewOp);
4156 return II;
4157 }
4158
4159 // vector_reduce_mul(zext(<n x i1>)), or
4160 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4161 // zext(vector_reduce_and(<n x i1>)).
4162 // (The sext case doesn't work if n is odd because multiplying an odd
4163 // number of -1's produces -1, not 1.)
4164 Value *Vect;
4165 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4166 Vect->getType()->isIntOrIntVectorTy(1);
4167 bool IsSext =
4168 match(Arg, m_SExt(m_Value(Vect))) &&
4169 Vect->getType()->isIntOrIntVectorTy(1) &&
4170 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4171 if (IsZext || IsSext) {
4172 Value *Res = Builder.CreateAndReduce(Vect);
4173 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4174 }
4175
4176 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4177 if (Arg->getType()->isIntOrIntVectorTy(1))
4178 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4179 }
4180 [[fallthrough]];
4181 }
4182 case Intrinsic::vector_reduce_umin:
4183 case Intrinsic::vector_reduce_umax: {
4184 if (IID == Intrinsic::vector_reduce_umin ||
4185 IID == Intrinsic::vector_reduce_umax) {
4186 // UMin/UMax reduction over the vector with (potentially-extended)
4187 // i1 element type is actually a (potentially-extended)
4188 // logical `and`/`or` reduction over the original non-extended value:
4189 // vector_reduce_u{min,max}(?ext(<n x i1>))
4190 // -->
4191 // ?ext(vector_reduce_{and,or}(<n x i1>))
4192 Value *Arg = II->getArgOperand(0);
4193 Value *Vect;
4194
4195 if (Value *NewOp =
4196 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4197 replaceUse(II->getOperandUse(0), NewOp);
4198 return II;
4199 }
4200
4201 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4202 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4203 if (VTy->getElementType() == Builder.getInt1Ty()) {
4204 Value *Res = IID == Intrinsic::vector_reduce_umin
4205 ? Builder.CreateAndReduce(Vect)
4206 : Builder.CreateOrReduce(Vect);
4207 if (Arg != Vect)
4208 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4209 II->getType());
4210 return replaceInstUsesWith(CI, Res);
4211 }
4212 }
4213 }
4214 [[fallthrough]];
4215 }
4216 case Intrinsic::vector_reduce_smin:
4217 case Intrinsic::vector_reduce_smax: {
4218 if (IID == Intrinsic::vector_reduce_smin ||
4219 IID == Intrinsic::vector_reduce_smax) {
4220 // SMin/SMax reduction over the vector with (potentially-extended)
4221 // i1 element type is actually a (potentially-extended)
4222 // logical `and`/`or` reduction over the original non-extended value:
4223 // vector_reduce_s{min,max}(<n x i1>)
4224 // -->
4225 // vector_reduce_{or,and}(<n x i1>)
4226 // and
4227 // vector_reduce_s{min,max}(sext(<n x i1>))
4228 // -->
4229 // sext(vector_reduce_{or,and}(<n x i1>))
4230 // and
4231 // vector_reduce_s{min,max}(zext(<n x i1>))
4232 // -->
4233 // zext(vector_reduce_{and,or}(<n x i1>))
4234 Value *Arg = II->getArgOperand(0);
4235 Value *Vect;
4236
4237 if (Value *NewOp =
4238 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4239 replaceUse(II->getOperandUse(0), NewOp);
4240 return II;
4241 }
4242
4243 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4244 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4245 if (VTy->getElementType() == Builder.getInt1Ty()) {
4246 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4247 if (Arg != Vect)
4248 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4249 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4250 (ExtOpc == Instruction::CastOps::ZExt))
4251 ? Builder.CreateAndReduce(Vect)
4252 : Builder.CreateOrReduce(Vect);
4253 if (Arg != Vect)
4254 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4255 return replaceInstUsesWith(CI, Res);
4256 }
4257 }
4258 }
4259 [[fallthrough]];
4260 }
4261 case Intrinsic::vector_reduce_fmax:
4262 case Intrinsic::vector_reduce_fmin:
4263 case Intrinsic::vector_reduce_fadd:
4264 case Intrinsic::vector_reduce_fmul: {
4265 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4266 IID != Intrinsic::vector_reduce_fmul) ||
4267 II->hasAllowReassoc();
4268 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4269 IID == Intrinsic::vector_reduce_fmul)
4270 ? 1
4271 : 0;
4272 Value *Arg = II->getArgOperand(ArgIdx);
4273 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4274 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4275 return nullptr;
4276 }
4277 break;
4278 }
4279 case Intrinsic::is_fpclass: {
4280 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4281 return I;
4282 break;
4283 }
4284 case Intrinsic::threadlocal_address: {
4285 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4286 MaybeAlign Align = II->getRetAlign();
4287 if (MinAlign > Align.valueOrOne()) {
4288 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4289 return II;
4290 }
4291 break;
4292 }
4293 case Intrinsic::fptoui_sat:
4294 case Intrinsic::fptosi_sat:
4295 if (Instruction *I = foldItoFPtoI(*II))
4296 return I;
4297 break;
4298 case Intrinsic::frexp: {
4299 // frexp(frexp(x).fract) -> { frexp(x).fract, 0 }: the fraction operand is
4300 // already normalized, so the first result is idempotent and the second is
4301 // zero.
4302 if (match(II->getArgOperand(0),
4304 Value *Res = Builder.CreateInsertValue(PoisonValue::get(II->getType()),
4305 II->getArgOperand(0), 0);
4306 Res = Builder.CreateInsertValue(
4307 Res, Constant::getNullValue(II->getType()->getStructElementType(1)),
4308 1);
4309 return replaceInstUsesWith(*II, Res);
4310 }
4311 break;
4312 }
4313 case Intrinsic::get_active_lane_mask: {
4314 const APInt *Op0, *Op1;
4315 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4316 match(II->getOperand(1), m_APInt(Op1))) {
4317 Type *OpTy = II->getOperand(0)->getType();
4318 return replaceInstUsesWith(
4319 *II, Builder.CreateIntrinsic(
4320 II->getType(), Intrinsic::get_active_lane_mask,
4321 {Constant::getNullValue(OpTy),
4322 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4323 }
4324 break;
4325 }
4326 case Intrinsic::experimental_get_vector_length: {
4327 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4328 unsigned BitWidth =
4329 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4330 II->getType()->getScalarSizeInBits());
4331 ConstantRange Cnt =
4332 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4333 SQ.getWithInstruction(II))
4335 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4336 ->getValue()
4337 .zextOrTrunc(Cnt.getBitWidth());
4338 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4339 MaxLanes = MaxLanes.multiply(
4340 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4341
4342 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4343 return replaceInstUsesWith(
4344 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4345 return nullptr;
4346 }
4347 default: {
4348 // Handle target specific intrinsics
4349 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4350 if (V)
4351 return *V;
4352 break;
4353 }
4354 }
4355
4356 // Try to fold intrinsic into select/phi operands. This is legal if:
4357 // * The intrinsic is speculatable.
4358 // * The operand is one of the following:
4359 // - a phi.
4360 // - a select with a scalar condition.
4361 // - a select with a vector condition and II is not a cross lane operation.
4363 for (Value *Op : II->args()) {
4364 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4365 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4366 if (IsVectorCond &&
4367 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4368 continue;
4369 // Don't replace a scalar select with a more expensive vector select if
4370 // we can't simplify both arms of the select.
4371 bool SimplifyBothArms =
4372 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4374 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4375 return R;
4376 }
4377 if (auto *Phi = dyn_cast<PHINode>(Op))
4378 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4379 return R;
4380 }
4381 }
4382
4384 return Shuf;
4385
4387 return replaceInstUsesWith(*II, Reverse);
4388
4390 return replaceInstUsesWith(*II, Res);
4391
4392 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4393 // context, so it is handled in visitCallBase and we should trigger it.
4394 return visitCallBase(*II);
4395}
4396
4397// Fence instruction simplification
4399 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4400 // This check is solely here to handle arbitrary target-dependent syncscopes.
4401 // TODO: Can remove if does not matter in practice.
4402 if (NFI && FI.isIdenticalTo(NFI))
4403 return eraseInstFromFunction(FI);
4404
4405 // Returns true if FI1 is identical or stronger fence than FI2.
4406 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4407 auto FI1SyncScope = FI1->getSyncScopeID();
4408 // Consider same scope, where scope is global or single-thread.
4409 if (FI1SyncScope != FI2->getSyncScopeID() ||
4410 (FI1SyncScope != SyncScope::System &&
4411 FI1SyncScope != SyncScope::SingleThread))
4412 return false;
4413
4414 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4415 };
4416 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4417 return eraseInstFromFunction(FI);
4418
4419 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4420 if (isIdenticalOrStrongerFence(PFI, &FI))
4421 return eraseInstFromFunction(FI);
4422 return nullptr;
4423}
4424
4425// InvokeInst simplification
4427 return visitCallBase(II);
4428}
4429
4430// CallBrInst simplification
4432 return visitCallBase(CBI);
4433}
4434
4435// A simple parser for format string specifiers for the purposes of the
4436// modular-format attribute. In the case of malformed format strings this might
4437// under or over report the specifiers present, but such cases are undefined
4438// behavior.
4440 Bitset<256> Specifiers;
4441 for (size_t I = 0; I < FormatStr.size(); ++I) {
4442 if (FormatStr[I] != '%')
4443 continue;
4444
4445 // Check for escaped '%'.
4446 if (I + 1 < FormatStr.size() && FormatStr[I + 1] == '%') {
4447 ++I; // Skip the second '%'.
4448 continue;
4449 }
4450
4451 // Scan past allowed prefix characters.
4452 size_t J =
4453 FormatStr.find_first_not_of("0123456789-+ #0$.*'hlLjztqwvI", I + 1);
4454 if (J == StringRef::npos)
4455 break;
4456
4457 Specifiers.set(static_cast<unsigned char>(FormatStr[J]));
4458 I = J; // Resume search from after the specifier.
4459 }
4460 return Specifiers;
4461}
4462
4463static bool isAspectNeeded(StringRef Aspect, CallInst *CI, unsigned FirstArgIdx,
4464 const std::optional<Bitset<256>> &Specifiers) {
4465 if (Aspect == "float") {
4466 if (Specifiers) {
4467 static constexpr Bitset<256> FloatSpecifiers{'f', 'F', 'e', 'E',
4468 'g', 'G', 'a', 'A'};
4469 return (*Specifiers & FloatSpecifiers).any();
4470 }
4471 // Fallback to type-based check for dynamic format string.
4472 return llvm::any_of(
4473 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4474 CI->arg_end()),
4475 [](Value *V) { return V->getType()->isFloatingPointTy(); });
4476 }
4477 if (Aspect == "fixed") {
4478 if (Specifiers) {
4479 static constexpr Bitset<256> FixedSpecifiers{'r', 'R', 'k', 'K'};
4480 return (*Specifiers & FixedSpecifiers).any();
4481 }
4482 // Fallback for fixed-point: assume needed if format is dynamic.
4483 return true;
4484 }
4485 // Unknown aspects are always considered to be needed.
4486 return true;
4487}
4488
4489static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M,
4490 IRBuilderBase &B) {
4491 SmallString<20> Name = ImplName;
4492 Name += '_';
4493 Name += Aspect;
4494 LLVMContext &Ctx = M->getContext();
4495 Function *RelocNoneFn =
4496 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4497 B.CreateCall(RelocNoneFn,
4498 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4499}
4500
4502 if (!CI->hasFnAttr("modular-format"))
4503 return nullptr;
4504
4506 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4507 if (Args.size() < 5)
4508 return nullptr;
4509
4510 StringRef FormatIdxStr = Args[1];
4511 StringRef FirstArgIdxStr = Args[2];
4512 StringRef FnName = Args[3];
4513 StringRef ImplName = Args[4];
4515
4516 unsigned FormatIdx;
4517 unsigned FirstArgIdx;
4518 [[maybe_unused]] bool Error;
4519 Error = FormatIdxStr.getAsInteger(10, FormatIdx);
4520 assert(!Error && "invalid format arg index");
4521 --FormatIdx; // 1-based to 0-based
4522
4523 Error = FirstArgIdxStr.getAsInteger(10, FirstArgIdx);
4524 assert(!Error && "invalid first arg index");
4525 if (FirstArgIdx == 0)
4526 return nullptr;
4527 --FirstArgIdx; // 1-based to 0-based
4528
4529 if (AllAspects.empty())
4530 return nullptr;
4531
4532 Value *FormatVal = CI->getArgOperand(FormatIdx);
4533 StringRef FormatStr;
4534
4535 std::optional<Bitset<256>> Specifiers;
4536 if (getConstantStringInfo(FormatVal, FormatStr))
4537 Specifiers = parseFormatStringSpecifiers(FormatStr);
4538
4539 SmallVector<StringRef> NeededAspects;
4540 for (StringRef Aspect : AllAspects)
4541 if (isAspectNeeded(Aspect, CI, FirstArgIdx, Specifiers))
4542 NeededAspects.push_back(Aspect);
4543
4544 if (NeededAspects.size() == AllAspects.size())
4545 return nullptr;
4546
4547 Module *M = CI->getModule();
4548 LLVMContext &Ctx = M->getContext();
4549 Function *Callee = CI->getCalledFunction();
4550 FunctionCallee ModularFn = M->getOrInsertFunction(
4551 FnName, Callee->getFunctionType(),
4552 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4553 CallInst *New = cast<CallInst>(CI->clone());
4554 New->setCalledFunction(ModularFn);
4555 New->removeFnAttr("modular-format");
4556 B.Insert(New);
4557
4558 llvm::sort(NeededAspects);
4559 for (StringRef Request : NeededAspects)
4560 referenceAspect(Request, ImplName, M, B);
4561
4562 return New;
4563}
4564
4565Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4566 if (!CI->getCalledFunction()) return nullptr;
4567
4568 // Skip optimizing notail and musttail calls so
4569 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4570 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4571 if (CI->isMustTailCall() || CI->isNoTailCall())
4572 return nullptr;
4573
4574 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4575 replaceInstUsesWith(*From, With);
4576 };
4577 auto InstCombineErase = [this](Instruction *I) {
4579 };
4580 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4581 InstCombineRAUW, InstCombineErase);
4582 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4583 ++NumSimplified;
4584 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4585 }
4586 if (Value *With = optimizeModularFormat(CI, Builder)) {
4587 ++NumSimplified;
4588 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4589 }
4590
4591 return nullptr;
4592}
4593
// Look for the single init.trampoline call that initializes TrampMem, which
// must be an alloca or a pointer cast whose only user chain leads from one.
// Returns that call, or nullptr if TrampMem has any user other than trampoline
// intrinsics, has several init.trampoline users, or has none.
4595 // Strip off at most one level of pointer casts, looking for an alloca. This
4596 // is good enough in practice and simpler than handling any number of casts.
4597 Value *Underlying = TrampMem->stripPointerCasts();
// If casts were stripped, require TrampMem to be the only user of the
// underlying value; otherwise the memory could be reached some other way.
4598 if (Underlying != TrampMem &&
4599 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4600 return nullptr;
4601 if (!isa<AllocaInst>(Underlying))
4602 return nullptr;
4603
4604 IntrinsicInst *InitTrampoline = nullptr;
4605 for (User *U : TrampMem->users()) {
// A null II makes the whole search fail.
4607 if (!II)
4608 return nullptr;
4609 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4610 if (InitTrampoline)
4611 // More than one init_trampoline writes to this value. Give up.
4612 return nullptr;
4613 InitTrampoline = II;
4614 continue;
4615 }
4616 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4617 // Allow any number of calls to adjust.trampoline.
4618 continue;
// Any other intrinsic user is not understood; give up.
4619 return nullptr;
4620 }
4621
4622 // No call to init.trampoline found.
4623 if (!InitTrampoline)
4624 return nullptr;
4625
4626 // Check that the alloca is being used in the expected way.
// That is, as operand 0 of init.trampoline, not as one of its other operands.
4627 if (InitTrampoline->getOperand(0) != TrampMem)
4628 return nullptr;
4629
4630 return InitTrampoline;
4631}
4632
4634 Value *TrampMem) {
4635 // Visit all the previous instructions in the basic block, and try to find a
4636 // init.trampoline which has a direct path to the adjust.trampoline.
4637 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4638 E = AdjustTramp->getParent()->begin();
4639 I != E;) {
4640 Instruction *Inst = &*--I;
4642 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4643 II->getOperand(0) == TrampMem)
4644 return II;
4645 if (Inst->mayWriteToMemory())
4646 return nullptr;
4647 }
4648 return nullptr;
4649}
4650
4651// Given a call to llvm.adjust.trampoline, find and return the corresponding
4652// call to llvm.init.trampoline if the call to the trampoline can be optimized
4653// to a direct call to a function. Otherwise return NULL.
// Pointer casts on the callee are looked through before matching the
// adjust.trampoline call.
4655 Callee = Callee->stripPointerCasts();
4656 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4657 if (!AdjustTramp ||
4658 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4659 return nullptr;
4660
// Operand 0 of adjust.trampoline is the trampoline memory.
4661 Value *TrampMem = AdjustTramp->getOperand(0);
4662
4664 return IT;
// Otherwise fall back to a backwards scan of the block containing the
// adjust.trampoline call.
4665 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4666 return IT;
4667 return nullptr;
4668}
4669
4670Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4671 const Value *Callee = Call.getCalledOperand();
4672 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4673 if (!IPC || !IPC->isNoopCast(DL))
4674 return nullptr;
4675
4676 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4677 if (!II)
4678 return nullptr;
4679
4680 Intrinsic::ID IIID = II->getIntrinsicID();
4681 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4682 return nullptr;
4683
4684 // Isolate the ptrauth bundle from the others.
4685 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4687 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4688 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4689 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4690 PtrAuthBundleOrNone = Bundle;
4691 else
4692 NewBundles.emplace_back(Bundle);
4693 }
4694
4695 if (!PtrAuthBundleOrNone)
4696 return nullptr;
4697
4698 Value *NewCallee = nullptr;
4699 switch (IIID) {
4700 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4701 // assuming the call bundle and the sign operands match.
4702 case Intrinsic::ptrauth_resign: {
4703 // Resign result key should match bundle.
4704 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4705 return nullptr;
4706 // Resign result discriminator should match bundle.
4707 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4708 return nullptr;
4709
4710 // Resign input (auth) key should also match: we can't change the key on
4711 // the new call we're generating, because we don't know what keys are valid.
4712 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4713 return nullptr;
4714
4715 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4716 NewBundles.emplace_back("ptrauth", NewBundleOps);
4717 NewCallee = II->getOperand(0);
4718 break;
4719 }
4720
4721 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4722 // assuming the call bundle and the sign operands match.
4723 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4724 case Intrinsic::ptrauth_sign: {
4725 // Sign key should match bundle.
4726 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4727 return nullptr;
4728 // Sign discriminator should match bundle.
4729 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4730 return nullptr;
4731 NewCallee = II->getOperand(0);
4732 break;
4733 }
4734 default:
4735 llvm_unreachable("unexpected intrinsic ID");
4736 }
4737
4738 if (!NewCallee)
4739 return nullptr;
4740
4741 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4742 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4743 NewCall->setCalledOperand(NewCallee);
4744 return NewCall;
4745}
4746
// Turn a call through a ptrauth constant into a direct call to the underlying
// function, when the call's ptrauth bundle is known to be compatible with the
// constant. Returns the new call, or nullptr if the fold does not apply.
4747Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4749 if (!CPA)
4750 return nullptr;
4751
4752 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4753 // If the ptrauth constant isn't based on a function pointer, bail out.
4754 if (!CalleeF)
4755 return nullptr;
4756
4757 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4759 if (!PAB)
4760 return nullptr;
4761
// Bundle input 0 is the key, which must be a constant integer (cast<>
// asserts otherwise); input 1 is the discriminator.
4762 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4763 Value *Discriminator = PAB->Inputs[1];
4764
4765 // If the bundle doesn't match, this is probably going to fail to auth.
4766 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4767 return nullptr;
4768
4769 // If the bundle matches the constant, proceed in making this a direct call.
4771 NewCall->setCalledOperand(CalleeF);
4772 return NewCall;
4773}
4774
// Annotate the return value of an allocation call with facts derived from its
// known allocation size and alignment. Returns true if the call was changed.
// Calls that do not return a pointer are left alone.
4775bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4776 const TargetLibraryInfo *TLI) {
4777 // Note: We only handle cases which can't be driven from generic attributes
4778 // here. So, for example, nonnull and noalias (which are common properties
4779 // of some allocation functions) are expected to be handled via annotation
4780 // of the respective allocator declaration with generic attributes.
4781 bool Changed = false;
4782
4783 if (!Call.getType()->isPointerTy())
4784 return Changed;
4785
// Only a known, non-zero allocation size is used.
4786 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4787 if (Size && *Size != 0) {
4788 // TODO: We really should just emit deref_or_null here and then
4789 // let the generic inference code combine that with nonnull.
// In both branches Changed records whether the corresponding attribute was
// absent before this call site was annotated.
4790 if (Call.hasRetAttr(Attribute::NonNull)) {
4791 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4793 Call.getContext(), Size->getLimitedValue()));
4794 } else {
4795 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4797 Call.getContext(), Size->getLimitedValue()));
4798 }
4799 }
4800
4801 // Add alignment attribute if alignment is a power of two constant.
4802 Value *Alignment = getAllocAlignment(&Call, TLI);
4803 if (!Alignment)
4804 return Changed;
4805
// The alignment must be a constant strictly below Value::MaximumAlignment.
4806 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4807 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4808 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4809 if (llvm::isPowerOf2_64(AlignmentVal)) {
// Only act when the new alignment is larger than the existing return
// alignment (taken as 1 when none is set).
4810 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4811 Align NewAlign = Align(AlignmentVal);
4812 if (NewAlign > ExistingAlign) {
4815 Changed = true;
4816 }
4817 }
4818 }
4819 return Changed;
4820}
4821
4822/// Improvements for call, callbr and invoke instructions.
4823Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4824 bool Changed = annotateAnyAllocSite(Call, &TLI);
4825
4826 // Mark any parameters that are known to be non-null with the nonnull
4827 // attribute. This is helpful for inlining calls to functions with null
4828 // checks on their arguments.
4829 SmallVector<unsigned, 4> ArgNos;
4830 unsigned ArgNo = 0;
4831
4832 for (Value *V : Call.args()) {
4833 if (V->getType()->isPointerTy()) {
4834 // Simplify the nonnull operand if the parameter is known to be nonnull.
4835 // Otherwise, try to infer nonnull for it.
4836 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4837 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4838 (HasDereferenceable &&
4840 V->getType()->getPointerAddressSpace()))) {
4841 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4842 replaceOperand(Call, ArgNo, Res);
4843 Changed = true;
4844 }
4845 } else if (isKnownNonZero(V,
4846 getSimplifyQuery().getWithInstruction(&Call))) {
4847 ArgNos.push_back(ArgNo);
4848 }
4849 }
4850 ArgNo++;
4851 }
4852
4853 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4854
4855 if (!ArgNos.empty()) {
4856 AttributeList AS = Call.getAttributes();
4857 LLVMContext &Ctx = Call.getContext();
4858 AS = AS.addParamAttribute(Ctx, ArgNos,
4859 Attribute::get(Ctx, Attribute::NonNull));
4860 Call.setAttributes(AS);
4861 Changed = true;
4862 }
4863
4864 // If the callee is a pointer to a function, attempt to move any casts to the
4865 // arguments of the call/callbr/invoke.
4867 Function *CalleeF = dyn_cast<Function>(Callee);
4868 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4869 transformConstExprCastCall(Call))
4870 return nullptr;
4871
4872 if (CalleeF) {
4873 // Remove the convergent attr on calls when the callee is not convergent.
4874 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4875 !CalleeF->isIntrinsic()) {
4876 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4877 << "\n");
4879 return &Call;
4880 }
4881
4882 // If the call and callee calling conventions don't match, and neither one
4883 // of the calling conventions is compatible with C calling convention
4884 // this call must be unreachable, as the call is undefined.
4885 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4886 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4890 // Only do this for calls to a function with a body. A prototype may
4891 // not actually end up matching the implementation's calling conv for a
4892 // variety of reasons (e.g. it may be written in assembly).
4893 !CalleeF->isDeclaration()) {
4894 Instruction *OldCall = &Call;
4896 // If OldCall does not return void then replaceInstUsesWith poison.
4897 // This allows ValueHandlers and custom metadata to adjust itself.
4898 if (!OldCall->getType()->isVoidTy())
4899 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4900 if (isa<CallInst>(OldCall))
4901 return eraseInstFromFunction(*OldCall);
4902
4903 // We cannot remove an invoke or a callbr, because it would change the
4904 // CFG, just change the callee to a null pointer.
4905 cast<CallBase>(OldCall)->setCalledFunction(
4906 CalleeF->getFunctionType(),
4907 Constant::getNullValue(CalleeF->getType()));
4908 return nullptr;
4909 }
4910 }
4911
4912 // Calling a null function pointer is undefined if a null address isn't
4913 // dereferenceable.
4914 if ((isa<ConstantPointerNull>(Callee) &&
4916 isa<UndefValue>(Callee)) {
4917 // If Call does not return void then replaceInstUsesWith poison.
4918 // This allows ValueHandlers and custom metadata to adjust itself.
4919 if (!Call.getType()->isVoidTy())
4921
4922 if (Call.isTerminator()) {
4923 // Can't remove an invoke or callbr because we cannot change the CFG.
4924 return nullptr;
4925 }
4926
4927 // This instruction is not reachable, just remove it.
4930 }
4931
4932 if (IntrinsicInst *II = findInitTrampoline(Callee))
4933 return transformCallThroughTrampoline(Call, *II);
4934
4935 // Combine calls involving pointer authentication intrinsics.
4936 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4937 return NewCall;
4938
4939 // Combine calls to ptrauth constants.
4940 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4941 return NewCall;
4942
4943 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4944 InlineAsm *IA = cast<InlineAsm>(Callee);
4945 if (!IA->canThrow()) {
4946 // Normal inline asm calls cannot throw - mark them
4947 // 'nounwind'.
4949 Changed = true;
4950 }
4951 }
4952
4953 // Try to optimize the call if possible, we require DataLayout for most of
4954 // this. None of these calls are seen as possibly dead so go ahead and
4955 // delete the instruction now.
4956 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4957 Instruction *I = tryOptimizeCall(CI);
4958 // If we changed something return the result, etc. Otherwise let
4959 // the fallthrough check.
4960 if (I) return eraseInstFromFunction(*I);
4961 }
4962
4963 if (!Call.use_empty() && !Call.isMustTailCall())
4964 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4965 Type *CallTy = Call.getType();
4966 Type *RetArgTy = ReturnedArg->getType();
4967 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4968 return replaceInstUsesWith(
4969 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4970 }
4971
4972 // Drop unnecessary callee_type metadata from calls that were converted
4973 // into direct calls.
4974 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4975 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4976 Changed = true;
4977 }
4978
4979 // Drop unnecessary kcfi operand bundles from calls that were converted
4980 // into direct calls.
4982 if (Bundle && !Call.isIndirectCall()) {
4983 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4984 if (CalleeF) {
4985 ConstantInt *FunctionType = nullptr;
4986 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4987
4988 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4989 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4990
4991 if (FunctionType &&
4992 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4993 dbgs() << Call.getModule()->getName()
4994 << ": warning: kcfi: " << Call.getCaller()->getName()
4995 << ": call to " << CalleeF->getName()
4996 << " using a mismatching function pointer type\n";
4997 }
4998 });
4999
5001 }
5002
5003 if (isRemovableAlloc(&Call, &TLI))
5004 return visitAllocSite(Call);
5005
5006 // Handle intrinsics which can be used in both call and invoke context.
5007 switch (Call.getIntrinsicID()) {
5008 case Intrinsic::experimental_gc_statepoint: {
5009 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
5010 SmallPtrSet<Value *, 32> LiveGcValues;
5011 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5012 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5013
5014 // Remove the relocation if unused.
5015 if (GCR.use_empty()) {
5017 continue;
5018 }
5019
5020 Value *DerivedPtr = GCR.getDerivedPtr();
5021 Value *BasePtr = GCR.getBasePtr();
5022
5023 // Undef is undef, even after relocation.
5024 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
5027 continue;
5028 }
5029
5030 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
5031 // The relocation of null will be null for most any collector.
5032 // TODO: provide a hook for this in GCStrategy. There might be some
5033 // weird collector this property does not hold for.
5034 if (isa<ConstantPointerNull>(DerivedPtr)) {
5035 // Use null-pointer of gc_relocate's type to replace it.
5038 continue;
5039 }
5040
5041 // isKnownNonNull -> nonnull attribute
5042 if (!GCR.hasRetAttr(Attribute::NonNull) &&
5043 isKnownNonZero(DerivedPtr,
5044 getSimplifyQuery().getWithInstruction(&Call))) {
5045 GCR.addRetAttr(Attribute::NonNull);
5046 // We discovered new fact, re-check users.
5047 Worklist.pushUsersToWorkList(GCR);
5048 }
5049 }
5050
5051 // If we have two copies of the same pointer in the statepoint argument
5052 // list, canonicalize to one. This may let us common gc.relocates.
5053 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
5054 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
5055 auto *OpIntTy = GCR.getOperand(2)->getType();
5056 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
5057 }
5058
5059 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
5060 // Canonicalize on the type from the uses to the defs
5061
5062 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
5063 LiveGcValues.insert(BasePtr);
5064 LiveGcValues.insert(DerivedPtr);
5065 }
5066 std::optional<OperandBundleUse> Bundle =
5068 unsigned NumOfGCLives = LiveGcValues.size();
5069 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
5070 break;
5071 // We can reduce the size of gc live bundle.
5072 DenseMap<Value *, unsigned> Val2Idx;
5073 std::vector<Value *> NewLiveGc;
5074 for (Value *V : Bundle->Inputs) {
5075 auto [It, Inserted] = Val2Idx.try_emplace(V);
5076 if (!Inserted)
5077 continue;
5078 if (LiveGcValues.count(V)) {
5079 It->second = NewLiveGc.size();
5080 NewLiveGc.push_back(V);
5081 } else
5082 It->second = NumOfGCLives;
5083 }
5084 // Update all gc.relocates
5085 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5086 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5087 Value *BasePtr = GCR.getBasePtr();
5088 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
5089 "Missed live gc for base pointer");
5090 auto *OpIntTy1 = GCR.getOperand(1)->getType();
5091 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
5092 Value *DerivedPtr = GCR.getDerivedPtr();
5093 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
5094 "Missed live gc for derived pointer");
5095 auto *OpIntTy2 = GCR.getOperand(2)->getType();
5096 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
5097 }
5098 // Create new statepoint instruction.
5099 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
5100 return CallBase::Create(&Call, NewBundle);
5101 }
5102 default: { break; }
5103 }
5104
5105 return Changed ? &Call : nullptr;
5106}
5107
5108/// If the callee is a constexpr cast of a function, attempt to move the cast to
5109/// the arguments of the call/invoke.
5110/// CallBrInst is not supported.
5111bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
5112 auto *Callee =
5114 if (!Callee)
5115 return false;
5116
5118 "CallBr's don't have a single point after a def to insert at");
5119
5120 // Don't perform the transform for declarations, which may not be fully
5121 // accurate. For example, void @foo() is commonly used as a placeholder for
5122 // unknown prototypes.
5123 if (Callee->isDeclaration())
5124 return false;
5125
5126 // If this is a call to a thunk function, don't remove the cast. Thunks are
5127 // used to transparently forward all incoming parameters and outgoing return
5128 // values, so it's important to leave the cast in place.
5129 if (Callee->hasFnAttribute("thunk"))
5130 return false;
5131
5132 // If this is a call to a naked function, the assembly might be
5133 // using an argument, or otherwise rely on the frame layout,
5134 // the function prototype will mismatch.
5135 if (Callee->hasFnAttribute(Attribute::Naked))
5136 return false;
5137
5138 // If this is a musttail call, the callee's prototype must match the caller's
5139 // prototype with the exception of pointee types. The code below doesn't
5140 // implement that, so we can't do this transform.
5141 // TODO: Do the transform if it only requires adding pointer casts.
5142 if (Call.isMustTailCall())
5143 return false;
5144
5146 const AttributeList &CallerPAL = Call.getAttributes();
5147
5148 // Okay, this is a cast from a function to a different type. Unless doing so
5149 // would cause a type conversion of one of our arguments, change this call to
5150 // be a direct call with arguments casted to the appropriate types.
5151 FunctionType *FT = Callee->getFunctionType();
5152 Type *OldRetTy = Caller->getType();
5153 Type *NewRetTy = FT->getReturnType();
5154
5155 // Check to see if we are changing the return type...
5156 if (OldRetTy != NewRetTy) {
5157
5158 if (NewRetTy->isStructTy())
5159 return false; // TODO: Handle multiple return values.
5160
5161 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5162 if (!Caller->use_empty())
5163 return false; // Cannot transform this return value.
5164 }
5165
5166 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5167 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5168 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5169 NewRetTy, CallerPAL.getRetAttrs())))
5170 return false; // Attribute not compatible with transformed value.
5171 }
5172
5173 // If the callbase is an invoke instruction, and the return value is
5174 // used by a PHI node in a successor, we cannot change the return type of
5175 // the call because there is no place to put the cast instruction (without
5176 // breaking the critical edge). Bail out in this case.
5177 if (!Caller->use_empty()) {
5178 BasicBlock *PhisNotSupportedBlock = nullptr;
5179 if (auto *II = dyn_cast<InvokeInst>(Caller))
5180 PhisNotSupportedBlock = II->getNormalDest();
5181 if (PhisNotSupportedBlock)
5182 for (User *U : Caller->users())
5183 if (PHINode *PN = dyn_cast<PHINode>(U))
5184 if (PN->getParent() == PhisNotSupportedBlock)
5185 return false;
5186 }
5187 }
5188
5189 unsigned NumActualArgs = Call.arg_size();
5190 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5191
5192 // Prevent us turning:
5193 // declare void @takes_i32_inalloca(i32* inalloca)
5194 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5195 //
5196 // into:
5197 // call void @takes_i32_inalloca(i32* null)
5198 //
5199 // Similarly, avoid folding away bitcasts of byval calls.
5200 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5201 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5202 return false;
5203
5204 auto AI = Call.arg_begin();
5205 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5206 Type *ParamTy = FT->getParamType(i);
5207 Type *ActTy = (*AI)->getType();
5208
5209 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5210 return false; // Cannot transform this parameter value.
5211
5212 // Check if there are any incompatible attributes we cannot drop safely.
5213 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5214 .overlaps(AttributeFuncs::typeIncompatible(
5215 ParamTy, CallerPAL.getParamAttrs(i),
5216 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5217 return false; // Attribute not compatible with transformed value.
5218
5219 if (Call.isInAllocaArgument(i) ||
5220 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5221 return false; // Cannot transform to and from inalloca/preallocated.
5222
5223 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5224 return false;
5225
5226 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5227 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5228 return false; // Cannot transform to or from byval.
5229 }
5230
5231 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5232 !CallerPAL.isEmpty()) {
5233 // In this case we have more arguments than the new function type, but we
5234 // won't be dropping them. Check that these extra arguments have attributes
5235 // that are compatible with being a vararg call argument.
5236 unsigned SRetIdx;
5237 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5238 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5239 return false;
5240 }
5241
5242 // Okay, we decided that this is a safe thing to do: go ahead and start
5243 // inserting cast instructions as necessary.
5244 SmallVector<Value *, 8> Args;
5246 Args.reserve(NumActualArgs);
5247 ArgAttrs.reserve(NumActualArgs);
5248
5249 // Get any return attributes.
5250 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5251
5252 // If the return value is not being used, the type may not be compatible
5253 // with the existing attributes. Wipe out any problematic attributes.
5254 RAttrs.remove(
5255 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5256
5257 LLVMContext &Ctx = Call.getContext();
5258 AI = Call.arg_begin();
5259 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5260 Type *ParamTy = FT->getParamType(i);
5261
5262 Value *NewArg = *AI;
5263 if ((*AI)->getType() != ParamTy)
5264 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5265 Args.push_back(NewArg);
5266
5267 // Add any parameter attributes except the ones incompatible with the new
5268 // type. Note that we made sure all incompatible ones are safe to drop.
5269 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5270 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5271 ArgAttrs.push_back(
5272 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5273 }
5274
5275 // If the function takes more arguments than the call was taking, add them
5276 // now.
5277 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5278 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5279 ArgAttrs.push_back(AttributeSet());
5280 }
5281
5282 // If we are removing arguments to the function, emit an obnoxious warning.
5283 if (FT->getNumParams() < NumActualArgs) {
5284 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5285 if (FT->isVarArg()) {
5286 // Add all of the arguments in their promoted form to the arg list.
5287 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5288 Type *PTy = getPromotedType((*AI)->getType());
5289 Value *NewArg = *AI;
5290 if (PTy != (*AI)->getType()) {
5291 // Must promote to pass through va_arg area!
5292 Instruction::CastOps opcode =
5293 CastInst::getCastOpcode(*AI, false, PTy, false);
5294 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5295 }
5296 Args.push_back(NewArg);
5297
5298 // Add any parameter attributes.
5299 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5300 }
5301 }
5302 }
5303
5304 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5305
5306 if (NewRetTy->isVoidTy())
5307 Caller->setName(""); // Void type should not have a name.
5308
5309 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5310 "missing argument attributes");
5311 AttributeList NewCallerPAL = AttributeList::get(
5312 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5313
5315 Call.getOperandBundlesAsDefs(OpBundles);
5316
5317 CallBase *NewCall;
5318 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5319 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5320 II->getUnwindDest(), Args, OpBundles);
5321 } else {
5322 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5323 cast<CallInst>(NewCall)->setTailCallKind(
5324 cast<CallInst>(Caller)->getTailCallKind());
5325 }
5326 NewCall->takeName(Caller);
5328 NewCall->setAttributes(NewCallerPAL);
5329
5330 // Preserve prof metadata if any.
5331 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5332
5333 // Insert a cast of the return type as necessary.
5334 Instruction *NC = NewCall;
5335 Value *NV = NC;
5336 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5337 assert(!NV->getType()->isVoidTy());
5339 NC->setDebugLoc(Caller->getDebugLoc());
5340
5341 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5342 assert(OptInsertPt && "No place to insert cast");
5343 InsertNewInstBefore(NC, *OptInsertPt);
5344 Worklist.pushUsersToWorkList(*Caller);
5345 }
5346
5347 if (!Caller->use_empty())
5348 replaceInstUsesWith(*Caller, NV);
5349 else if (Caller->hasValueHandle()) {
5350 if (OldRetTy == NV->getType())
5352 else
5353 // We cannot call ValueIsRAUWd with a different type, and the
5354 // actual tracked value will disappear.
5356 }
5357
5358 eraseInstFromFunction(*Caller);
5359 return true;
5360}
5361
5362/// Turn a call to a function created by init_trampoline / adjust_trampoline
5363/// intrinsic pair into a direct call to the underlying function.
5365InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5366 IntrinsicInst &Tramp) {
5367 FunctionType *FTy = Call.getFunctionType();
5368 AttributeList Attrs = Call.getAttributes();
5369
5370 // If the call already has the 'nest' attribute somewhere then give up -
5371 // otherwise 'nest' would occur twice after splicing in the chain.
5372 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5373 return nullptr;
5374
5376 FunctionType *NestFTy = NestF->getFunctionType();
5377
5378 AttributeList NestAttrs = NestF->getAttributes();
5379 if (!NestAttrs.isEmpty()) {
5380 unsigned NestArgNo = 0;
5381 Type *NestTy = nullptr;
5382 AttributeSet NestAttr;
5383
5384 // Look for a parameter marked with the 'nest' attribute.
5385 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5386 E = NestFTy->param_end();
5387 I != E; ++NestArgNo, ++I) {
5388 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5389 if (AS.hasAttribute(Attribute::Nest)) {
5390 // Record the parameter type and any other attributes.
5391 NestTy = *I;
5392 NestAttr = AS;
5393 break;
5394 }
5395 }
5396
5397 if (NestTy) {
5398 std::vector<Value*> NewArgs;
5399 std::vector<AttributeSet> NewArgAttrs;
5400 NewArgs.reserve(Call.arg_size() + 1);
5401 NewArgAttrs.reserve(Call.arg_size());
5402
5403 // Insert the nest argument into the call argument list, which may
5404 // mean appending it. Likewise for attributes.
5405
5406 {
5407 unsigned ArgNo = 0;
5408 auto I = Call.arg_begin(), E = Call.arg_end();
5409 do {
5410 if (ArgNo == NestArgNo) {
5411 // Add the chain argument and attributes.
5412 Value *NestVal = Tramp.getArgOperand(2);
5413 if (NestVal->getType() != NestTy)
5414 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5415 NewArgs.push_back(NestVal);
5416 NewArgAttrs.push_back(NestAttr);
5417 }
5418
5419 if (I == E)
5420 break;
5421
5422 // Add the original argument and attributes.
5423 NewArgs.push_back(*I);
5424 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5425
5426 ++ArgNo;
5427 ++I;
5428 } while (true);
5429 }
5430
5431 // The trampoline may have been bitcast to a bogus type (FTy).
5432 // Handle this by synthesizing a new function type, equal to FTy
5433 // with the chain parameter inserted.
5434
5435 std::vector<Type*> NewTypes;
5436 NewTypes.reserve(FTy->getNumParams()+1);
5437
5438 // Insert the chain's type into the list of parameter types, which may
5439 // mean appending it.
5440 {
5441 unsigned ArgNo = 0;
5442 FunctionType::param_iterator I = FTy->param_begin(),
5443 E = FTy->param_end();
5444
5445 do {
5446 if (ArgNo == NestArgNo)
5447 // Add the chain's type.
5448 NewTypes.push_back(NestTy);
5449
5450 if (I == E)
5451 break;
5452
5453 // Add the original type.
5454 NewTypes.push_back(*I);
5455
5456 ++ArgNo;
5457 ++I;
5458 } while (true);
5459 }
5460
5461 // Replace the trampoline call with a direct call. Let the generic
5462 // code sort out any function type mismatches.
5463 FunctionType *NewFTy =
5464 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5465 AttributeList NewPAL =
5466 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5467 Attrs.getRetAttrs(), NewArgAttrs);
5468
5470 Call.getOperandBundlesAsDefs(OpBundles);
5471
5472 Instruction *NewCaller;
5473 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5474 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5475 II->getUnwindDest(), NewArgs, OpBundles);
5476 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5477 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5478 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5479 NewCaller =
5480 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5481 CBI->getIndirectDests(), NewArgs, OpBundles);
5482 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5483 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5484 } else {
5485 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5486 cast<CallInst>(NewCaller)->setTailCallKind(
5487 cast<CallInst>(Call).getTailCallKind());
5488 cast<CallInst>(NewCaller)->setCallingConv(
5489 cast<CallInst>(Call).getCallingConv());
5490 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5491 }
5492 NewCaller->setDebugLoc(Call.getDebugLoc());
5493
5494 return NewCaller;
5495 }
5496 }
5497
5498 // Replace the trampoline call with a direct call. Since there is no 'nest'
5499 // parameter, there is no need to adjust the argument list. Let the generic
5500 // code sort out any function type mismatches.
5501 Call.setCalledFunction(FTy, NestF);
5502 return &Call;
5503}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M, IRBuilderBase &B)
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static Bitset< 256 > parseFormatStringSpecifiers(StringRef FormatStr)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static bool isAspectNeeded(StringRef Aspect, CallInst *CI, unsigned FirstArgIdx, const std::optional< Bitset< 256 > > &Specifiers)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
bool isNegative() const
Definition APFloat.h:1538
void clearSign()
Definition APFloat.h:1357
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isZero() const
Definition APFloat.h:1534
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1983
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1963
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1970
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2071
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
This is a constexpr reimplementation of a subset of std::bitset.
Definition Bitset.h:30
constexpr bool any() const
Definition Bitset.h:113
constexpr Bitset & set()
Definition Bitset.h:81
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2130
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2659
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2494
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2257
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1259
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1312
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:798
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI AssumeSeparateStorageInfo getAssumeSeparateStorageInfo(OperandBundleUse)
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1740
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1695
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1726
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1676
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI AssumeNonNullInfo getAssumeNonNullInfo(OperandBundleUse)
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const SimplifyQuery &Q, bool IgnoreFree=false)
Returns true if V is always a dereferenceable pointer with alignment greater than or equal to the requested alignment.
Definition Loads.cpp:245
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
LLVM_ABI AssumeAlignInfo getAssumeAlignInfo(OperandBundleUse)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:265
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1713
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1753
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI AssumeDereferenceableInfo getAssumeDereferenceableInfo(OperandBundleUse)
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI AssumeNoUndefInfo getAssumeNoUndefInfo(OperandBundleUse)
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const