InstCombineCalls.cpp
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Statepoint.h"
55#include "llvm/IR/Type.h"
56#include "llvm/IR/User.h"
57#include "llvm/IR/Value.h"
58#include "llvm/IR/ValueHandle.h"
63#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstdint>
77#include <optional>
78#include <utility>
79#include <vector>
80
81#define DEBUG_TYPE "instcombine"
83
84using namespace llvm;
85using namespace PatternMatch;
86
87STATISTIC(NumSimplified, "Number of library calls simplified");
88
89static cl::opt<unsigned> GuardWideningWindow(
90 "instcombine-guard-widening-window",
91 cl::init(3),
92 cl::desc("How wide an instruction window to bypass looking for "
93 "another guard"));
94
95/// Return the specified type promoted as it would be to pass through a va_arg
96/// area.
97static Type *getPromotedType(Type *Ty) {
98 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
99 if (ITy->getBitWidth() < 32)
100 return Type::getInt32Ty(Ty->getContext());
101 }
102 return Ty;
103}
104
105/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
106/// TODO: This should probably be integrated with visitAllocSites, but that
107/// requires a deeper change to allow either unread or unwritten objects.
108static bool hasUndefSource(AnyMemTransferInst *MI) {
109 auto *Src = MI->getRawSource();
110 while (isa<GetElementPtrInst>(Src)) {
111 if (!Src->hasOneUse())
112 return false;
113 Src = cast<Instruction>(Src)->getOperand(0);
114 }
115 return isa<AllocaInst>(Src) && Src->hasOneUse();
116}
117
118Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
119 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
120 MaybeAlign CopyDstAlign = MI->getDestAlign();
121 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
122 MI->setDestAlignment(DstAlign);
123 return MI;
124 }
125
126 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
127 MaybeAlign CopySrcAlign = MI->getSourceAlign();
128 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
129 MI->setSourceAlignment(SrcAlign);
130 return MI;
131 }
132
133 // If we have a store to a location which is known constant, we can conclude
134 // that the store must be storing the constant value (else the memory
135 // wouldn't be constant), and this must be a noop.
136 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
137 // Set the size of the copy to 0, it will be deleted on the next iteration.
138 MI->setLength((uint64_t)0);
139 return MI;
140 }
141
142 // If the source is provably undef, the memcpy/memmove doesn't do anything
143 // (unless the transfer is volatile).
144 if (hasUndefSource(MI) && !MI->isVolatile()) {
145 // Set the size of the copy to 0, it will be deleted on the next iteration.
146 MI->setLength((uint64_t)0);
147 return MI;
148 }
149
150 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
151 // load/store.
152 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
153 if (!MemOpLength) return nullptr;
154
155 // Source and destination pointer types are always "i8*" for intrinsic. See
156 // if the size is something we can handle with a single primitive load/store.
157 // A single load+store correctly handles overlapping memory in the memmove
158 // case.
159 uint64_t Size = MemOpLength->getLimitedValue();
160 assert(Size && "0-sized memory transferring should be removed already.");
161
162 if (Size > 8 || (Size&(Size-1)))
163 return nullptr; // If not 1/2/4/8 bytes, exit.
164
165 // If it is an atomic and the alignment is less than the size, then we will
166 // introduce an unaligned memory access, which will later be transformed
167 // into a libcall in CodeGen. This is not an evident performance gain, so
168 // disable it for now.
169 if (MI->isAtomic())
170 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
171 return nullptr;
172
173 // Use an integer load+store unless we can find something better.
174 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
175
176 // If the memcpy has metadata describing the members, see if we can get the
177 // TBAA, scope and noalias tags describing our copy.
178 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
179
180 Value *Src = MI->getArgOperand(1);
181 Value *Dest = MI->getArgOperand(0);
182 LoadInst *L = Builder.CreateLoad(IntType, Src);
183 // Alignment from the mem intrinsic will be better, so use it.
184 L->setAlignment(*CopySrcAlign);
185 L->setAAMetadata(AACopyMD);
186 MDNode *LoopMemParallelMD =
187 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
188 if (LoopMemParallelMD)
189 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
190 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
191 if (AccessGroupMD)
192 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
193
194 StoreInst *S = Builder.CreateStore(L, Dest);
195 // Alignment from the mem intrinsic will be better, so use it.
196 S->setAlignment(*CopyDstAlign);
197 S->setAAMetadata(AACopyMD);
198 if (LoopMemParallelMD)
199 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
200 if (AccessGroupMD)
201 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
202 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
203
204 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
205 // non-atomics can be volatile
206 L->setVolatile(MT->isVolatile());
207 S->setVolatile(MT->isVolatile());
208 }
209 if (MI->isAtomic()) {
210 // atomics have to be unordered
211 L->setOrdering(AtomicOrdering::Unordered);
212 S->setOrdering(AtomicOrdering::Unordered);
213 }
214
215 // Set the size of the copy to 0, it will be deleted on the next iteration.
216 MI->setLength((uint64_t)0);
217 return MI;
218}
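
// Illustrative IR sketch of the constant-length path above (value names such
// as %src, %dst and %v are hypothetical): an 8-byte, non-atomic memcpy
// collapses to one integer load/store pair that keeps the intrinsic's
// alignment and AA metadata:
//
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 8, i1 false)
//     -->
//   %v = load i64, ptr %src, align 8
//   store i64 %v, ptr %dst, align 8
//
// The original intrinsic is then shrunk to length 0 and erased on a later
// iteration, as noted above.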
219
220Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
221 const Align KnownAlignment =
222 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
223 MaybeAlign MemSetAlign = MI->getDestAlign();
224 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
225 MI->setDestAlignment(KnownAlignment);
226 return MI;
227 }
228
229 // If we have a store to a location which is known constant, we can conclude
230 // that the store must be storing the constant value (else the memory
231 // wouldn't be constant), and this must be a noop.
232 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
233 // Set the size of the copy to 0, it will be deleted on the next iteration.
234 MI->setLength((uint64_t)0);
235 return MI;
236 }
237
238 // Remove memset with an undef value.
239 // FIXME: This is technically incorrect because it might overwrite a poison
240 // value. Change to PoisonValue once #52930 is resolved.
241 if (isa<UndefValue>(MI->getValue())) {
242 // Set the size of the copy to 0, it will be deleted on the next iteration.
243 MI->setLength((uint64_t)0);
244 return MI;
245 }
246
247 // Extract the length and alignment and fill if they are constant.
248 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
249 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
250 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
251 return nullptr;
252 const uint64_t Len = LenC->getLimitedValue();
253 assert(Len && "0-sized memory setting should be removed already.");
254 const Align Alignment = MI->getDestAlign().valueOrOne();
255
256 // If it is an atomic and the alignment is less than the size, then we will
257 // introduce an unaligned memory access, which will later be transformed
258 // into a libcall in CodeGen. This is not an evident performance gain, so
259 // disable it for now.
260 if (MI->isAtomic() && Alignment < Len)
261 return nullptr;
262
263 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
264 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
265 Value *Dest = MI->getDest();
266
267 // Extract the fill value and store.
268 Constant *FillVal = ConstantInt::get(
269 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
270 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
271 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
272 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
273 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
274 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
275 }
276
277 S->setAlignment(Alignment);
278 if (MI->isAtomic())
279 S->setOrdering(AtomicOrdering::Unordered);
280
281 // Set the size of the copy to 0, it will be deleted on the next iteration.
282 MI->setLength((uint64_t)0);
283 return MI;
284 }
285
286 return nullptr;
287}
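
// Illustrative sketch of the memset-to-store fold above (hypothetical %p):
// a 4-byte memset of the constant byte 42 becomes a single i32 store of the
// splatted fill value:
//
//   call void @llvm.memset.p0.i64(ptr align 4 %p, i8 42, i64 4, i1 false)
//     -->
//   store i32 707406378, ptr %p, align 4   ; 0x2A2A2A2A = 42 splatted to 4 bytes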
288
289// TODO, Obvious Missing Transforms:
290// * Narrow width by halfs excluding zero/undef lanes
291Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
292 Value *LoadPtr = II.getArgOperand(0);
293 const Align Alignment = II.getParamAlign(0).valueOrOne();
294
295 // If the mask is all ones or undefs, this is a plain vector load of the 1st
296 // argument.
297 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
298 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
299 "unmaskedload");
300 L->copyMetadata(II);
301 return L;
302 }
303
304 // If we can unconditionally load from this address, replace with a
305 // load/select idiom. TODO: use DT for context sensitive query
306 if (isDereferenceablePointer(LoadPtr, II.getType(),
307 II.getDataLayout(), &II, &AC)) {
308 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
309 "unmaskedload");
310 LI->copyMetadata(II);
311 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
312 }
313
314 return nullptr;
315}
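
// The two folds above, in the shorthand used elsewhere in this file (mask and
// value names are illustrative):
//   masked.load(ptr, all-ones-or-undef mask, passthru) --> load ptr
//   masked.load(ptr, mask, passthru), ptr dereferenceable
//       --> select mask, (load ptr), passthru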
316
317// TODO, Obvious Missing Transforms:
318// * Single constant active lane -> store
319// * Narrow width by halfs excluding zero/undef lanes
320Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
321 Value *StorePtr = II.getArgOperand(1);
322 Align Alignment = II.getParamAlign(1).valueOrOne();
323 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
324 if (!ConstMask)
325 return nullptr;
326
327 // If the mask is all zeros, this instruction does nothing.
328 if (maskIsAllZeroOrUndef(ConstMask))
329 return eraseInstFromFunction(II);
330
331 // If the mask is all ones, this is a plain vector store of the 1st argument.
332 if (maskIsAllOneOrUndef(ConstMask)) {
333 StoreInst *S =
334 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
335 S->copyMetadata(II);
336 return S;
337 }
338
339 if (isa<ScalableVectorType>(ConstMask->getType()))
340 return nullptr;
341
342 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
343 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
344 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
345 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
346 PoisonElts))
347 return replaceOperand(II, 0, V);
348
349 return nullptr;
350}
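
// The constant-mask cases above, in the same shorthand:
//   masked.store(val, ptr, all-zeros-or-undef mask) --> (erased)
//   masked.store(val, ptr, all-ones-or-undef mask)  --> store val, ptr
// plus demanded-element simplification of the stored value for fixed vectors.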
351
352// TODO, Obvious Missing Transforms:
353// * Single constant active lane load -> load
354// * Dereferenceable address & few lanes -> scalarize speculative load/selects
355// * Adjacent vector addresses -> masked.load
356// * Narrow width by halfs excluding zero/undef lanes
357// * Vector incrementing address -> vector masked load
358Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
359 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
360 if (!ConstMask)
361 return nullptr;
362
363 // Vector splat address w/known mask -> scalar load
364 // Fold the gather to load the source vector first lane
365 // because it is reloading the same value each time
366 if (ConstMask->isAllOnesValue())
367 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
368 auto *VecTy = cast<VectorType>(II.getType());
369 const Align Alignment = II.getParamAlign(0).valueOrOne();
370 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
371 Alignment, "load.scalar");
372 Value *Shuf =
373 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
374 return replaceInstUsesWith(II, Shuf);
375 }
376
377 return nullptr;
378}
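
// Shorthand for the splat-pointer fold above: every active lane reloads the
// same address, so
//   gather(splat(ptr), all-ones mask) --> splat(load ptr)
// i.e. one scalar load followed by a vector splat.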
379
380// TODO, Obvious Missing Transforms:
381// * Single constant active lane -> store
382// * Adjacent vector addresses -> masked.store
383// * Narrow store width by halfs excluding zero/undef lanes
384// * Vector incrementing address -> vector masked store
385Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
386 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
387 if (!ConstMask)
388 return nullptr;
389
390 // If the mask is all zeros, a scatter does nothing.
391 if (maskIsAllZeroOrUndef(ConstMask))
392 return eraseInstFromFunction(II);
393
394 // Vector splat address -> scalar store
395 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
396 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
397 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
398 if (maskContainsAllOneOrUndef(ConstMask)) {
399 Align Alignment = II.getParamAlign(1).valueOrOne();
400 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
401 Alignment);
402 S->copyMetadata(II);
403 return S;
404 }
405 }
406 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
407 // lastlane), ptr
408 if (ConstMask->isAllOnesValue()) {
409 Align Alignment = II.getParamAlign(1).valueOrOne();
410 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
411 ElementCount VF = WideLoadTy->getElementCount();
412 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
413 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
414 Value *Extract =
415 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
416 StoreInst *S =
417 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
418 S->copyMetadata(II);
419 return S;
420 }
421 }
422 if (isa<ScalableVectorType>(ConstMask->getType()))
423 return nullptr;
424
425 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
426 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
427 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
428 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
429 PoisonElts))
430 return replaceOperand(II, 0, V);
431 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
432 PoisonElts))
433 return replaceOperand(II, 1, V);
434
435 return nullptr;
436}
437
438/// This function transforms launder.invariant.group and strip.invariant.group
439/// like:
440/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
441/// launder(strip(%x)) -> launder(%x)
442/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
443/// strip(launder(%x)) -> strip(%x)
444/// This is legal because it preserves the most recent information about
445/// the presence or absence of invariant.group.
446static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
447 InstCombinerImpl &IC) {
448 auto *Arg = II.getArgOperand(0);
449 auto *StrippedArg = Arg->stripPointerCasts();
450 auto *StrippedInvariantGroupsArg = StrippedArg;
451 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
452 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
453 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
454 break;
455 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
456 }
457 if (StrippedArg == StrippedInvariantGroupsArg)
458 return nullptr; // No launders/strips to remove.
459
460 Value *Result = nullptr;
461
462 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
463 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
464 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
465 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
466 else
467 llvm_unreachable(
468 "simplifyInvariantGroupIntrinsic only handles launder and strip");
469 if (Result->getType()->getPointerAddressSpace() !=
470 II.getType()->getPointerAddressSpace())
471 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
472
473 return cast<Instruction>(Result);
474}
475
476static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
477 assert((II.getIntrinsicID() == Intrinsic::cttz ||
478 II.getIntrinsicID() == Intrinsic::ctlz) &&
479 "Expected cttz or ctlz intrinsic");
480 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
481 Value *Op0 = II.getArgOperand(0);
482 Value *Op1 = II.getArgOperand(1);
483 Value *X;
484 // ctlz(bitreverse(x)) -> cttz(x)
485 // cttz(bitreverse(x)) -> ctlz(x)
486 if (match(Op0, m_BitReverse(m_Value(X)))) {
487 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
488 Function *F =
489 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
490 return CallInst::Create(F, {X, II.getArgOperand(1)});
491 }
492
493 if (II.getType()->isIntOrIntVectorTy(1)) {
494 // ctlz/cttz i1 Op0 --> not Op0
495 if (match(Op1, m_Zero()))
496 return BinaryOperator::CreateNot(Op0);
497 // If zero is poison, then the input can be assumed to be "true", so the
498 // instruction simplifies to "false".
499 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
500 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
501 }
502
503 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
504 if (II.hasOneUse() && match(Op1, m_Zero()) &&
505 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
506 II.dropUBImplyingAttrsAndMetadata();
507 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
508 }
509
510 Constant *C;
511
512 if (IsTZ) {
513 // cttz(-x) -> cttz(x)
514 if (match(Op0, m_Neg(m_Value(X))))
515 return IC.replaceOperand(II, 0, X);
516
517 // cttz(-x & x) -> cttz(x)
518 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
519 return IC.replaceOperand(II, 0, X);
520
521 // cttz(sext(x)) -> cttz(zext(x))
522 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
523 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
524 auto *CttzZext =
525 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
526 return IC.replaceInstUsesWith(II, CttzZext);
527 }
528
529 // Zext doesn't change the number of trailing zeros, so narrow:
530 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
531 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
532 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
533 IC.Builder.getTrue());
534 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
535 return IC.replaceInstUsesWith(II, ZextCttz);
536 }
537
538 // cttz(abs(x)) -> cttz(x)
539 // cttz(nabs(x)) -> cttz(x)
540 Value *Y;
541 SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
542 if (SPF == SPF_ABS || SPF == SPF_NABS)
543 return IC.replaceOperand(II, 0, X);
544
545 if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
546 return IC.replaceOperand(II, 0, X);
547
548 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
549 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
550 match(Op1, m_One())) {
551 Value *ConstCttz =
552 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
553 return BinaryOperator::CreateAdd(ConstCttz, X);
554 }
555
556 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
557 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
558 match(Op1, m_One())) {
559 Value *ConstCttz =
560 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
561 return BinaryOperator::CreateSub(ConstCttz, X);
562 }
563
564 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
565 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
566 Value *Width =
567 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
568 return BinaryOperator::CreateSub(Width, X);
569 }
570 } else {
571 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
572 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
573 match(Op1, m_One())) {
574 Value *ConstCtlz =
575 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
576 return BinaryOperator::CreateAdd(ConstCtlz, X);
577 }
578
579 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
580 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
581 match(Op1, m_One())) {
582 Value *ConstCtlz =
583 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
584 return BinaryOperator::CreateSub(ConstCtlz, X);
585 }
586
587 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
588 if (Op0->hasOneUse() &&
589 match(Op0,
590 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
591 Type *Ty = II.getType();
592 unsigned BitWidth = Ty->getScalarSizeInBits();
593 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
594 {X, IC.Builder.getFalse()});
595 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
596 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
597 }
598 }
599
600 // cttz(Pow2) -> Log2(Pow2)
601 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
602 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
603 if (IsTZ)
604 return IC.replaceInstUsesWith(II, R);
605 BinaryOperator *BO = BinaryOperator::CreateSub(
606 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
607 R);
608 BO->setHasNoSignedWrap();
609 BO->setHasNoUnsignedWrap();
610 return BO;
611 }
612
613 KnownBits Known = IC.computeKnownBits(Op0, &II);
614
615 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
616 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
617 : Known.countMaxLeadingZeros();
618 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
619 : Known.countMinLeadingZeros();
620
621 // If all bits above (ctlz) or below (cttz) the first known one are known
622 // zero, this value is constant.
623 // FIXME: This should be in InstSimplify because we're replacing an
624 // instruction with a constant.
625 if (PossibleZeros == DefiniteZeros) {
626 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
627 return IC.replaceInstUsesWith(II, C);
628 }
629
630 // If the input to cttz/ctlz is known to be non-zero,
631 // then change the 'ZeroIsPoison' parameter to 'true'
632 // because we know the zero behavior can't affect the result.
633 if (!Known.One.isZero() ||
634 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) {
635 if (!match(II.getArgOperand(1), m_One()))
636 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
637 }
638
639 // Add range attribute since known bits can't completely reflect what we know.
640 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
641 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
642 !II.getMetadata(LLVMContext::MD_range)) {
643 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
644 APInt(BitWidth, PossibleZeros + 1));
645 II.addRangeRetAttr(Range);
646 return &II;
647 }
648
649 return nullptr;
650}
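
// Worked example for the range-attribute step above (hypothetical i32 %x):
// if the low 4 bits of %x are known to be zero and bit 4 is unknown, then
// DefiniteZeros == 4 and PossibleZeros == 32, so
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 false)
// gets the return attribute range(i32 4, 33), i.e. a result in [4, 33).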
651
652static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
653 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
654 "Expected ctpop intrinsic");
655 Type *Ty = II.getType();
656 unsigned BitWidth = Ty->getScalarSizeInBits();
657 Value *Op0 = II.getArgOperand(0);
658 Value *X, *Y;
659
660 // ctpop(bitreverse(x)) -> ctpop(x)
661 // ctpop(bswap(x)) -> ctpop(x)
662 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
663 return IC.replaceOperand(II, 0, X);
664
665 // ctpop(rot(x)) -> ctpop(x)
666 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
667 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
668 X == Y)
669 return IC.replaceOperand(II, 0, X);
670
671 // ctpop(x | -x) -> bitwidth - cttz(x, false)
672 if (Op0->hasOneUse() &&
673 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
674 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
675 {X, IC.Builder.getFalse()});
676 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
677 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
678 }
679
680 // ctpop(~x & (x - 1)) -> cttz(x, false)
681 if (match(Op0,
682 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
683 Function *F =
684 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
685 return CallInst::Create(F, {X, IC.Builder.getFalse()});
686 }
687
688 // Zext doesn't change the number of set bits, so narrow:
689 // ctpop (zext X) --> zext (ctpop X)
690 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
691 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
692 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
693 }
694
695 KnownBits Known(BitWidth);
696 IC.computeKnownBits(Op0, Known, &II);
697
698 // If all bits are zero except for exactly one fixed bit, then the result
699 // must be 0 or 1, and we can get that answer by shifting to LSB:
700 // ctpop (X & 32) --> (X & 32) >> 5
701 // TODO: Investigate removing this as it's likely unnecessary given the below
702 // `isKnownToBeAPowerOfTwo` check.
703 if ((~Known.Zero).isPowerOf2())
704 return BinaryOperator::CreateLShr(
705 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
706
707 // More generally we can also handle non-constant power of 2 patterns such as
708 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
709 // ctpop(Pow2OrZero) --> icmp ne X, 0
710 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
711 return CastInst::Create(Instruction::ZExt,
712 IC.Builder.CreateICmpNE(Op0, Constant::getNullValue(Ty)),
714 Ty);
715
716 // Add range attribute since known bits can't completely reflect what we know.
717 if (BitWidth != 1) {
718 ConstantRange OldRange =
719 II.getRange().value_or(ConstantRange::getFull(BitWidth));
720
721 unsigned Lower = Known.countMinPopulation();
722 unsigned Upper = Known.countMaxPopulation() + 1;
723
724 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
725 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
726 Lower = 1;
727
728 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
729 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
730
731 if (Range != OldRange) {
732 II.addRangeRetAttr(Range);
733 return &II;
734 }
735 }
736
737 return nullptr;
738}
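
// Illustrative IR for the zext-narrowing fold above (hypothetical %x):
//   %w = zext i8 %x to i32
//   %p = call i32 @llvm.ctpop.i32(i32 %w)
//     -->
//   %n = call i8 @llvm.ctpop.i8(i8 %x)
//   %p = zext i8 %n to i32
// (valid because zero extension adds no set bits; the zext must be one-use).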
739
740/// Convert a table lookup to shufflevector if the mask is constant.
741/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
742/// which case we could lower the shufflevector with rev64 instructions
743/// as it's actually a byte reverse.
744static Value *simplifyNeonTbl1(const IntrinsicInst &II,
745 InstCombiner::BuilderTy &Builder) {
746 // Bail out if the mask is not a constant.
747 auto *C = dyn_cast<Constant>(II.getArgOperand(1));
748 if (!C)
749 return nullptr;
750
751 auto *VecTy = cast<FixedVectorType>(II.getType());
752 unsigned NumElts = VecTy->getNumElements();
753
754 // Only perform this transformation for <8 x i8> vector types.
755 if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
756 return nullptr;
757
758 int Indexes[8];
759
760 for (unsigned I = 0; I < NumElts; ++I) {
761 Constant *COp = C->getAggregateElement(I);
762
763 if (!COp || !isa<ConstantInt>(COp))
764 return nullptr;
765
766 Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
767
768 // Make sure the mask indices are in range.
769 if ((unsigned)Indexes[I] >= NumElts)
770 return nullptr;
771 }
772
773 auto *V1 = II.getArgOperand(0);
774 auto *V2 = Constant::getNullValue(V1->getType());
775 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
776}
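
// Illustrative example for the tbl1 fold above (hypothetical %v): with the
// constant mask <7,6,5,4,3,2,1,0> the table lookup is just a lane reversal,
//   tbl1(%v, <7,6,5,4,3,2,1,0>) --> shufflevector %v, zeroinitializer, <7,...,0>
// which can later be lowered as a byte reverse (e.g. rev64), as noted above.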
777
778// Returns true iff the 2 intrinsics have the same operands, limiting the
779// comparison to the first NumOperands.
780static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
781 unsigned NumOperands) {
782 assert(I.arg_size() >= NumOperands && "Not enough operands");
783 assert(E.arg_size() >= NumOperands && "Not enough operands");
784 for (unsigned i = 0; i < NumOperands; i++)
785 if (I.getArgOperand(i) != E.getArgOperand(i))
786 return false;
787 return true;
788}
789
790// Remove trivially empty start/end intrinsic ranges, i.e. a start
791// immediately followed by an end (ignoring debuginfo or other
792// start/end intrinsics in between). As this handles only the most trivial
793// cases, tracking the nesting level is not needed:
794//
795// call @llvm.foo.start(i1 0)
796// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
797// call @llvm.foo.end(i1 0)
798// call @llvm.foo.end(i1 0) ; &I
799static bool
800removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
801 std::function<bool(const IntrinsicInst &)> IsStart) {
802 // We start from the end intrinsic and scan backwards, so that InstCombine
803 // has already processed (and potentially removed) all the instructions
804 // before the end intrinsic.
805 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
806 for (; BI != BE; ++BI) {
807 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
808 if (I->isDebugOrPseudoInst() ||
809 I->getIntrinsicID() == EndI.getIntrinsicID())
810 continue;
811 if (IsStart(*I)) {
812 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
813 IC.eraseInstFromFunction(*I);
814 IC.eraseInstFromFunction(EndI);
815 return true;
816 }
817 // Skip start intrinsics that don't pair with this end intrinsic.
818 continue;
819 }
820 }
821 break;
822 }
823
824 return false;
825}
826
827Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
828 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
829 // Bail out on the case where the source va_list of a va_copy is destroyed
830 // immediately by a follow-up va_end.
831 return II.getIntrinsicID() == Intrinsic::vastart ||
832 (II.getIntrinsicID() == Intrinsic::vacopy &&
833 I.getArgOperand(0) != II.getArgOperand(1));
834 });
835 return nullptr;
836}
837
838static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
839 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
840 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
841 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
842 Call.setArgOperand(0, Arg1);
843 Call.setArgOperand(1, Arg0);
844 return &Call;
845 }
846 return nullptr;
847}
848
849/// Creates a result tuple for an overflow intrinsic \p II with a given
850/// \p Result and a constant \p Overflow value.
851static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
852 Constant *Overflow) {
853 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
854 StructType *ST = cast<StructType>(II->getType());
855 Constant *Struct = ConstantStruct::get(ST, V);
856 return InsertValueInst::Create(Struct, Result, 0);
857}
858
859Instruction *
860InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
861 WithOverflowInst *WO = cast<WithOverflowInst>(II);
862 Value *OperationResult = nullptr;
863 Constant *OverflowResult = nullptr;
864 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
865 WO->getRHS(), *WO, OperationResult, OverflowResult))
866 return createOverflowTuple(WO, OperationResult, OverflowResult);
867
868 // See whether we can optimize the overflow check with assumption information.
869 for (User *U : WO->users()) {
870 if (!match(U, m_ExtractValue<1>(m_Value())))
871 continue;
872
873 for (auto &AssumeVH : AC.assumptionsFor(U)) {
874 if (!AssumeVH)
875 continue;
876 CallInst *I = cast<CallInst>(AssumeVH);
877 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
878 continue;
879 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
880 /*AllowEphemerals=*/true))
881 continue;
882 Value *Result =
883 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
884 Result->takeName(WO);
885 if (auto *Inst = dyn_cast<Instruction>(Result)) {
886 if (WO->isSigned())
887 Inst->setHasNoSignedWrap();
888 else
889 Inst->setHasNoUnsignedWrap();
890 }
891 return createOverflowTuple(WO, Result,
892 ConstantInt::getFalse(U->getType()));
893 }
894 }
895
896 return nullptr;
897}
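
// Sketch of the assumption-based fold above (hypothetical IR): when the
// overflow bit is proven false by an assume,
//   %r  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov = extractvalue { i32, i1 } %r, 1
//   %no = xor i1 %ov, true
//   call void @llvm.assume(i1 %no)
// the with.overflow call is rebuilt as a plain 'add nsw i32 %a, %b' paired
// with a constant-false overflow flag.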
898
899static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
900 Ty = Ty->getScalarType();
901 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
902}
903
904static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
905 Ty = Ty->getScalarType();
906 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
907}
908
909/// \returns the compare predicate type if the test performed by
910/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
911/// floating-point environment assumed for \p F for type \p Ty
912static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
913 const Function &F, Type *Ty) {
914 switch (static_cast<unsigned>(Mask)) {
915 case fcZero:
916 if (inputDenormalIsIEEE(F, Ty))
917 return FCmpInst::FCMP_OEQ;
918 break;
919 case fcZero | fcSubnormal:
920 if (inputDenormalIsDAZ(F, Ty))
921 return FCmpInst::FCMP_OEQ;
922 break;
923 case fcPositive | fcNegZero:
924 if (inputDenormalIsIEEE(F, Ty))
925 return FCmpInst::FCMP_OGE;
926 break;
927 case fcPositive | fcNegZero | fcNegSubnormal:
928 if (inputDenormalIsDAZ(F, Ty))
929 return FCmpInst::FCMP_OGE;
930 break;
931 case fcPosSubnormal | fcPosNormal | fcPosInf:
932 if (inputDenormalIsIEEE(F, Ty))
933 return FCmpInst::FCMP_OGT;
934 break;
935 case fcNegative | fcPosZero:
936 if (inputDenormalIsIEEE(F, Ty))
937 return FCmpInst::FCMP_OLE;
938 break;
939 case fcNegative | fcPosZero | fcPosSubnormal:
940 if (inputDenormalIsDAZ(F, Ty))
941 return FCmpInst::FCMP_OLE;
942 break;
943 case fcNegSubnormal | fcNegNormal | fcNegInf:
944 if (inputDenormalIsIEEE(F, Ty))
945 return FCmpInst::FCMP_OLT;
946 break;
947 case fcPosNormal | fcPosInf:
948 if (inputDenormalIsDAZ(F, Ty))
949 return FCmpInst::FCMP_OGT;
950 break;
951 case fcNegNormal | fcNegInf:
952 if (inputDenormalIsDAZ(F, Ty))
953 return FCmpInst::FCMP_OLT;
954 break;
955 case ~fcZero & ~fcNan:
956 if (inputDenormalIsIEEE(F, Ty))
957 return FCmpInst::FCMP_ONE;
958 break;
959 case ~(fcZero | fcSubnormal) & ~fcNan:
960 if (inputDenormalIsDAZ(F, Ty))
961 return FCmpInst::FCMP_ONE;
962 break;
963 default:
964 break;
965 }
966
967 return FCmpInst::BAD_FCMP_PREDICATE;
968}
969
970Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
971 Value *Src0 = II.getArgOperand(0);
972 Value *Src1 = II.getArgOperand(1);
973 const ConstantInt *CMask = cast<ConstantInt>(Src1);
974 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
975 const bool IsUnordered = (Mask & fcNan) == fcNan;
976 const bool IsOrdered = (Mask & fcNan) == fcNone;
977 const FPClassTest OrderedMask = Mask & ~fcNan;
978 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
979
980 const bool IsStrict =
981 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
982
983 Value *FNegSrc;
984 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
985 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
986
987 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
988 return replaceOperand(II, 0, FNegSrc);
989 }
990
991 Value *FAbsSrc;
992 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
993 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
994 return replaceOperand(II, 0, FAbsSrc);
995 }
996
997 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
998 (IsOrdered || IsUnordered) && !IsStrict) {
999 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1000 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1001 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1002 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1003 Constant *Inf = ConstantFP::getInfinity(Src0->getType());
1004 FCmpInst::Predicate Pred =
1005 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1006 if (OrderedInvertedMask == fcInf)
1007 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1008
1009 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
1010 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1011 CmpInf->takeName(&II);
1012 return replaceInstUsesWith(II, CmpInf);
1013 }
1014
1015 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1016 (IsOrdered || IsUnordered) && !IsStrict) {
1017 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1018 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1019 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1020 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1021 Constant *Inf =
1022 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1023 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1024 : Builder.CreateFCmpOEQ(Src0, Inf);
1025
1026 EqInf->takeName(&II);
1027 return replaceInstUsesWith(II, EqInf);
1028 }
1029
1030 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1031 (IsOrdered || IsUnordered) && !IsStrict) {
1032 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1033 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1034 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1035 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1036 Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
1037 OrderedInvertedMask == fcNegInf);
1038 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1039 : Builder.CreateFCmpONE(Src0, Inf);
1040 NeInf->takeName(&II);
1041 return replaceInstUsesWith(II, NeInf);
1042 }
1043
1044 if (Mask == fcNan && !IsStrict) {
1045 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1046 // exceptions.
1047 Value *IsNan =
1048 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1049 IsNan->takeName(&II);
1050 return replaceInstUsesWith(II, IsNan);
1051 }
1052
1053 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1054 // Equivalent of !isnan. Replace with standard fcmp.
1055 Value *FCmp =
1056 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1057 FCmp->takeName(&II);
1058 return replaceInstUsesWith(II, FCmp);
1059 }
1060
1061 FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1062
1063 // Try to replace with an fcmp with 0
1064 //
1065 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1066 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1067 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1068 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1069 //
1070 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1071 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1072 //
1073 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1074 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1075 //
1076 if (!IsStrict && (IsOrdered || IsUnordered) &&
1077 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1078 Src0->getType())) !=
1079 FCmpInst::BAD_FCMP_PREDICATE) {
1080 Constant *Zero = ConstantFP::getZero(Src0->getType());
1081 // Equivalent of == 0.
1082 Value *FCmp = Builder.CreateFCmp(
1083 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1084 Src0, Zero);
1085
1086 FCmp->takeName(&II);
1087 return replaceInstUsesWith(II, FCmp);
1088 }
1089
1090 KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
1091
1092 // Clear test bits we know must be false from the source value.
1093 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1094 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1095 if ((Mask & Known.KnownFPClasses) != Mask) {
1096 II.setArgOperand(
1097 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1098 return &II;
1099 }
1100
1101 // If none of the tests which can return false are possible, fold to true.
1102 // fp_class (nnan x), ~(qnan|snan) -> true
1103 // fp_class (ninf x), ~(ninf|pinf) -> true
1104 if (Mask == Known.KnownFPClasses)
1105 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1106
1107 return nullptr;
1108}
1109
1110static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1111 KnownBits Known = computeKnownBits(Op, SQ);
1112 if (Known.isNonNegative())
1113 return false;
1114 if (Known.isNegative())
1115 return true;
1116
1117 Value *X, *Y;
1118 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1119 return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, SQ.CxtI, SQ.DL);
1120
1121 return std::nullopt;
1122}
1123
1124static std::optional<bool> getKnownSignOrZero(Value *Op,
1125 const SimplifyQuery &SQ) {
1126 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1127 return Sign;
1128
1129 Value *X, *Y;
1130 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1131 return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, SQ.CxtI, SQ.DL);
1132
1133 return std::nullopt;
1134}
1135
1136/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1137static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1138 const SimplifyQuery &SQ) {
1139 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1140 if (!Known1)
1141 return false;
1142 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1143 if (!Known0)
1144 return false;
1145 return *Known0 == *Known1;
1146}
1147
1148/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1149/// can trigger other combines.
1150static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
1151 InstCombiner::BuilderTy &Builder) {
1152 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1153 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1154 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1155 "Expected a min or max intrinsic");
1156
1157 // TODO: Match vectors with undef elements, but undef may not propagate.
1158 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1159 Value *X;
1160 const APInt *C0, *C1;
1161 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1162 !match(Op1, m_APInt(C1)))
1163 return nullptr;
1164
1165 // Check for necessary no-wrap and overflow constraints.
1166 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1167 auto *Add = cast<BinaryOperator>(Op0);
1168 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1169 (!IsSigned && !Add->hasNoUnsignedWrap()))
1170 return nullptr;
1171
1172 // If the constant difference overflows, then instsimplify should reduce the
1173 // min/max to the add or C1.
1174 bool Overflow;
1175 APInt CDiff =
1176 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1177 assert(!Overflow && "Expected simplify of min/max");
1178
1179 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1180 // Note: the "mismatched" no-overflow setting does not propagate.
1181 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1182 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1183 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1184 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1185}
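
// Worked example for the reassociation above (hypothetical %x):
//   umin(add nuw %x, 4), 10 --> add nuw (umin %x, 6), 4
// The nuw flag guarantees the add cannot wrap, so folding 10 - 4 = 6 into the
// new umin constant is safe.
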
1186/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1187Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1188 Type *Ty = MinMax1.getType();
1189
1190 // We are looking for a tree of:
1191 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1192 // Where the min and max could be reversed
1193 Instruction *MinMax2;
1194 BinaryOperator *AddSub;
1195 const APInt *MinValue, *MaxValue;
1196 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1197 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1198 return nullptr;
1199 } else if (match(&MinMax1,
1200 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1201 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1202 return nullptr;
1203 } else
1204 return nullptr;
1205
1206 // Check that the constants clamp a saturate, and that the new type would be
1207 // sensible to convert to.
1208 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1209 return nullptr;
1210 // In what bitwidth can this be treated as saturating arithmetics?
1211 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1212 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1213 // good first approximation for what should be done there.
1214 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1215 return nullptr;
1216
1217 // Also make sure that the inner min/max and the add/sub have one use.
1218 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1219 return nullptr;
1220
1221 // Create the new type (which can be a vector type)
1222 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1223
1224 Intrinsic::ID IntrinsicID;
1225 if (AddSub->getOpcode() == Instruction::Add)
1226 IntrinsicID = Intrinsic::sadd_sat;
1227 else if (AddSub->getOpcode() == Instruction::Sub)
1228 IntrinsicID = Intrinsic::ssub_sat;
1229 else
1230 return nullptr;
1231
1232 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1233 // is usually achieved via a sext from a smaller type.
1234 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1235 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1236 return nullptr;
1237
1238 // Finally create and return the sat intrinsic, truncated to the new type
1239 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1240 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1241 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1242 return CastInst::Create(Instruction::SExt, Sat, Ty);
1243}
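
// Worked example of the clamp-to-saturate pattern above (hypothetical %a, %b):
//   %x = sext i8 %a to i32
//   %y = sext i8 %b to i32
//   %s = add i32 %x, %y
//   %l = call i32 @llvm.smax.i32(i32 %s, i32 -128)
//   %c = call i32 @llvm.smin.i32(i32 %l, i32 127)
//     -->
//   %t = call i8 @llvm.sadd.sat.i8(i8 %a, i8 %b)
//   %c = sext i8 %t to i32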
1244
1245
1246/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1247/// can only be one of two possible constant values -- turn that into a select
1248/// of constants.
1249static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
1250 InstCombiner::BuilderTy &Builder) {
1251 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1252 Value *X;
1253 const APInt *C0, *C1;
1254 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1255 return nullptr;
1256
1257 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1258 switch (II->getIntrinsicID()) {
1259 case Intrinsic::smax:
1260 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1261 Pred = ICmpInst::ICMP_SGT;
1262 break;
1263 case Intrinsic::smin:
1264 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1265 Pred = ICmpInst::ICMP_SLT;
1266 break;
1267 case Intrinsic::umax:
1268 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1269 Pred = ICmpInst::ICMP_UGT;
1270 break;
1271 case Intrinsic::umin:
1272 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1273 Pred = ICmpInst::ICMP_ULT;
1274 break;
1275 default:
1276 llvm_unreachable("Expected min/max intrinsic");
1277 }
1278 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1279 return nullptr;
1280
1281 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1282 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1283 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1284 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1285}
1286
1287/// If this min/max has a constant operand and an operand that is a matching
1288/// min/max with a constant operand, constant-fold the 2 constant operands.
1289static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
1290 IRBuilderBase &Builder,
1291 const SimplifyQuery &SQ) {
1292 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1293 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1294 if (!LHS)
1295 return nullptr;
1296
1297 Constant *C0, *C1;
1298 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1299 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1300 return nullptr;
1301
1302 // max (max X, C0), C1 --> max X, (max C0, C1)
1303 // min (min X, C0), C1 --> min X, (min C0, C1)
1304 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1305 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1306 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1307 if (InnerMinMaxID != MinMaxID &&
1308 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1309 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1310 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1311 return nullptr;
1312
1313 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
1314 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1315 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1316 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1317 {LHS->getArgOperand(0), NewC});
1318}
1319
1320/// If this min/max has a matching min/max operand with a constant, try to push
1321/// the constant operand into this instruction. This can enable more folds.
1322static Instruction *
1323reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1324 InstCombiner::BuilderTy &Builder) {
1325 // Match and capture a min/max operand candidate.
1326 Value *X, *Y;
1327 Constant *C;
1328 Instruction *Inner;
1329 if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
1330 m_Instruction(Inner),
1331 m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
1332 m_Value(Y))))
1333 return nullptr;
1334
1335 // The inner op must match. Check for constants to avoid infinite loops.
1336 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1337 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1338 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1339 match(InnerMM->getArgOperand(1), m_ImmConstant()))
1340 return nullptr;
1341
1342 // max (max X, C), Y --> max (max X, Y), C
1343 Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
1344 MinMaxID, II->getType());
1345 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1346 NewInner->takeName(Inner);
1347 return CallInst::Create(MinMax, {NewInner, C});
1348}
1349
1350/// Reduce a sequence of min/max intrinsics with a common operand.
1351static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1352 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1353 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1354 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1355 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1356 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1357 RHS->getIntrinsicID() != MinMaxID ||
1358 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1359 return nullptr;
1360
1361 Value *A = LHS->getArgOperand(0);
1362 Value *B = LHS->getArgOperand(1);
1363 Value *C = RHS->getArgOperand(0);
1364 Value *D = RHS->getArgOperand(1);
1365
1366 // Look for a common operand.
1367 Value *MinMaxOp = nullptr;
1368 Value *ThirdOp = nullptr;
1369 if (LHS->hasOneUse()) {
1370 // If the LHS is only used in this chain and the RHS is used outside of it,
1371 // reuse the RHS min/max because that will eliminate the LHS.
1372 if (D == A || C == A) {
1373 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1374 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1375 MinMaxOp = RHS;
1376 ThirdOp = B;
1377 } else if (D == B || C == B) {
1378 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1379 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1380 MinMaxOp = RHS;
1381 ThirdOp = A;
1382 }
1383 } else {
1384 assert(RHS->hasOneUse() && "Expected one-use operand");
1385 // Reuse the LHS. This will eliminate the RHS.
1386 if (D == A || D == B) {
1387 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1388 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1389 MinMaxOp = LHS;
1390 ThirdOp = C;
1391 } else if (C == A || C == B) {
1392 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1393 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1394 MinMaxOp = LHS;
1395 ThirdOp = D;
1396 }
1397 }
1398
1399 if (!MinMaxOp || !ThirdOp)
1400 return nullptr;
1401
1402 Module *Mod = II->getModule();
1403 Function *MinMax =
1404 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1405 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1406}
1407
1408/// If all arguments of the intrinsic are unary shuffles with the same mask,
1409/// try to shuffle after the intrinsic.
1411Instruction *InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1412 if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
1413 !II->getCalledFunction()->isSpeculatable())
1414 return nullptr;
1415
1416 Value *X;
1417 Constant *C;
1418 ArrayRef<int> Mask;
1419 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1420 return isa<Constant>(Arg.get()) ||
1421 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1422 Arg.getOperandNo(), nullptr);
1423 });
1424 if (!NonConstArg ||
1425 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1426 return nullptr;
1427
1428 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1429 // instructions.
1430 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1431 return nullptr;
1432
1433 // See if all arguments are shuffled with the same mask.
1434 SmallVector<Value *> NewArgs;
1435 Type *SrcTy = X->getType();
1436 for (Use &Arg : II->args()) {
1437 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1438 Arg.getOperandNo(), nullptr))
1439 NewArgs.push_back(Arg);
1440 else if (match(&Arg,
1441 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1442 X->getType() == SrcTy)
1443 NewArgs.push_back(X);
1444 else if (match(&Arg, m_ImmConstant(C))) {
1445 // If it's a constant, try find the constant that would be shuffled to C.
1446 if (Constant *ShuffledC =
1447 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1448 NewArgs.push_back(ShuffledC);
1449 else
1450 return nullptr;
1451 } else
1452 return nullptr;
1453 }
1454
1455 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1456 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1457 // Result type might be a different vector width.
1458 // TODO: Check that the result type isn't widened?
1459 VectorType *ResTy =
1460 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1461 Value *NewIntrinsic =
1462 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1463 return new ShuffleVectorInst(NewIntrinsic, Mask);
1464}
1465
1466/// If all arguments of the intrinsic are reverses, try to pull the reverse
1467/// after the intrinsic.
1468Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
1469 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1470 return nullptr;
1471
1472 // At least 1 operand must be a reverse with 1 use because we are creating 2
1473 // instructions.
1474 if (none_of(II->args(), [](Value *V) {
1475 return match(V, m_OneUse(m_VecReverse(m_Value())));
1476 }))
1477 return nullptr;
1478
1479 Value *X;
1480 Constant *C;
1481 SmallVector<Value *> NewArgs;
1482 for (Use &Arg : II->args()) {
1483 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1484 Arg.getOperandNo(), nullptr))
1485 NewArgs.push_back(Arg);
1486 else if (match(&Arg, m_VecReverse(m_Value(X))))
1487 NewArgs.push_back(X);
1488 else if (isSplatValue(Arg))
1489 NewArgs.push_back(Arg);
1490 else if (match(&Arg, m_ImmConstant(C)))
1491 NewArgs.push_back(Builder.CreateVectorReverse(C));
1492 else
1493 return nullptr;
1494 }
1495
1496 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1497 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1498 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1499 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1500 return Builder.CreateVectorReverse(NewIntrinsic);
1501}
1502
1503/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1504/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1505/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1506template <Intrinsic::ID IntrID>
1507static Instruction *foldBitOrderCrossLogicOp(Value *V,
1508 InstCombiner::BuilderTy &Builder) {
1509 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1510 "This helper only supports BSWAP and BITREVERSE intrinsics");
1511
1512 Value *X, *Y;
1513 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1514 // don't match ConstantExpr that aren't meaningful for this transform.
1517 Value *OldReorderX, *OldReorderY;
1519
1520 // If both X and Y are bswap/bitreverse, the transform reduces the number
1521 // of instructions even if there's multiuse.
1522 // If only one operand is bswap/bitreverse, we need to ensure the operand
1523 // have only one use.
1524 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1525 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1526 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1527 }
1528
1529 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1530 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1531 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1532 }
1533
1534 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1535 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1536 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1537 }
1538 }
1539 return nullptr;
1540}
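
// The template above, instantiated for bswap in the file's shorthand
// (bitreverse is analogous; %x and %y are illustrative):
//   bswap(xor(bswap(%x), %y))        --> xor(%x, bswap(%y))
//   bswap(and(bswap(%x), bswap(%y))) --> and(%x, %y)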
1541
1542/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1543/// `f(f(x, y), y) == f(x, y)` holds.
1544static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
1545 switch (IID) {
1546 case Intrinsic::smax:
1547 case Intrinsic::smin:
1548 case Intrinsic::umax:
1549 case Intrinsic::umin:
1550 case Intrinsic::maximum:
1551 case Intrinsic::minimum:
1552 case Intrinsic::maximumnum:
1553 case Intrinsic::minimumnum:
1554 case Intrinsic::maxnum:
1555 case Intrinsic::minnum:
1556 return true;
1557 default:
1558 return false;
1559 }
1560}
1561
1562/// Attempt to simplify value-accumulating recurrences of kind:
1563/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1564/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1565/// And let the idempotent binary intrinsic be hoisted, when the operands are
1566/// known to be loop-invariant.
1567static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
1568 IntrinsicInst *II) {
1569 PHINode *PN;
1570 Value *Init, *OtherOp;
1571
1572 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1573 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1574 auto IID = II->getIntrinsicID();
1575 if (!isIdempotentBinaryIntrinsic(IID) ||
1576 !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
1577 !IC.getDominatorTree().dominates(OtherOp, PN))
1578 return nullptr;
1579
1580 auto *InvariantBinaryInst =
1581 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1582 if (isa<FPMathOperator>(InvariantBinaryInst))
1583 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1584 return InvariantBinaryInst;
1585}
1586
1587static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1588 if (!CanReorderLanes)
1589 return nullptr;
1590
1591 Value *V;
1592 if (match(Arg, m_VecReverse(m_Value(V))))
1593 return V;
1594
1595 ArrayRef<int> Mask;
1596 if (!isa<FixedVectorType>(Arg->getType()) ||
1597 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1598 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1599 return nullptr;
1600
1601 int Sz = Mask.size();
1602 SmallBitVector UsedIndices(Sz);
1603 for (int Idx : Mask) {
1604 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1605 return nullptr;
1606 UsedIndices.set(Idx);
1607 }
1608
1609 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1610 // other changes.
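// Illustrative example: a single-source full permutation such as
// shufflevector %v, poison, <3, 2, 1, 0> feeding a lane-order-insensitive
// reduction can simply be replaced by %v.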
1611 return UsedIndices.all() ? V : nullptr;
1612}
1613
1614/// Fold an unsigned minimum of trailing or leading zero bits counts:
1615/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1616/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
1617/// >> ConstOp))
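/// Illustrative example (i32): umin(cttz(%x, %zu), 8) becomes cttz(%x | 256,
/// true); forcing bit 8 on makes the operand nonzero and caps the
/// trailing-zero count at 8.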
1618template <Intrinsic::ID IntrID>
1619static Value *
1620 foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1621 const DataLayout &DL,
1622 InstCombiner::BuilderTy &Builder) {
1623 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1624 "This helper only supports cttz and ctlz intrinsics");
1625
1626 Value *CtOp;
1627 Value *ZeroUndef;
1628 if (!match(I0,
1629 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
1630 return nullptr;
1631
1632 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1633 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1634 if (!match(I1, m_CheckedInt(LessBitWidth)))
1635 // We have a constant >= BitWidth (which can be handled by CVP)
1636 // or a non-splat vector with elements < and >= BitWidth
1637 return nullptr;
1638
1639 Type *Ty = I1->getType();
1640 Constant *NewConst = ConstantFoldBinaryOpOperands(
1641 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1642 IntrID == Intrinsic::cttz
1643 ? ConstantInt::get(Ty, 1)
1644 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1645 cast<Constant>(I1), DL);
1646 return Builder.CreateBinaryIntrinsic(
1647 IntrID, Builder.CreateOr(CtOp, NewConst),
1648 ConstantInt::getTrue(ZeroUndef->getType()));
1649}
1650
1651/// Return whether "X LOp (Y ROp Z)" is always equal to
1652/// "(X LOp Y) ROp (X LOp Z)".
1653 static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
1654 bool HasNSW, Intrinsic::ID ROp) {
1655 switch (ROp) {
1656 case Intrinsic::umax:
1657 case Intrinsic::umin:
1658 if (HasNUW && LOp == Instruction::Add)
1659 return true;
1660 if (HasNUW && LOp == Instruction::Shl)
1661 return true;
1662 return false;
1663 case Intrinsic::smax:
1664 case Intrinsic::smin:
1665 return HasNSW && LOp == Instruction::Add;
1666 default:
1667 return false;
1668 }
1669}
1670
1671// Attempts to factorise a common term
1672 // in an instruction that has the form "(A op' B) op (C op' D)"
1673// where op is an intrinsic and op' is a binop
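// Illustrative example: umax((A +nuw B), (A +nuw C)) --> A +nuw umax(B, C).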
1674static Value *
1676 foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder) {
1677 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1678 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1679
1680 OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(LHS);
1681 OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(RHS);
1682 
1683 if (!Op0 || !Op1)
1684 return nullptr;
1685
1686 if (Op0->getOpcode() != Op1->getOpcode())
1687 return nullptr;
1688
1689 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1690 return nullptr;
1691
1692 Instruction::BinaryOps InnerOpcode =
1693 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1694 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1695 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1696
1697 if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
1698 return nullptr;
1699
1700 Value *A = Op0->getOperand(0);
1701 Value *B = Op0->getOperand(1);
1702 Value *C = Op1->getOperand(0);
1703 Value *D = Op1->getOperand(1);
1704
1705 // Attempts to swap variables such that A equals C or B equals D,
1706 // if the inner operation is commutative.
1707 if (Op0->isCommutative() && A != C && B != D) {
1708 if (A == D || B == C)
1709 std::swap(C, D);
1710 else
1711 return nullptr;
1712 }
1713
1714 BinaryOperator *NewBinop;
1715 if (A == C) {
1716 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1717 NewBinop =
1718 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1719 } else if (B == D) {
1720 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1721 NewBinop =
1722 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1723 } else {
1724 return nullptr;
1725 }
1726
1727 NewBinop->setHasNoUnsignedWrap(HasNUW);
1728 NewBinop->setHasNoSignedWrap(HasNSW);
1729
1730 return NewBinop;
1731}
1732
1733/// CallInst simplification. This mostly only handles folding of intrinsic
1734/// instructions. For normal calls, it allows visitCallBase to do the heavy
1735/// lifting.
1736 Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1737 // Don't try to simplify calls without uses. It will not do anything useful,
1738 // but will result in the following folds being skipped.
1739 if (!CI.use_empty()) {
1740 SmallVector<Value *, 8> Args(CI.args());
1741 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1742 SQ.getWithInstruction(&CI)))
1743 return replaceInstUsesWith(CI, V);
1744 }
1745
1746 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1747 return visitFree(CI, FreedOp);
1748
1749 // If the caller function (i.e. us, the function that contains this CallInst)
1750 // is nounwind, mark the call as nounwind, even if the callee isn't.
1751 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1752 CI.setDoesNotThrow();
1753 return &CI;
1754 }
1755
1756 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1757 if (!II)
1758 return visitCallBase(CI);
1759
1760 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1761 // instead of in visitCallBase.
1762 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1763 if (auto NumBytes = MI->getLengthInBytes()) {
1764 // memmove/cpy/set of zero bytes is a noop.
1765 if (NumBytes->isZero())
1766 return eraseInstFromFunction(CI);
1767
1768 // For atomic unordered mem intrinsics, if the length is not positive or
1769 // not a multiple of the element size, then the behavior is undefined.
1770 if (MI->isAtomic() &&
1771 (NumBytes->isNegative() ||
1772 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1773 CreateNonTerminatorUnreachable(MI);
1774 assert(MI->getType()->isVoidTy() &&
1775 "non void atomic unordered mem intrinsic");
1776 return eraseInstFromFunction(*MI);
1777 }
1778 }
1779
1780 // No other transformations apply to volatile transfers.
1781 if (MI->isVolatile())
1782 return nullptr;
1783
1784 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1785 // memmove(x,x,size) -> noop.
1786 if (MTI->getSource() == MTI->getDest())
1787 return eraseInstFromFunction(CI);
1788 }
1789
1790 auto IsPointerUndefined = [MI](Value *Ptr) {
1791 return isa<ConstantPointerNull>(Ptr) &&
1792 !NullPointerIsDefined(
1793 MI->getFunction(),
1794 cast<PointerType>(Ptr->getType())->getAddressSpace());
1795 };
1796 bool SrcIsUndefined = false;
1797 // If we can determine a pointer alignment that is bigger than currently
1798 // set, update the alignment.
1799 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1800 if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1801 return I;
1802 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1803 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1804 if (Instruction *I = SimplifyAnyMemSet(MSI))
1805 return I;
1806 }
1807
1808 // If src/dest is null, this memory intrinsic must be a noop.
1809 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1810 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1811 return eraseInstFromFunction(CI);
1812 }
1813
1814 // If we have a memmove and the source operation is a constant global,
1815 // then the source and dest pointers can't alias, so we can change this
1816 // into a call to memcpy.
1817 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1818 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1819 if (GVSrc->isConstant()) {
1820 Module *M = CI.getModule();
1821 Intrinsic::ID MemCpyID =
1822 MMI->isAtomic()
1823 ? Intrinsic::memcpy_element_unordered_atomic
1824 : Intrinsic::memcpy;
1825 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1826 CI.getArgOperand(1)->getType(),
1827 CI.getArgOperand(2)->getType() };
1828 CI.setCalledFunction(
1829 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1830 return II;
1831 }
1832 }
1833 }
1834
1835 // For fixed width vector result intrinsics, use the generic demanded vector
1836 // support.
1837 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1838 auto VWidth = IIFVTy->getNumElements();
1839 APInt PoisonElts(VWidth, 0);
1840 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1841 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1842 if (V != II)
1843 return replaceInstUsesWith(*II, V);
1844 return II;
1845 }
1846 }
1847
1848 if (II->isCommutative()) {
1849 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
1850 replaceOperand(*II, 0, Pair->first);
1851 replaceOperand(*II, 1, Pair->second);
1852 return II;
1853 }
1854
1855 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1856 return NewCall;
1857 }
1858
1859 // Unused constrained FP intrinsic calls may have declared side effect, which
1860 // prevents it from being removed. In some cases however the side effect is
1861 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1862 // returns a replacement, the call may be removed.
1863 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
1864 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
1865 return eraseInstFromFunction(CI);
1866 }
1867
1868 Intrinsic::ID IID = II->getIntrinsicID();
1869 switch (IID) {
1870 case Intrinsic::objectsize: {
1871 SmallVector<Instruction *> InsertedInstructions;
1872 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
1873 &InsertedInstructions)) {
1874 for (Instruction *Inserted : InsertedInstructions)
1875 Worklist.add(Inserted);
1876 return replaceInstUsesWith(CI, V);
1877 }
1878 return nullptr;
1879 }
1880 case Intrinsic::abs: {
1881 Value *IIOperand = II->getArgOperand(0);
1882 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
1883
1884 // abs(-x) -> abs(x)
1885 Value *X;
1886 if (match(IIOperand, m_Neg(m_Value(X)))) {
1887 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
1888 replaceOperand(*II, 1, Builder.getTrue());
1889 return replaceOperand(*II, 0, X);
1890 }
1891 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
1892 return replaceOperand(*II, 0, X);
1893
1894 Value *Y;
1895 // abs(a * abs(b)) -> abs(a * b)
1896 if (match(IIOperand,
1897 m_OneUse(m_c_Mul(m_Value(X),
1898 m_Intrinsic<Intrinsic::abs>(m_Value(Y)))))) {
1899 bool NSW =
1900 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
1901 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
1902 return replaceOperand(*II, 0, XY);
1903 }
1904
1905 if (std::optional<bool> Known =
1906 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
1907 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
1908 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
1909 if (!*Known)
1910 return replaceInstUsesWith(*II, IIOperand);
1911
1912 // abs(x) -> -x if x < 0
1913 // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
1914 if (IntMinIsPoison)
1915 return BinaryOperator::CreateNSWNeg(IIOperand);
1916 return BinaryOperator::CreateNeg(IIOperand);
1917 }
1918
1919 // abs (sext X) --> zext (abs X*)
1920 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1921 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
1922 Value *NarrowAbs =
1923 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1924 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
1925 }
1926
1927 // Match a complicated way to check if a number is odd/even:
1928 // abs (srem X, 2) --> and X, 1
1929 const APInt *C;
1930 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
1931 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
1932
1933 break;
1934 }
1935 case Intrinsic::umin: {
1936 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1937 // umin(x, 1) == zext(x != 0)
1938 if (match(I1, m_One())) {
1939 assert(II->getType()->getScalarSizeInBits() != 1 &&
1940 "Expected simplify of umin with max constant");
1941 Value *Zero = Constant::getNullValue(I0->getType());
1942 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
1943 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
1944 }
1945 // umin(cttz(x), const) --> cttz(x | (1 << const))
1946 if (Value *FoldedCttz =
1947 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1948 I0, I1, DL, Builder))
1949 return replaceInstUsesWith(*II, FoldedCttz);
1950 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
1951 if (Value *FoldedCtlz =
1952 foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1953 I0, I1, DL, Builder))
1954 return replaceInstUsesWith(*II, FoldedCtlz);
1955 [[fallthrough]];
1956 }
1957 case Intrinsic::umax: {
1958 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1959 Value *X, *Y;
1960 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
1961 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1962 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1963 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1964 }
1965 Constant *C;
1966 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1967 I0->hasOneUse()) {
1968 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
1969 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1970 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1971 }
1972 }
1973 // If C is not 0:
1974 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
1975 // If C is not 0 or 1:
1976 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
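// Reasoning sketch: when x == 0 both nuw_shl/nuw_mul produce 0, so the max is
// x + 1 == 1; when x != 0, nuw_shl(x, C>=1) and nuw_mul(x, C>=2) are at least
// 2*x >= x + 1, so the max is the shl/mul itself.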
1977 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
1978 const APInt *C;
1979 Value *X;
1980 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
1981 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
1982 return nullptr;
1983 if (C->isZero())
1984 return nullptr;
1985 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
1986 return nullptr;
1987
1988 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
1989 Value *NewSelect =
1990 Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
1991 return replaceInstUsesWith(*II, NewSelect);
1992 };
1993
1994 if (IID == Intrinsic::umax) {
1995 if (Instruction *I = foldMaxMulShift(I0, I1))
1996 return I;
1997 if (Instruction *I = foldMaxMulShift(I1, I0))
1998 return I;
1999 }
2000
2001 // If both operands of unsigned min/max are sign-extended, it is still ok
2002 // to narrow the operation.
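// (sext maps the narrow unsigned ordering monotonically onto the wide
// unsigned ordering, so umin/umax commute with it.)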
2003 [[fallthrough]];
2004 }
2005 case Intrinsic::smax:
2006 case Intrinsic::smin: {
2007 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2008 Value *X, *Y;
2009 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2010 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2011 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2012 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2013 }
2014
2015 Constant *C;
2016 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2017 I0->hasOneUse()) {
2018 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2019 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2020 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2021 }
2022 }
2023
2024 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2025 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
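// Illustrative example (i8): smax(smin(X, 7), 3) clamps X to [3, 7]; the
// canonical form smin(smax(X, 3), 7) computes the same clamp.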
2026 const APInt *MinC, *MaxC;
2027 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2028 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2029 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2030 Value *NewMax = Builder.CreateBinaryIntrinsic(
2031 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2032 return replaceInstUsesWith(
2033 *II, Builder.CreateBinaryIntrinsic(
2034 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2035 };
2036 if (IID == Intrinsic::smax &&
2037 match(I0, m_OneUse(m_Intrinsic<Intrinsic::smin>(m_Value(X),
2038 m_APInt(MinC)))) &&
2039 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2040 return CreateCanonicalClampForm(true);
2041 if (IID == Intrinsic::umax &&
2042 match(I0, m_OneUse(m_Intrinsic<Intrinsic::umin>(m_Value(X),
2043 m_APInt(MinC)))) &&
2044 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2045 return CreateCanonicalClampForm(false);
2046
2047 // umin(i1 X, i1 Y) -> and i1 X, Y
2048 // smax(i1 X, i1 Y) -> and i1 X, Y
2049 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2050 II->getType()->isIntOrIntVectorTy(1)) {
2051 return BinaryOperator::CreateAnd(I0, I1);
2052 }
2053
2054 // umax(i1 X, i1 Y) -> or i1 X, Y
2055 // smin(i1 X, i1 Y) -> or i1 X, Y
2056 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2057 II->getType()->isIntOrIntVectorTy(1)) {
2058 return BinaryOperator::CreateOr(I0, I1);
2059 }
2060
2061 // smin(smax(X, -1), 1) -> scmp(X, 0)
2062 // smax(smin(X, 1), -1) -> scmp(X, 0)
2063 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1), 1).
2064 // And i1's have been changed to and/ors
2065 // So we only need to check for smin
2066 if (IID == Intrinsic::smin) {
2067 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2068 match(I1, m_One())) {
2069 Value *Zero = ConstantInt::get(X->getType(), 0);
2070 return replaceInstUsesWith(
2071 CI,
2072 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2073 }
2074 }
2075
2076 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2077 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2078 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2079 // TODO: Canonicalize neg after min/max if I1 is constant.
2080 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2081 (I0->hasOneUse() || I1->hasOneUse())) {
2082 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
2083 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2084 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2085 }
2086 }
2087
2088 // (umax X, (xor X, Pow2))
2089 // -> (or X, Pow2)
2090 // (umin X, (xor X, Pow2))
2091 // -> (and X, ~Pow2)
2092 // (smax X, (xor X, Pos_Pow2))
2093 // -> (or X, Pos_Pow2)
2094 // (smin X, (xor X, Pos_Pow2))
2095 // -> (and X, ~Pos_Pow2)
2096 // (smax X, (xor X, Neg_Pow2))
2097 // -> (and X, ~Neg_Pow2)
2098 // (smin X, (xor X, Neg_Pow2))
2099 // -> (or X, Neg_Pow2)
2100 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2101 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2102 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2103 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2104 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2105
2106 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2107 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2108 if (KnownSign == std::nullopt) {
2109 UseOr = false;
2110 UseAndN = false;
2111 } else if (*KnownSign /* true is Signed. */) {
2112 UseOr ^= true;
2113 UseAndN ^= true;
2114 Type *Ty = I0->getType();
2115 // Negative power of 2 must be IntMin. It's possible to be able to
2116 // prove negative / power of 2 without actually having known bits, so
2117 // just get the value by hand.
2118 X = Constant::getIntegerValue(
2119 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2120 }
2121 }
2122 if (UseOr)
2123 return BinaryOperator::CreateOr(I0, X);
2124 else if (UseAndN)
2125 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2126 }
2127
2128 // If we can eliminate ~A and Y is free to invert:
2129 // max ~A, Y --> ~(min A, ~Y)
2130 //
2131 // Examples:
2132 // max ~A, ~Y --> ~(min A, Y)
2133 // max ~A, C --> ~(min A, ~C)
2134 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2135 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2136 Value *A;
2137 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2138 !isFreeToInvert(A, A->hasOneUse())) {
2139 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2140 Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
2141 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2142 return BinaryOperator::CreateNot(InvMaxMin);
2143 }
2144 }
2145 return nullptr;
2146 };
2147
2148 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2149 return I;
2150 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2151 return I;
2152
2153 if (Instruction *I = moveAddAfterMinMax(II, Builder))
2154 return I;
2155
2156 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2157 const APInt *RHSC;
2158 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2159 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2160 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2161 ConstantInt::get(II->getType(), *RHSC));
2162
2163 // smax(X, -X) --> abs(X)
2164 // smin(X, -X) --> -abs(X)
2165 // umax(X, -X) --> -abs(X)
2166 // umin(X, -X) --> abs(X)
2167 if (isKnownNegation(I0, I1)) {
2168 // We can choose either operand as the input to abs(), but if we can
2169 // eliminate the only use of a value, that's better for subsequent
2170 // transforms/analysis.
2171 if (I0->hasOneUse() && !I1->hasOneUse())
2172 std::swap(I0, I1);
2173
2174 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2175 // operation and potentially its negation.
2176 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2177 Value *Abs = Builder.CreateBinaryIntrinsic(
2178 Intrinsic::abs, I0,
2179 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2180
2181 // We don't have a "nabs" intrinsic, so negate if needed based on the
2182 // max/min operation.
2183 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2184 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2185 return replaceInstUsesWith(CI, Abs);
2186 }
2187
2189 if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
2190 return Sel;
2190
2191 if (Instruction *SAdd = matchSAddSubSat(*II))
2192 return SAdd;
2193
2194 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2195 return replaceInstUsesWith(*II, NewMinMax);
2196
2198 return R;
2199
2200 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2201 return NewMinMax;
2202
2203 // Try to fold minmax with constant RHS based on range information
2204 if (match(I1, m_APIntAllowPoison(RHSC))) {
2205 ICmpInst::Predicate Pred =
2206 ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
2207 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2208 ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
2209 I0, IsSigned, SQ.getWithInstruction(II));
2210 if (!LHS_CR.isFullSet()) {
2211 if (LHS_CR.icmp(Pred, *RHSC))
2212 return replaceInstUsesWith(*II, I0);
2213 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2214 return replaceInstUsesWith(*II,
2215 ConstantInt::get(II->getType(), *RHSC));
2216 }
2217 }
2218
2220 if (Value *V = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
2221 return replaceInstUsesWith(*II, V);
2221
2222 break;
2223 }
2224 case Intrinsic::scmp: {
2225 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2226 Value *LHS, *RHS;
2227 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2228 return replaceInstUsesWith(
2229 CI,
2230 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2231 break;
2232 }
2233 case Intrinsic::bitreverse: {
2234 Value *IIOperand = II->getArgOperand(0);
2235 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2236 Value *X;
2237 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2238 X->getType()->isIntOrIntVectorTy(1)) {
2239 Type *Ty = II->getType();
2240 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2241 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2242 ConstantInt::getNullValue(Ty));
2243 }
2244
2245 if (Instruction *crossLogicOpFold =
2246 foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
2247 return crossLogicOpFold;
2248
2249 break;
2250 }
2251 case Intrinsic::bswap: {
2252 Value *IIOperand = II->getArgOperand(0);
2253
2254 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2255 // inverse-shift-of-bswap:
2256 // bswap (shl X, Y) --> lshr (bswap X), Y
2257 // bswap (lshr X, Y) --> shl (bswap X), Y
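// This is only valid when the shift amount is a multiple of 8 (its low three
// bits are known zero), e.g. bswap(shl i32 %x, 16) --> lshr(bswap(%x), 16).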
2258 Value *X, *Y;
2259 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2260 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2261 if (MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
2262 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2263 BinaryOperator::BinaryOps InverseShift =
2264 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2265 ? Instruction::LShr
2266 : Instruction::Shl;
2267 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2268 }
2269 }
2270
2271 KnownBits Known = computeKnownBits(IIOperand, II);
2272 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2273 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2274 unsigned BW = Known.getBitWidth();
2275
2276 // bswap(x) -> shift(x) if x has exactly one "active byte"
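// Illustrative example (i32): if only bits 8..15 of %x can be nonzero, bswap
// moves that byte to bits 16..23, which is exactly shl %x, 8.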
2277 if (BW - LZ - TZ == 8) {
2278 assert(LZ != TZ && "active byte cannot be in the middle");
2279 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2280 return BinaryOperator::CreateNUWShl(
2281 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2282 // -> lshr(x) if the "active byte" is in the high part of x
2283 return BinaryOperator::CreateExactLShr(
2284 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2285 }
2286
2287 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2288 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2289 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2290 Value *CV = ConstantInt::get(X->getType(), C);
2291 Value *V = Builder.CreateLShr(X, CV);
2292 return new TruncInst(V, IIOperand->getType());
2293 }
2294
2295 if (Instruction *crossLogicOpFold =
2296 foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
2297 return crossLogicOpFold;
2298 }
2299
2300 // Try to fold into bitreverse if bswap is the root of the expression tree.
2301 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2302 /*MatchBitReversals*/ true))
2303 return BitOp;
2304 break;
2305 }
2306 case Intrinsic::masked_load:
2307 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2308 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2309 break;
2310 case Intrinsic::masked_store:
2311 return simplifyMaskedStore(*II);
2312 case Intrinsic::masked_gather:
2313 return simplifyMaskedGather(*II);
2314 case Intrinsic::masked_scatter:
2315 return simplifyMaskedScatter(*II);
2316 case Intrinsic::launder_invariant_group:
2317 case Intrinsic::strip_invariant_group:
2318 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2319 return replaceInstUsesWith(*II, SkippedBarrier);
2320 break;
2321 case Intrinsic::powi:
2322 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2323 // 0 and 1 are handled in instsimplify
2324 // powi(x, -1) -> 1/x
2325 if (Power->isMinusOne())
2326 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2327 II->getArgOperand(0), II);
2328 // powi(x, 2) -> x*x
2329 if (Power->equalsInt(2))
2330 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2331 II->getArgOperand(0), II);
2332
2333 if (!Power->getValue()[0]) {
2334 Value *X;
2335 // If power is even:
2336 // powi(-x, p) -> powi(x, p)
2337 // powi(fabs(x), p) -> powi(x, p)
2338 // powi(copysign(x, y), p) -> powi(x, p)
2339 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2340 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2341 match(II->getArgOperand(0),
2342 m_CopySign(m_Value(X), m_Value())))
2343 return replaceOperand(*II, 0, X);
2344 }
2345 }
2346 break;
2347
2348 case Intrinsic::cttz:
2349 case Intrinsic::ctlz:
2350 if (auto *I = foldCttzCtlz(*II, *this))
2351 return I;
2352 break;
2353
2354 case Intrinsic::ctpop:
2355 if (auto *I = foldCtpop(*II, *this))
2356 return I;
2357 break;
2358
2359 case Intrinsic::fshl:
2360 case Intrinsic::fshr: {
2361 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2362 Type *Ty = II->getType();
2363 unsigned BitWidth = Ty->getScalarSizeInBits();
2364 Constant *ShAmtC;
2365 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2366 // Canonicalize a shift amount constant operand to modulo the bit-width.
2367 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2368 Constant *ModuloC =
2369 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2370 if (!ModuloC)
2371 return nullptr;
2372 if (ModuloC != ShAmtC)
2373 return replaceOperand(*II, 2, ModuloC);
2374
2375 assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC,
2376 ShAmtC, DL),
2377 m_One()) &&
2378 "Shift amount expected to be modulo bitwidth");
2379
2380 // Canonicalize funnel shift right by constant to funnel shift left. This
2381 // is not entirely arbitrary. For historical reasons, the backend may
2382 // recognize rotate left patterns but miss rotate right patterns.
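// Illustrative example (i32): fshr(X, Y, 8) == fshl(X, Y, 24), since the two
// shifts together always consume BitWidth bits.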
2383 if (IID == Intrinsic::fshr) {
2384 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2385 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2386 return nullptr;
2387
2388 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2389 Module *Mod = II->getModule();
2390 Function *Fshl =
2391 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2392 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2393 }
2394 assert(IID == Intrinsic::fshl &&
2395 "All funnel shifts by simple constants should go left");
2396
2397 // fshl(X, 0, C) --> shl X, C
2398 // fshl(X, undef, C) --> shl X, C
2399 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2400 return BinaryOperator::CreateShl(Op0, ShAmtC);
2401
2402 // fshl(0, X, C) --> lshr X, (BW-C)
2403 // fshl(undef, X, C) --> lshr X, (BW-C)
2404 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2405 return BinaryOperator::CreateLShr(Op1,
2406 ConstantExpr::getSub(WidthC, ShAmtC));
2407
2408 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2409 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2410 Module *Mod = II->getModule();
2411 Function *Bswap =
2412 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2413 return CallInst::Create(Bswap, { Op0 });
2414 }
2415 if (Instruction *BitOp =
2416 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2417 /*MatchBitReversals*/ true))
2418 return BitOp;
2419
2420 // R = fshl(X, X, C2)
2421 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2422 Value *InnerOp;
2423 const APInt *ShAmtInnerC, *ShAmtOuterC;
2424 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2425 m_APInt(ShAmtInnerC))) &&
2426 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2427 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2428 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2429 if (Modulo.isZero())
2430 return replaceInstUsesWith(*II, InnerOp);
2431 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2432 return CallInst::Create(II->getCalledFunction(),
2433 {InnerOp, InnerOp, ModuloC});
2434 }
2435 }
2436
2437 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2438 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2439 // if BitWidth is a power-of-2
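// Reasoning sketch: with equal value operands this is a rotate, and rotating
// left by -Y equals rotating right by Y because the shift amount is taken
// modulo the bit width; that modulo only matches two's-complement negation
// when the bit width is a power of two.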
2440 Value *Y;
2441 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2442 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2443 Module *Mod = II->getModule();
2444 Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2445 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2446 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2447 }
2448
2449 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2450 // power-of-2
2451 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2452 match(Op1, m_ZeroInt())) {
2453 Value *Op2 = II->getArgOperand(2);
2454 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2455 return BinaryOperator::CreateShl(Op0, And);
2456 }
2457
2458 // Left or right might be masked.
2459 if (SimplifyDemandedInstructionBits(*II))
2460 return &CI;
2461
2462 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2463 // so only the low bits of the shift amount are demanded if the bitwidth is
2464 // a power-of-2.
2465 if (!isPowerOf2_32(BitWidth))
2466 break;
2467 APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
2468 KnownBits Op2Known(BitWidth);
2469 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2470 return &CI;
2471 break;
2472 }
2473 case Intrinsic::ptrmask: {
2474 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2475 KnownBits Known(BitWidth);
2476 if (SimplifyDemandedInstructionBits(*II, Known))
2477 return II;
2478
2479 Value *InnerPtr, *InnerMask;
2480 bool Changed = false;
2481 // Combine:
2482 // (ptrmask (ptrmask p, A), B)
2483 // -> (ptrmask p, (and A, B))
2484 if (match(II->getArgOperand(0),
2485 m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
2486 m_Value(InnerMask))))) {
2487 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2488 "Mask types must match");
2489 // TODO: If InnerMask == Op1, we could copy attributes from inner
2490 // callsite -> outer callsite.
2491 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2492 replaceOperand(CI, 0, InnerPtr);
2493 replaceOperand(CI, 1, NewMask);
2494 Changed = true;
2495 }
2496
2497 // See if we can deduce non-null.
2498 if (!CI.hasRetAttr(Attribute::NonNull) &&
2499 (Known.isNonZero() ||
2500 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2501 CI.addRetAttr(Attribute::NonNull);
2502 Changed = true;
2503 }
2504
2505 unsigned NewAlignmentLog =
2506 std::min(Value::MaxAlignmentExponent,
2507 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2508 // Known bits will capture if we had alignment information associated with
2509 // the pointer argument.
2510 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2511 CI.addRetAttr(Attribute::getWithAlignment(
2512 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2513 Changed = true;
2514 }
2515 if (Changed)
2516 return &CI;
2517 break;
2518 }
2519 case Intrinsic::uadd_with_overflow:
2520 case Intrinsic::sadd_with_overflow: {
2521 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2522 return I;
2523
2524 // Given 2 constant operands whose sum does not overflow:
2525 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2526 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2527 Value *X;
2528 const APInt *C0, *C1;
2529 Value *Arg0 = II->getArgOperand(0);
2530 Value *Arg1 = II->getArgOperand(1);
2531 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2532 bool HasNWAdd = IsSigned
2533 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2534 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2535 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2536 bool Overflow;
2537 APInt NewC =
2538 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2539 if (!Overflow)
2540 return replaceInstUsesWith(
2541 *II, Builder.CreateBinaryIntrinsic(
2542 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2543 }
2544 break;
2545 }
2546
2547 case Intrinsic::umul_with_overflow:
2548 case Intrinsic::smul_with_overflow:
2549 case Intrinsic::usub_with_overflow:
2550 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2551 return I;
2552 break;
2553
2554 case Intrinsic::ssub_with_overflow: {
2555 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2556 return I;
2557
2558 Constant *C;
2559 Value *Arg0 = II->getArgOperand(0);
2560 Value *Arg1 = II->getArgOperand(1);
2561 // Given a constant C that is not the minimum signed value
2562 // for an integer of a given bit width:
2563 //
2564 // ssubo X, C -> saddo X, -C
2565 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2566 Value *NegVal = ConstantExpr::getNeg(C);
2567 // Build a saddo call that is equivalent to the discovered
2568 // ssubo call.
2569 return replaceInstUsesWith(
2570 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2571 Arg0, NegVal));
2572 }
2573
2574 break;
2575 }
2576
2577 case Intrinsic::uadd_sat:
2578 case Intrinsic::sadd_sat:
2579 case Intrinsic::usub_sat:
2580 case Intrinsic::ssub_sat: {
2581 SaturatingInst *SI = cast<SaturatingInst>(II);
2582 Type *Ty = SI->getType();
2583 Value *Arg0 = SI->getLHS();
2584 Value *Arg1 = SI->getRHS();
2585
2586 // Make use of known overflow information.
2587 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2588 Arg0, Arg1, SI);
2589 switch (OR) {
2590 case OverflowResult::MayOverflow:
2591 break;
2592 case OverflowResult::NeverOverflows:
2593 if (SI->isSigned())
2594 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2595 else
2596 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2597 case OverflowResult::AlwaysOverflowsLow: {
2598 unsigned BitWidth = Ty->getScalarSizeInBits();
2599 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2600 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2601 }
2602 case OverflowResult::AlwaysOverflowsHigh: {
2603 unsigned BitWidth = Ty->getScalarSizeInBits();
2604 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2605 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2606 }
2607 }
2608
2609 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2610 // which after that:
2611 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2612 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2613 Constant *C, *C1;
2614 Value *A;
2615 if (IID == Intrinsic::usub_sat &&
2616 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2617 match(Arg1, m_ImmConstant(C1))) {
2618 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2619 auto *NewSub =
2620 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2621 return replaceInstUsesWith(*SI, NewSub);
2622 }
2623
2624 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2625 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2626 C->isNotMinSignedValue()) {
2627 Value *NegVal = ConstantExpr::getNeg(C);
2628 return replaceInstUsesWith(
2629 *II, Builder.CreateBinaryIntrinsic(
2630 Intrinsic::sadd_sat, Arg0, NegVal));
2631 }
2632
2633 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2634 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2635 // if Val and Val2 have the same sign
2636 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2637 Value *X;
2638 const APInt *Val, *Val2;
2639 APInt NewVal;
2640 bool IsUnsigned =
2641 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2642 if (Other->getIntrinsicID() == IID &&
2643 match(Arg1, m_APInt(Val)) &&
2644 match(Other->getArgOperand(0), m_Value(X)) &&
2645 match(Other->getArgOperand(1), m_APInt(Val2))) {
2646 if (IsUnsigned)
2647 NewVal = Val->uadd_sat(*Val2);
2648 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2649 bool Overflow;
2650 NewVal = Val->sadd_ov(*Val2, Overflow);
2651 if (Overflow) {
2652 // Both adds together may add more than SignedMaxValue
2653 // without saturating the final result.
2654 break;
2655 }
2656 } else {
2657 // Cannot fold saturated addition with different signs.
2658 break;
2659 }
2660
2661 return replaceInstUsesWith(
2662 *II, Builder.CreateBinaryIntrinsic(
2663 IID, X, ConstantInt::get(II->getType(), NewVal)));
2664 }
2665 }
2666 break;
2667 }
2668
2669 case Intrinsic::minnum:
2670 case Intrinsic::maxnum:
2671 case Intrinsic::minimum:
2672 case Intrinsic::maximum: {
2673 Value *Arg0 = II->getArgOperand(0);
2674 Value *Arg1 = II->getArgOperand(1);
2675 Value *X, *Y;
2676 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2677 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2678 // If both operands are negated, invert the call and negate the result:
2679 // min(-X, -Y) --> -(max(X, Y))
2680 // max(-X, -Y) --> -(min(X, Y))
2681 Intrinsic::ID NewIID;
2682 switch (IID) {
2683 case Intrinsic::maxnum:
2684 NewIID = Intrinsic::minnum;
2685 break;
2686 case Intrinsic::minnum:
2687 NewIID = Intrinsic::maxnum;
2688 break;
2689 case Intrinsic::maximum:
2690 NewIID = Intrinsic::minimum;
2691 break;
2692 case Intrinsic::minimum:
2693 NewIID = Intrinsic::maximum;
2694 break;
2695 default:
2696 llvm_unreachable("unexpected intrinsic ID");
2697 }
2698 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2699 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2700 FNeg->copyIRFlags(II);
2701 return FNeg;
2702 }
2703
2704 // m(m(X, C2), C1) -> m(X, C)
2705 const APFloat *C1, *C2;
2706 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2707 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2708 ((match(M->getArgOperand(0), m_Value(X)) &&
2709 match(M->getArgOperand(1), m_APFloat(C2))) ||
2710 (match(M->getArgOperand(1), m_Value(X)) &&
2711 match(M->getArgOperand(0), m_APFloat(C2))))) {
2712 APFloat Res(0.0);
2713 switch (IID) {
2714 case Intrinsic::maxnum:
2715 Res = maxnum(*C1, *C2);
2716 break;
2717 case Intrinsic::minnum:
2718 Res = minnum(*C1, *C2);
2719 break;
2720 case Intrinsic::maximum:
2721 Res = maximum(*C1, *C2);
2722 break;
2723 case Intrinsic::minimum:
2724 Res = minimum(*C1, *C2);
2725 break;
2726 default:
2727 llvm_unreachable("unexpected intrinsic ID");
2728 }
2729 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2730 // was a simplification (so Arg0 and its original flags could
2731 // propagate?)
2732 Value *V = Builder.CreateBinaryIntrinsic(
2733 IID, X, ConstantFP::get(Arg0->getType(), Res),
2734 FMFSource::intersect(II, M));
2735 return replaceInstUsesWith(*II, V);
2736 }
2737 }
2738
2739 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2740 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2741 match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
2742 X->getType() == Y->getType()) {
2743 Value *NewCall =
2744 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2745 return new FPExtInst(NewCall, II->getType());
2746 }
2747
2748 // max X, -X --> fabs X
2749 // min X, -X --> -(fabs X)
2750 // TODO: Remove one-use limitation? That is obviously better for max,
2751 // hence why we don't check for one-use for that. However,
2752 // it would be an extra instruction for min (fnabs), but
2753 // that is still likely better for analysis and codegen.
2754 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2755 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2756 return Op0->hasOneUse() ||
2757 (IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2758 return false;
2759 };
2760
2761 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2762 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2763 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
2764 R = Builder.CreateFNegFMF(R, II);
2765 return replaceInstUsesWith(*II, R);
2766 }
2767
2768 break;
2769 }
2770 case Intrinsic::matrix_multiply: {
2771 // Optimize negation in matrix multiplication.
2772
2773 // -A * -B -> A * B
2774 Value *A, *B;
2775 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2776 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2777 replaceOperand(*II, 0, A);
2778 replaceOperand(*II, 1, B);
2779 return II;
2780 }
2781
2782 Value *Op0 = II->getOperand(0);
2783 Value *Op1 = II->getOperand(1);
2784 Value *OpNotNeg, *NegatedOp;
2785 unsigned NegatedOpArg, OtherOpArg;
2786 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2787 NegatedOp = Op0;
2788 NegatedOpArg = 0;
2789 OtherOpArg = 1;
2790 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2791 NegatedOp = Op1;
2792 NegatedOpArg = 1;
2793 OtherOpArg = 0;
2794 } else
2795 // Multiplication doesn't have a negated operand.
2796 break;
2797
2798 // Only optimize if the negated operand has only one use.
2799 if (!NegatedOp->hasOneUse())
2800 break;
2801
2802 Value *OtherOp = II->getOperand(OtherOpArg);
2803 VectorType *RetTy = cast<VectorType>(II->getType());
2804 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2805 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2806 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2807 ElementCount OtherCount = OtherOpTy->getElementCount();
2808 ElementCount RetCount = RetTy->getElementCount();
2809 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2810 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2811 ElementCount::isKnownLT(OtherCount, RetCount)) {
2812 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2813 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2814 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2815 return II;
2816 }
2817 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2818 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2819 SmallVector<Value *, 5> NewArgs(II->args());
2820 NewArgs[NegatedOpArg] = OpNotNeg;
2821 Instruction *NewMul =
2822 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
2823 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
2824 }
2825 break;
2826 }
2827 case Intrinsic::fmuladd: {
2828 // Try to simplify the underlying FMul.
2829 if (Value *V =
2830 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
2831 II->getFastMathFlags(), SQ.getWithInstruction(II)))
2832 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
2833 II->getFastMathFlags());
2834
2835 [[fallthrough]];
2836 }
2837 case Intrinsic::fma: {
2838 // fma fneg(x), fneg(y), z -> fma x, y, z
2839 Value *Src0 = II->getArgOperand(0);
2840 Value *Src1 = II->getArgOperand(1);
2841 Value *Src2 = II->getArgOperand(2);
2842 Value *X, *Y;
2843 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2844 replaceOperand(*II, 0, X);
2845 replaceOperand(*II, 1, Y);
2846 return II;
2847 }
2848
2849 // fma fabs(x), fabs(x), z -> fma x, x, z
2850 if (match(Src0, m_FAbs(m_Value(X))) &&
2851 match(Src1, m_FAbs(m_Specific(X)))) {
2852 replaceOperand(*II, 0, X);
2853 replaceOperand(*II, 1, X);
2854 return II;
2855 }
2856
2857 // Try to simplify the underlying FMul. We can only apply simplifications
2858 // that do not require rounding.
2859 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
2860 SQ.getWithInstruction(II)))
2861 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
2862
2863 // fma x, y, 0 -> fmul x, y
2864 // This is always valid for -0.0, but requires nsz for +0.0 as
2865 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2866 if (match(Src2, m_NegZeroFP()) ||
2867 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
2868 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
2869
2870 // fma x, -1.0, y -> fsub y, x
2871 if (match(Src1, m_SpecificFP(-1.0)))
2872 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
2873
2874 break;
2875 }
2876 case Intrinsic::copysign: {
2877 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
2878 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
2879 Sign, getSimplifyQuery().getWithInstruction(II))) {
2880 if (*KnownSignBit) {
2881 // If we know that the sign argument is negative, reduce to FNABS:
2882 // copysign Mag, -Sign --> fneg (fabs Mag)
2883 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2884 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
2885 }
2886
2887 // If we know that the sign argument is positive, reduce to FABS:
2888 // copysign Mag, +Sign --> fabs Mag
2889 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2890 return replaceInstUsesWith(*II, Fabs);
2891 }
2892
2893 // Propagate sign argument through nested calls:
2894 // copysign Mag, (copysign ?, X) --> copysign Mag, X
2895 Value *X;
2896 if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) {
2897 Value *CopySign =
2898 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
2899 return replaceInstUsesWith(*II, CopySign);
2900 }
2901
2902 // Clear sign-bit of constant magnitude:
2903 // copysign -MagC, X --> copysign MagC, X
2904 // TODO: Support constant folding for fabs
2905 const APFloat *MagC;
2906 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
2907 APFloat PosMagC = *MagC;
2908 PosMagC.clearSign();
2909 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
2910 }
2911
2912 // Peek through changes of magnitude's sign-bit. This call rewrites those:
2913 // copysign (fabs X), Sign --> copysign X, Sign
2914 // copysign (fneg X), Sign --> copysign X, Sign
2915 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
2916 return replaceOperand(*II, 0, X);
2917
2918 break;
2919 }
2920 case Intrinsic::fabs: {
2921 Value *Cond, *TVal, *FVal;
2922 Value *Arg = II->getArgOperand(0);
2923 Value *X;
2924 // fabs (-X) --> fabs (X)
2925 if (match(Arg, m_FNeg(m_Value(X)))) {
2926 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2927 return replaceInstUsesWith(CI, Fabs);
2928 }
2929
2930 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
2931 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
2932 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
2933 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
2934 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
2935 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
2936 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
2937 FastMathFlags FMF1 = II->getFastMathFlags();
2938 FastMathFlags FMF2 = cast<SelectInst>(Arg)->getFastMathFlags();
2939 FMF2.setNoSignedZeros(false);
2940 SI->setFastMathFlags(FMF1 | FMF2);
2941 return SI;
2942 }
2943 // fabs (select Cond, -FVal, FVal) --> fabs FVal
2944 if (match(TVal, m_FNeg(m_Specific(FVal))))
2945 return replaceOperand(*II, 0, FVal);
2946 // fabs (select Cond, TVal, -TVal) --> fabs TVal
2947 if (match(FVal, m_FNeg(m_Specific(TVal))))
2948 return replaceOperand(*II, 0, TVal);
2949 }
2950
2951 Value *Magnitude, *Sign;
2952 if (match(II->getArgOperand(0),
2953 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
2954 // fabs (copysign x, y) -> (fabs x)
2955 CallInst *AbsSign =
2956 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
2957 return replaceInstUsesWith(*II, AbsSign);
2958 }
2959
2960 [[fallthrough]];
2961 }
2962 case Intrinsic::ceil:
2963 case Intrinsic::floor:
2964 case Intrinsic::round:
2965 case Intrinsic::roundeven:
2966 case Intrinsic::nearbyint:
2967 case Intrinsic::rint:
2968 case Intrinsic::trunc: {
2969 Value *ExtSrc;
2970 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2971 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2972 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
2973 return new FPExtInst(NarrowII, II->getType());
2974 }
2975 break;
2976 }
2977 case Intrinsic::cos:
2978 case Intrinsic::amdgcn_cos: {
2979 Value *X, *Sign;
2980 Value *Src = II->getArgOperand(0);
2981 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
2982 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
2983 // cos(-x) --> cos(x)
2984 // cos(fabs(x)) --> cos(x)
2985 // cos(copysign(x, y)) --> cos(x)
2986 return replaceOperand(*II, 0, X);
2987 }
2988 break;
2989 }
2990 case Intrinsic::sin:
2991 case Intrinsic::amdgcn_sin: {
2992 Value *X;
2993 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
2994 // sin(-x) --> -sin(x)
2995 Value *NewSin = Builder.CreateUnaryIntrinsic(IID, X, II);
2996 return UnaryOperator::CreateFNegFMF(NewSin, II);
2997 }
2998 break;
2999 }
3000 case Intrinsic::ldexp: {
3001 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3002 //
3003 // The danger is if the first ldexp would overflow to infinity or underflow
3004 // to zero, but the combined exponent avoids it. We ignore this with
3005 // reassoc.
3006 //
3007 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3008 // it would just double down on the overflow/underflow which would occur
3009 // anyway.
3010 //
3011 // TODO: Could do better if we had range tracking for the input value
3012 // exponent. Also could broaden sign check to cover == 0 case.
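// Illustrative hazard (double): ldexp(ldexp(1.0, 2000), -2000) overflows to
// +inf in the inner call, while the combined ldexp(1.0, 0) == 1.0; hence the
// reassoc or same-sign-exponent requirement above.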
3013 Value *Src = II->getArgOperand(0);
3014 Value *Exp = II->getArgOperand(1);
3015 Value *InnerSrc;
3016 Value *InnerExp;
3017 if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
3018 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3019 Exp->getType() == InnerExp->getType()) {
3020 FastMathFlags FMF = II->getFastMathFlags();
3021 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3022
3023 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3024 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
3025 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3026 // width.
3027 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3028 II->setArgOperand(1, NewExp);
3029 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3030 return replaceOperand(*II, 0, InnerSrc);
3031 }
3032 }
3033
3034 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3035 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3036 Value *ExtSrc;
3037 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3038 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3039 Value *Select =
3040 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3041 ConstantFP::get(II->getType(), 1.0));
3042 return BinaryOperator::CreateFMulFMF(Src, Select, II);
3043 }
3044 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3045 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3046 Value *Select =
3047 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3048 ConstantFP::get(II->getType(), 1.0));
3049 return BinaryOperator::CreateFMulFMF(Src, Select, II);
3050 }
3051
3052 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3053 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3054 ///
3055 // TODO: If we cared, should insert a canonicalize for x
3056 Value *SelectCond, *SelectLHS, *SelectRHS;
3057 if (match(II->getArgOperand(1),
3058 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3059 m_Value(SelectRHS))))) {
3060 Value *NewLdexp = nullptr;
3061 Value *Select = nullptr;
3062 if (match(SelectRHS, m_ZeroInt())) {
3063 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3064 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3065 } else if (match(SelectLHS, m_ZeroInt())) {
3066 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3067 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3068 }
3069
3070 if (NewLdexp) {
3071 Select->takeName(II);
3072 return replaceInstUsesWith(*II, Select);
3073 }
3074 }
3075
3076 break;
3077 }
3078 case Intrinsic::ptrauth_auth:
3079 case Intrinsic::ptrauth_resign: {
3080 // We don't support this optimization on intrinsic calls with deactivation
3081 // symbols, which are represented using operand bundles.
3082 if (II->hasOperandBundles())
3083 break;
3084
3085 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3086 // sign+auth component if the key and discriminator match.
3087 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3088 Value *Ptr = II->getArgOperand(0);
3089 Value *Key = II->getArgOperand(1);
3090 Value *Disc = II->getArgOperand(2);
3091
3092 // AuthKey will be the key we need to end up authenticating against in
3093 // whatever we replace this sequence with.
3094 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3095 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3096 // We don't support this optimization on intrinsic calls with deactivation
3097 // symbols, which are represented using operand bundles.
3098 if (CI->hasOperandBundles())
3099 break;
3100
3101 BasePtr = CI->getArgOperand(0);
3102 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3103 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3104 break;
3105 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3106 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3107 break;
3108 AuthKey = CI->getArgOperand(1);
3109 AuthDisc = CI->getArgOperand(2);
3110 } else
3111 break;
3112 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3113 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3114 // our purposes, so check for that too.
3115 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3116 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3117 break;
3118
3119 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3120 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3121 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3122 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3123 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3124 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3125 SignDisc, /*AddrDisc=*/Null,
3126 /*DeactivationSymbol=*/Null);
3127 replaceInstUsesWith(
3128 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3129 return eraseInstFromFunction(*II);
3130 }
3131
3132 // auth(ptrauth(p,k,d),k,d) -> p
3133 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3134 } else
3135 break;
3136
3137 unsigned NewIntrin;
3138 if (AuthKey && NeedSign) {
3139 // resign(0,1) + resign(1,2) = resign(0, 2)
3140 NewIntrin = Intrinsic::ptrauth_resign;
3141 } else if (AuthKey) {
3142 // resign(0,1) + auth(1) = auth(0)
3143 NewIntrin = Intrinsic::ptrauth_auth;
3144 } else if (NeedSign) {
3145 // sign(0) + resign(0, 1) = sign(1)
3146 NewIntrin = Intrinsic::ptrauth_sign;
3147 } else {
3148 // sign(0) + auth(0) = nop
3149 replaceInstUsesWith(*II, BasePtr);
3150 return eraseInstFromFunction(*II);
3151 }
3152
3153 SmallVector<Value *, 4> CallArgs;
3154 CallArgs.push_back(BasePtr);
3155 if (AuthKey) {
3156 CallArgs.push_back(AuthKey);
3157 CallArgs.push_back(AuthDisc);
3158 }
3159
3160 if (NeedSign) {
3161 CallArgs.push_back(II->getArgOperand(3));
3162 CallArgs.push_back(II->getArgOperand(4));
3163 }
3164
3165 Function *NewFn =
3166 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3167 return CallInst::Create(NewFn, CallArgs);
3168 }
3169 case Intrinsic::arm_neon_vtbl1:
3170 case Intrinsic::aarch64_neon_tbl1:
3171 if (Value *V = simplifyNeonTbl1(*II, Builder))
3172 return replaceInstUsesWith(*II, V);
3173 break;
3174
3175 case Intrinsic::arm_neon_vmulls:
3176 case Intrinsic::arm_neon_vmullu:
3177 case Intrinsic::aarch64_neon_smull:
3178 case Intrinsic::aarch64_neon_umull: {
3179 Value *Arg0 = II->getArgOperand(0);
3180 Value *Arg1 = II->getArgOperand(1);
3181
3182 // Handle mul by zero first:
3183 if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3184 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3185 }
3186
3187 // Check for constant LHS & RHS - in this case we just simplify.
3188 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3189 IID == Intrinsic::aarch64_neon_umull);
3190 VectorType *NewVT = cast<VectorType>(II->getType());
3191 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3192 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3193 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3194 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3195 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3196 }
3197
3198 // Couldn't simplify - canonicalize constant to the RHS.
3199 std::swap(Arg0, Arg1);
3200 }
3201
3202 // Handle mul by one:
3203 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3204 if (ConstantInt *Splat =
3205 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3206 if (Splat->isOne())
3207 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3208 /*isSigned=*/!Zext);
3209
3210 break;
3211 }
3212 case Intrinsic::arm_neon_aesd:
3213 case Intrinsic::arm_neon_aese:
3214 case Intrinsic::aarch64_crypto_aesd:
3215 case Intrinsic::aarch64_crypto_aese:
3216 case Intrinsic::aarch64_sve_aesd:
3217 case Intrinsic::aarch64_sve_aese: {
3218 Value *DataArg = II->getArgOperand(0);
3219 Value *KeyArg = II->getArgOperand(1);
3220
3221 // Accept zero on either operand.
3222 if (!match(KeyArg, m_ZeroInt()))
3223 std::swap(KeyArg, DataArg);
3224
3225 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
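// E.g. aese(xor(%data, %key), zeroinitializer) --> aese(%data, %key); the
// instruction's own XOR of its two operands absorbs the explicit one.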
3226 Value *Data, *Key;
3227 if (match(KeyArg, m_ZeroInt()) &&
3228 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3229 replaceOperand(*II, 0, Data);
3230 replaceOperand(*II, 1, Key);
3231 return II;
3232 }
3233 break;
3234 }
3235 case Intrinsic::hexagon_V6_vandvrt:
3236 case Intrinsic::hexagon_V6_vandvrt_128B: {
3237 // Simplify Q -> V -> Q conversion.
3238 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3239 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3240 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3241 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3242 break;
3243 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3244 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3245 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3246 // Check if every byte has common bits in Bytes and Mask.
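// Illustrative reading of the check below: if every byte lane of the
// replicated control word shares at least one known-set bit between the
// Q->V and V->Q conversions, the round trip preserves the predicate and
// the original Q value can be reused directly.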
3247 uint64_t C = Bytes1 & Mask1;
3248 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3249 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3250 }
3251 break;
3252 }
3253 case Intrinsic::stackrestore: {
3254 enum class ClassifyResult {
3255 None,
3256 Alloca,
3257 StackRestore,
3258 CallWithSideEffects,
3259 };
3260 auto Classify = [](const Instruction *I) {
3261 if (isa<AllocaInst>(I))
3262 return ClassifyResult::Alloca;
3263
3264 if (auto *CI = dyn_cast<CallInst>(I)) {
3265 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3266 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3267 return ClassifyResult::StackRestore;
3268
3269 if (II->mayHaveSideEffects())
3270 return ClassifyResult::CallWithSideEffects;
3271 } else {
3272 // Consider all non-intrinsic calls to be side effects
3273 return ClassifyResult::CallWithSideEffects;
3274 }
3275 }
3276
3277 return ClassifyResult::None;
3278 };
3279
3280 // If the stacksave and the stackrestore are in the same BB, and there is
3281 // no intervening call, alloca, or stackrestore of a different stacksave,
3282 // remove the restore. This can happen when variable allocas are DCE'd.
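// Illustrative example:
//   %sp = call ptr @llvm.stacksave()
//   ... only instructions classified as None above ...
//   call void @llvm.stackrestore(ptr %sp)   ; removable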
3283 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3284 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3285 SS->getParent() == II->getParent()) {
3286 BasicBlock::iterator BI(SS);
3287 bool CannotRemove = false;
3288 for (++BI; &*BI != II; ++BI) {
3289 switch (Classify(&*BI)) {
3290 case ClassifyResult::None:
3291 // So far so good, look at next instructions.
3292 break;
3293
3294 case ClassifyResult::StackRestore:
3295 // If we found an intervening stackrestore for a different
3296 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3297 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3298 CannotRemove = true;
3299 break;
3300
3301 case ClassifyResult::Alloca:
3302 case ClassifyResult::CallWithSideEffects:
3303 // If we found an alloca, a non-intrinsic call, or an intrinsic
3304 // call with side effects, we can't remove the stackrestore.
3305 CannotRemove = true;
3306 break;
3307 }
3308 if (CannotRemove)
3309 break;
3310 }
3311
3312 if (!CannotRemove)
3313 return eraseInstFromFunction(CI);
3314 }
3315 }
3316
3317 // Scan down this block to see if there is another stack restore in the
3318 // same block without an intervening call/alloca.
3319 BasicBlock::iterator BI(II);
3320 Instruction *TI = II->getParent()->getTerminator();
3321 bool CannotRemove = false;
3322 for (++BI; &*BI != TI; ++BI) {
3323 switch (Classify(&*BI)) {
3324 case ClassifyResult::None:
3325 // So far so good, look at next instructions.
3326 break;
3327
3328 case ClassifyResult::StackRestore:
3329 // If there is a stackrestore below this one, remove this one.
3330 return eraseInstFromFunction(CI);
3331
3332 case ClassifyResult::Alloca:
3333 case ClassifyResult::CallWithSideEffects:
3334 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3335 // with side effects (such as llvm.stacksave and llvm.read_register),
3336 // we can't remove the stack restore.
3337 CannotRemove = true;
3338 break;
3339 }
3340 if (CannotRemove)
3341 break;
3342 }
3343
3344 // If the stack restore is in a return, resume, or unwind block and if there
3345 // are no allocas or calls between the restore and the return, nuke the
3346 // restore.
3347 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3348 return eraseInstFromFunction(CI);
3349 break;
3350 }
3351 case Intrinsic::lifetime_end:
3352 // ASan needs to poison memory to detect invalid accesses, which are
3353 // possible even for an empty lifetime range.
3354 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3355 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3356 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3357 break;
3358
3359 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3360 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3361 }))
3362 return nullptr;
3363 break;
3364 case Intrinsic::assume: {
3365 Value *IIOperand = II->getArgOperand(0);
3366 SmallVector<OperandBundleDef, 4> OpBundles;
3367 II->getOperandBundlesAsDefs(OpBundles);
3368
3369 /// This will remove the boolean Condition from the assume given as an
3370 /// argument and remove the assume if it becomes useless.
3371 /// Always returns nullptr so it can be used as a return value.
3372 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3373 assert(isa<AssumeInst>(Assume));
3374 if (isAssumeWithEmptyBundle(*cast<AssumeInst>(Assume)))
3375 return eraseInstFromFunction(CI);
3376 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3377 return nullptr;
3378 };
3379 // Remove an assume if it is followed by an identical assume.
3380 // TODO: Do we need this? Unless there are conflicting assumptions, the
3381 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3382 Instruction *Next = II->getNextNode();
3383 if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3384 return RemoveConditionFromAssume(Next);
3385
3386 // Canonicalize assume(a && b) -> assume(a); assume(b);
3387 // Note: New assumption intrinsics created here are registered by
3388 // the InstCombineIRInserter object.
3389 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3390 Value *AssumeIntrinsic = II->getCalledOperand();
3391 Value *A, *B;
3392 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3393 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3394 II->getName());
3395 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3396 return eraseInstFromFunction(*II);
3397 }
3398 // assume(!(a || b)) -> assume(!a); assume(!b);
3399 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3400 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3401 Builder.CreateNot(A), OpBundles, II->getName());
3402 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3403 Builder.CreateNot(B), II->getName());
3404 return eraseInstFromFunction(*II);
3405 }
3406
3407 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3408 // (if assume is valid at the load)
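// E.g.:
//   %p = load ptr, ptr %addr
//   %c = icmp ne ptr %p, null
//   call void @llvm.assume(i1 %c)
// marks the load with !nonnull (and !noundef) metadata and drops the
// condition from the assume.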
3409 Instruction *LHS;
3411 m_Zero())) &&
3412 LHS->getOpcode() == Instruction::Load &&
3413 LHS->getType()->isPointerTy() &&
3414 isValidAssumeForContext(II, LHS, &DT)) {
3415 MDNode *MD = MDNode::get(II->getContext(), {});
3416 LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3417 LHS->setMetadata(LLVMContext::MD_noundef, MD);
3418 return RemoveConditionFromAssume(II);
3419
3420 // TODO: apply nonnull return attributes to calls and invokes
3421 // TODO: apply range metadata for range check patterns?
3422 }
3423
3424 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3425 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3426
3427 // Separate storage assumptions apply to the underlying allocations, not
3428 // any particular pointer within them. When evaluating the hints for AA
3429 // purposes we getUnderlyingObject them; by precomputing the answers here
3430 // we can avoid having to do so repeatedly there.
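// E.g. a "separate_storage" hint on a GEP into an alloca is rewritten to
// refer to the alloca itself, so alias analysis does not have to walk back
// to the underlying object on every query.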
3431 if (OBU.getTagName() == "separate_storage") {
3432 assert(OBU.Inputs.size() == 2);
3433 auto MaybeSimplifyHint = [&](const Use &U) {
3434 Value *Hint = U.get();
3435 // Not having a limit is safe because InstCombine removes unreachable
3436 // code.
3437 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3438 if (Hint != UnderlyingObject)
3439 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3440 };
3441 MaybeSimplifyHint(OBU.Inputs[0]);
3442 MaybeSimplifyHint(OBU.Inputs[1]);
3443 }
3444
3445 // Try to remove redundant alignment assumptions.
3446 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3448 *cast<AssumeInst>(II), II->arg_size() + Idx);
3449 if (!RK || RK.AttrKind != Attribute::Alignment ||
3451 continue;
3452
3453 // Remove align 1 bundles; they don't add any useful information.
3454 if (RK.ArgValue == 1)
3456
3457 // Don't try to remove align assumptions for pointers derived from
3458 // arguments. We might lose information if the function gets inlined and
3459 // the align argument attribute disappears.
3460 Value *UO = getUnderlyingObject(RK.WasOn);
3461 if (!UO || isa<Argument>(UO))
3462 continue;
3463
3464 // Compute known bits for the pointer, passing nullptr as context to
3465 // avoid computeKnownBits using the assumption we are about to remove
3466 // for reasoning.
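// E.g. an "align"(%p, 8) bundle can be dropped when %p is already known to
// be at least 8-byte aligned without relying on this assumption.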
3467 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3468 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3469 Value::MaxAlignmentExponent);
3470 if ((1ULL << TZ) < RK.ArgValue)
3471 continue;
3473 }
3474 }
3475
3476 // Convert nonnull assume like:
3477 // %A = icmp ne i32* %PTR, null
3478 // call void @llvm.assume(i1 %A)
3479 // into
3480 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3482 match(IIOperand,
3484 A->getType()->isPointerTy()) {
3485 if (auto *Replacement = buildAssumeFromKnowledge(
3486 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3487
3488 Replacement->insertBefore(Next->getIterator());
3489 AC.registerAssumption(Replacement);
3490 return RemoveConditionFromAssume(II);
3491 }
3492 }
3493
3494 // Convert alignment assume like:
3495 // %B = ptrtoint i32* %A to i64
3496 // %C = and i64 %B, Constant
3497 // %D = icmp eq i64 %C, 0
3498 // call void @llvm.assume(i1 %D)
3499 // into
3500 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3501 uint64_t AlignMask = 1;
3503 (match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3504 match(IIOperand,
3506 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3507 m_Zero())))) {
3508 if (isPowerOf2_64(AlignMask + 1)) {
3509 uint64_t Offset = 0;
3511 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3512 /// Note: this doesn't preserve the offset information but merges
3513 /// offset and alignment.
3514 /// TODO: we can generate a GEP instead of merging the alignment with
3515 /// the offset.
3516 RetainedKnowledge RK{Attribute::Alignment,
3517 (unsigned)MinAlign(Offset, AlignMask + 1), A};
3518 if (auto *Replacement =
3520
3521 Replacement->insertAfter(II->getIterator());
3522 AC.registerAssumption(Replacement);
3523 }
3524 return RemoveConditionFromAssume(II);
3525 }
3526 }
3527 }
3528
3529 /// Canonicalize Knowledge in operand bundles.
3530 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3531 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3532 auto &BOI = II->bundle_op_info_begin()[Idx];
3533 RetainedKnowledge RK =
3534 llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
3535 if (BOI.End - BOI.Begin > 2)
3536 continue; // Prevent reducing knowledge in an align with offset since
3537 // extracting a RetainedKnowledge from them loses offset
3538 // information
3539 RetainedKnowledge CanonRK =
3542 &getDominatorTree());
3543 if (CanonRK == RK)
3544 continue;
3545 if (!CanonRK) {
3546 if (BOI.End - BOI.Begin > 0) {
3547 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3548 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3549 }
3550 continue;
3551 }
3552 assert(RK.AttrKind == CanonRK.AttrKind);
3553 if (BOI.End - BOI.Begin > 0)
3554 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3555 if (BOI.End - BOI.Begin > 1)
3556 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3557 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3558 if (RK.WasOn)
3559 Worklist.pushValue(RK.WasOn);
3560 return II;
3561 }
3562 }
3563
3564 // If there is a dominating assume with the same condition as this one,
3565 // then this one is redundant, and should be removed.
3566 KnownBits Known(1);
3567 computeKnownBits(IIOperand, Known, II);
3568 if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
3569 return eraseInstFromFunction(*II);
3570
3571 // assume(false) is unreachable.
3572 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3573 CreateNonTerminatorUnreachable(II);
3574 return eraseInstFromFunction(*II);
3575 }
3576
3577 // Update the cache of affected values for this assumption (we might be
3578 // here because we just simplified the condition).
3579 AC.updateAffectedValues(cast<AssumeInst>(II));
3580 break;
3581 }
3582 case Intrinsic::experimental_guard: {
3583 // Is this guard followed by another guard? We scan forward over a small
3584 // fixed window of instructions to handle common cases with conditions
3585 // computed between guards.
3586 Instruction *NextInst = II->getNextNode();
3587 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3588 // Note: Using context-free form to avoid compile time blow up
3589 if (!isSafeToSpeculativelyExecute(NextInst))
3590 break;
3591 NextInst = NextInst->getNextNode();
3592 }
3593 Value *NextCond = nullptr;
3594 if (match(NextInst,
3595 m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3596 Value *CurrCond = II->getArgOperand(0);
3597
3598 // Remove a guard that is immediately preceded by an identical guard.
3599 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
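// E.g. (illustrative):
//   guard(%a); <a few speculatable instructions>; guard(%b)
// becomes, after hoisting those instructions above the first guard:
//   guard(%a & %b)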
3600 if (CurrCond != NextCond) {
3601 Instruction *MoveI = II->getNextNode();
3602 while (MoveI != NextInst) {
3603 auto *Temp = MoveI;
3604 MoveI = MoveI->getNextNode();
3605 Temp->moveBefore(II->getIterator());
3606 }
3607 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3608 }
3609 eraseInstFromFunction(*NextInst);
3610 return II;
3611 }
3612 break;
3613 }
3614 case Intrinsic::vector_insert: {
3615 Value *Vec = II->getArgOperand(0);
3616 Value *SubVec = II->getArgOperand(1);
3617 Value *Idx = II->getArgOperand(2);
3618 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3619 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3620 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3621
3622 // Only canonicalize if the destination vector, Vec, and SubVec are all
3623 // fixed vectors.
3624 if (DstTy && VecTy && SubVecTy) {
3625 unsigned DstNumElts = DstTy->getNumElements();
3626 unsigned VecNumElts = VecTy->getNumElements();
3627 unsigned SubVecNumElts = SubVecTy->getNumElements();
3628 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3629
3630 // An insert that entirely overwrites Vec with SubVec is a nop.
3631 if (VecNumElts == SubVecNumElts)
3632 return replaceInstUsesWith(CI, SubVec);
3633
3634 // Widen SubVec into a vector of the same width as Vec, since
3635 // shufflevector requires the two input vectors to be the same width.
3636 // Elements beyond the bounds of SubVec within the widened vector are
3637 // undefined.
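// Sketch for Vec = <4 x i32>, SubVec = <2 x i32>, Idx = 2:
//   WidenMask = <0, 1, poison, poison>
//   Mask      = <0, 1, 4, 5>   ; lanes 4-5 pick the widened SubVec elements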
3638 SmallVector<int, 8> WidenMask;
3639 unsigned i;
3640 for (i = 0; i != SubVecNumElts; ++i)
3641 WidenMask.push_back(i);
3642 for (; i != VecNumElts; ++i)
3643 WidenMask.push_back(PoisonMaskElem);
3644
3645 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3646
3647 SmallVector<int, 8> Mask;
3648 for (unsigned i = 0; i != IdxN; ++i)
3649 Mask.push_back(i);
3650 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3651 Mask.push_back(i);
3652 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3653 Mask.push_back(i);
3654
3655 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3656 return replaceInstUsesWith(CI, Shuffle);
3657 }
3658 break;
3659 }
3660 case Intrinsic::vector_extract: {
3661 Value *Vec = II->getArgOperand(0);
3662 Value *Idx = II->getArgOperand(1);
3663
3664 Type *ReturnType = II->getType();
3665 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3666 // ExtractIdx)
3667 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3668 Value *InsertTuple, *InsertIdx, *InsertValue;
3670 m_Value(InsertValue),
3671 m_Value(InsertIdx))) &&
3672 InsertValue->getType() == ReturnType) {
3673 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3674 // Case where we get the same index right after setting it.
3675 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3676 // InsertValue
3677 if (ExtractIdx == Index)
3678 return replaceInstUsesWith(CI, InsertValue);
3679 // If we are extracting a different index than the one that was set by
3680 // the insert.vector intrinsic, we can just forward the input tuple from
3681 // further up the chain: extract.vector(insert.vector(InsertTuple,
3682 // InsertValue, InsertIndex), ExtractIndex)
3683 // --> extract.vector(InsertTuple, ExtractIndex)
3684 else
3685 return replaceOperand(CI, 0, InsertTuple);
3686 }
3687
3688 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3689 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3690
3691 if (DstTy && VecTy) {
3692 auto DstEltCnt = DstTy->getElementCount();
3693 auto VecEltCnt = VecTy->getElementCount();
3694 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3695
3696 // Extracting the entirety of Vec is a nop.
3697 if (DstEltCnt == VecTy->getElementCount()) {
3698 replaceInstUsesWith(CI, Vec);
3699 return eraseInstFromFunction(CI);
3700 }
3701
3702 // Only canonicalize to shufflevector if the destination vector and
3703 // Vec are fixed vectors.
3704 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3705 break;
3706
3707 SmallVector<int, 8> Mask;
3708 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3709 Mask.push_back(IdxN + i);
3710
3711 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3712 return replaceInstUsesWith(CI, Shuffle);
3713 }
3714 break;
3715 }
3716 case Intrinsic::experimental_vp_reverse: {
3717 Value *X;
3718 Value *Vec = II->getArgOperand(0);
3719 Value *Mask = II->getArgOperand(1);
3720 if (!match(Mask, m_AllOnes()))
3721 break;
3722 Value *EVL = II->getArgOperand(2);
3723 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3724 // rev(unop rev(X)) --> unop X
3725 if (match(Vec,
3727 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3728 auto *OldUnOp = cast<UnaryOperator>(Vec);
3729 auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3730 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3731 II->getIterator());
3732 return replaceInstUsesWith(CI, NewUnOp);
3733 }
3734 break;
3735 }
3736 case Intrinsic::vector_reduce_or:
3737 case Intrinsic::vector_reduce_and: {
3738 // Canonicalize logical or/and reductions:
3739 // Or reduction for i1 is represented as:
3740 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3741 // %res = cmp ne iReduxWidth %val, 0
3742 // And reduction for i1 is represented as:
3743 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3744 // %res = cmp eq iReduxWidth %val, -1 (all bits set)
3745 Value *Arg = II->getArgOperand(0);
3746 Value *Vect;
3747
3748 if (Value *NewOp =
3749 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3750 replaceUse(II->getOperandUse(0), NewOp);
3751 return II;
3752 }
3753
3754 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3755 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3756 if (FTy->getElementType() == Builder.getInt1Ty()) {
3757 Value *Res = Builder.CreateBitCast(
3758 Vect, Builder.getIntNTy(FTy->getNumElements()));
3759 if (IID == Intrinsic::vector_reduce_and) {
3760 Res = Builder.CreateICmpEQ(
3761 Res, ConstantInt::getAllOnesValue(Res->getType()));
3762 } else {
3763 assert(IID == Intrinsic::vector_reduce_or &&
3764 "Expected or reduction.");
3765 Res = Builder.CreateIsNotNull(Res);
3766 }
3767 if (Arg != Vect)
3768 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3769 II->getType());
3770 return replaceInstUsesWith(CI, Res);
3771 }
3772 }
3773 [[fallthrough]];
3774 }
3775 case Intrinsic::vector_reduce_add: {
3776 if (IID == Intrinsic::vector_reduce_add) {
3777 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3778 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3779 // Convert vector_reduce_add(SExt(<n x i1>)) to
3780 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3781 // Convert vector_reduce_add(<n x i1>) to
3782 // Trunc(ctpop(bitcast <n x i1> to in)).
3783 Value *Arg = II->getArgOperand(0);
3784 Value *Vect;
3785
3786 if (Value *NewOp =
3787 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3788 replaceUse(II->getOperandUse(0), NewOp);
3789 return II;
3790 }
3791
3792 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3793 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3794 if (FTy->getElementType() == Builder.getInt1Ty()) {
3795 Value *V = Builder.CreateBitCast(
3796 Vect, Builder.getIntNTy(FTy->getNumElements()));
3797 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3798 if (Res->getType() != II->getType())
3799 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
3800 if (Arg != Vect &&
3801 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
3802 Res = Builder.CreateNeg(Res);
3803 return replaceInstUsesWith(CI, Res);
3804 }
3805 }
3806
3807 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
3808 if (Value *Splat = getSplatValue(Arg)) {
3809 ElementCount VecToReduceCount =
3810 cast<VectorType>(Arg->getType())->getElementCount();
3811 if (VecToReduceCount.isFixed()) {
3812 unsigned VectorSize = VecToReduceCount.getFixedValue();
3813 return BinaryOperator::CreateMul(
3814 Splat,
3815 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
3816 /*ImplicitTrunc=*/true));
3817 }
3818 }
3819 }
3820 [[fallthrough]];
3821 }
3822 case Intrinsic::vector_reduce_xor: {
3823 if (IID == Intrinsic::vector_reduce_xor) {
3824 // Exclusive disjunction reduction over the vector with
3825 // (potentially-extended) i1 element type is actually a
3826 // (potentially-extended) arithmetic `add` reduction over the original
3827 // non-extended value:
3828 // vector_reduce_xor(?ext(<n x i1>))
3829 // -->
3830 // ?ext(vector_reduce_add(<n x i1>))
3831 Value *Arg = II->getArgOperand(0);
3832 Value *Vect;
3833
3834 if (Value *NewOp =
3835 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3836 replaceUse(II->getOperandUse(0), NewOp);
3837 return II;
3838 }
3839
3840 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3841 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3842 if (VTy->getElementType() == Builder.getInt1Ty()) {
3843 Value *Res = Builder.CreateAddReduce(Vect);
3844 if (Arg != Vect)
3845 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3846 II->getType());
3847 return replaceInstUsesWith(CI, Res);
3848 }
3849 }
3850 }
3851 [[fallthrough]];
3852 }
3853 case Intrinsic::vector_reduce_mul: {
3854 if (IID == Intrinsic::vector_reduce_mul) {
3855 // Multiplicative reduction over the vector with (potentially-extended)
3856 // i1 element type is actually a (potentially zero-extended)
3857 // logical `and` reduction over the original non-extended value:
3858 // vector_reduce_mul(?ext(<n x i1>))
3859 // -->
3860 // zext(vector_reduce_and(<n x i1>))
3861 Value *Arg = II->getArgOperand(0);
3862 Value *Vect;
3863
3864 if (Value *NewOp =
3865 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3866 replaceUse(II->getOperandUse(0), NewOp);
3867 return II;
3868 }
3869
3870 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3871 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3872 if (VTy->getElementType() == Builder.getInt1Ty()) {
3873 Value *Res = Builder.CreateAndReduce(Vect);
3874 if (Res->getType() != II->getType())
3875 Res = Builder.CreateZExt(Res, II->getType());
3876 return replaceInstUsesWith(CI, Res);
3877 }
3878 }
3879 }
3880 [[fallthrough]];
3881 }
3882 case Intrinsic::vector_reduce_umin:
3883 case Intrinsic::vector_reduce_umax: {
3884 if (IID == Intrinsic::vector_reduce_umin ||
3885 IID == Intrinsic::vector_reduce_umax) {
3886 // UMin/UMax reduction over the vector with (potentially-extended)
3887 // i1 element type is actually a (potentially-extended)
3888 // logical `and`/`or` reduction over the original non-extended value:
3889 // vector_reduce_u{min,max}(?ext(<n x i1>))
3890 // -->
3891 // ?ext(vector_reduce_{and,or}(<n x i1>))
3892 Value *Arg = II->getArgOperand(0);
3893 Value *Vect;
3894
3895 if (Value *NewOp =
3896 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3897 replaceUse(II->getOperandUse(0), NewOp);
3898 return II;
3899 }
3900
3901 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3902 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3903 if (VTy->getElementType() == Builder.getInt1Ty()) {
3904 Value *Res = IID == Intrinsic::vector_reduce_umin
3905 ? Builder.CreateAndReduce(Vect)
3906 : Builder.CreateOrReduce(Vect);
3907 if (Arg != Vect)
3908 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3909 II->getType());
3910 return replaceInstUsesWith(CI, Res);
3911 }
3912 }
3913 }
3914 [[fallthrough]];
3915 }
3916 case Intrinsic::vector_reduce_smin:
3917 case Intrinsic::vector_reduce_smax: {
3918 if (IID == Intrinsic::vector_reduce_smin ||
3919 IID == Intrinsic::vector_reduce_smax) {
3920 // SMin/SMax reduction over the vector with (potentially-extended)
3921 // i1 element type is actually a (potentially-extended)
3922 // logical `and`/`or` reduction over the original non-extended value:
3923 // vector_reduce_s{min,max}(<n x i1>)
3924 // -->
3925 // vector_reduce_{or,and}(<n x i1>)
3926 // and
3927 // vector_reduce_s{min,max}(sext(<n x i1>))
3928 // -->
3929 // sext(vector_reduce_{or,and}(<n x i1>))
3930 // and
3931 // vector_reduce_s{min,max}(zext(<n x i1>))
3932 // -->
3933 // zext(vector_reduce_{and,or}(<n x i1>))
3934 Value *Arg = II->getArgOperand(0);
3935 Value *Vect;
3936
3937 if (Value *NewOp =
3938 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3939 replaceUse(II->getOperandUse(0), NewOp);
3940 return II;
3941 }
3942
3943 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3944 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3945 if (VTy->getElementType() == Builder.getInt1Ty()) {
3946 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
3947 if (Arg != Vect)
3948 ExtOpc = cast<CastInst>(Arg)->getOpcode();
3949 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
3950 (ExtOpc == Instruction::CastOps::ZExt))
3951 ? Builder.CreateAndReduce(Vect)
3952 : Builder.CreateOrReduce(Vect);
3953 if (Arg != Vect)
3954 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
3955 return replaceInstUsesWith(CI, Res);
3956 }
3957 }
3958 }
3959 [[fallthrough]];
3960 }
3961 case Intrinsic::vector_reduce_fmax:
3962 case Intrinsic::vector_reduce_fmin:
3963 case Intrinsic::vector_reduce_fadd:
3964 case Intrinsic::vector_reduce_fmul: {
3965 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
3966 IID != Intrinsic::vector_reduce_fmul) ||
3967 II->hasAllowReassoc();
3968 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
3969 IID == Intrinsic::vector_reduce_fmul)
3970 ? 1
3971 : 0;
3972 Value *Arg = II->getArgOperand(ArgIdx);
3973 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
3974 replaceUse(II->getOperandUse(ArgIdx), NewOp);
3975 return nullptr;
3976 }
3977 break;
3978 }
3979 case Intrinsic::is_fpclass: {
3980 if (Instruction *I = foldIntrinsicIsFPClass(*II))
3981 return I;
3982 break;
3983 }
3984 case Intrinsic::threadlocal_address: {
3985 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3986 MaybeAlign Align = II->getRetAlign();
3987 if (MinAlign > Align.valueOrOne()) {
3988 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
3989 return II;
3990 }
3991 break;
3992 }
3993 case Intrinsic::frexp: {
3994 Value *X;
3995 // The first result is idempotent with the added complication of the struct
3996 // return, and the second result is zero because the value is already
3997 // normalized.
3998 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
3999 if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value()))) {
4000 X = Builder.CreateInsertValue(
4001 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4002 1);
4003 return replaceInstUsesWith(*II, X);
4004 }
4005 }
4006 break;
4007 }
4008 case Intrinsic::get_active_lane_mask: {
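// If the base operand is a known strictly positive constant, rebase the
// mask at zero (a sketch):
//   get.active.lane.mask(Base, N) --> get.active.lane.mask(0, N -u Base)
// where the subtraction is saturating and unsigned.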
4009 const APInt *Op0, *Op1;
4010 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4011 match(II->getOperand(1), m_APInt(Op1))) {
4012 Type *OpTy = II->getOperand(0)->getType();
4013 return replaceInstUsesWith(
4014 *II, Builder.CreateIntrinsic(
4015 II->getType(), Intrinsic::get_active_lane_mask,
4016 {Constant::getNullValue(OpTy),
4017 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4018 }
4019 break;
4020 }
4021 case Intrinsic::experimental_get_vector_length: {
4022 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4023 unsigned BitWidth =
4024 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4025 II->getType()->getScalarSizeInBits());
4026 ConstantRange Cnt =
4027 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4028 SQ.getWithInstruction(II))
4029 .zextOrTrunc(BitWidth);
4030 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4031 ->getValue()
4032 .zextOrTrunc(Cnt.getBitWidth());
4033 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4034 MaxLanes = MaxLanes.multiply(
4035 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4036
4037 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4038 return replaceInstUsesWith(
4039 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4040 return nullptr;
4041 }
4042 default: {
4043 // Handle target specific intrinsics
4044 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4045 if (V)
4046 return *V;
4047 break;
4048 }
4049 }
4050
4051 // Try to fold intrinsic into select/phi operands. This is legal if:
4052 // * The intrinsic is speculatable.
4053 // * The operand is one of the following:
4054 // - a phi.
4055 // - a select with a scalar condition.
4056 // - a select with a vector condition and II is not a cross lane operation.
4058 for (Value *Op : II->args()) {
4059 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4060 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4061 if (IsVectorCond && !isNotCrossLaneOperation(II))
4062 continue;
4063 // Don't replace a scalar select with a more expensive vector select if
4064 // we can't simplify both arms of the select.
4065 bool SimplifyBothArms =
4066 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4067 if (Instruction *R = FoldOpIntoSelect(
4068 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4069 return R;
4070 }
4071 if (auto *Phi = dyn_cast<PHINode>(Op))
4072 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4073 return R;
4074 }
4075 }
4076
4078 return Shuf;
4079
4081 return replaceInstUsesWith(*II, Reverse);
4082
4084 return replaceInstUsesWith(*II, Res);
4085
4086 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4087 // context, so it is handled in visitCallBase and we should trigger it.
4088 return visitCallBase(*II);
4089}
4090
4091// Fence instruction simplification
4092 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4093 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4094 // This check is solely here to handle arbitrary target-dependent syncscopes.
4095 // TODO: Can remove if does not matter in practice.
4096 if (NFI && FI.isIdenticalTo(NFI))
4097 return eraseInstFromFunction(FI);
4098
4099 // Returns true if FI1 is an identical or stronger fence than FI2.
4100 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4101 auto FI1SyncScope = FI1->getSyncScopeID();
4102 // Consider same scope, where scope is global or single-thread.
4103 if (FI1SyncScope != FI2->getSyncScopeID() ||
4104 (FI1SyncScope != SyncScope::System &&
4105 FI1SyncScope != SyncScope::SingleThread))
4106 return false;
4107
4108 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4109 };
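// E.g. a `fence release` immediately followed by a `fence seq_cst` in the
// same scope is removed (the stronger fence subsumes it); likewise a fence
// that directly follows an identical or stronger fence.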
4110 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4111 return eraseInstFromFunction(FI);
4112
4113 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4114 if (isIdenticalOrStrongerFence(PFI, &FI))
4115 return eraseInstFromFunction(FI);
4116 return nullptr;
4117}
4118
4119// InvokeInst simplification
4120 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
4121 return visitCallBase(II);
4122}
4123
4124// CallBrInst simplification
4125 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
4126 return visitCallBase(CBI);
4127}
4128
4129 static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
4130 if (!CI->hasFnAttr("modular-format"))
4131 return nullptr;
4132
4133 SmallVector<StringRef> Args(
4134 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
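// Based on the accesses below, the attribute value is a comma-separated
// list where Args[2] is the (apparently 1-based) index of the first format
// argument, Args[3] the name of the modular entry point to call instead,
// Args[4] the implementation-symbol prefix, and any remaining entries name
// the format "aspects" the call might need.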
4135 // TODO: Make use of the first two arguments
4136 unsigned FirstArgIdx;
4137 [[maybe_unused]] bool Error;
4138 Error = Args[2].getAsInteger(10, FirstArgIdx);
4139 assert(!Error && "invalid first arg index");
4140 --FirstArgIdx;
4141 StringRef FnName = Args[3];
4142 StringRef ImplName = Args[4];
4144
4145 if (AllAspects.empty())
4146 return nullptr;
4147
4148 SmallVector<StringRef> NeededAspects;
4149 for (StringRef Aspect : AllAspects) {
4150 if (Aspect == "float") {
4151 if (llvm::any_of(
4152 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4153 CI->arg_end()),
4154 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4155 NeededAspects.push_back("float");
4156 } else {
4157 // Unknown aspects are always considered to be needed.
4158 NeededAspects.push_back(Aspect);
4159 }
4160 }
4161
4162 if (NeededAspects.size() == AllAspects.size())
4163 return nullptr;
4164
4165 Module *M = CI->getModule();
4166 LLVMContext &Ctx = M->getContext();
4167 Function *Callee = CI->getCalledFunction();
4168 FunctionCallee ModularFn = M->getOrInsertFunction(
4169 FnName, Callee->getFunctionType(),
4170 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4171 CallInst *New = cast<CallInst>(CI->clone());
4172 New->setCalledFunction(ModularFn);
4173 New->removeFnAttr("modular-format");
4174 B.Insert(New);
4175
4176 const auto ReferenceAspect = [&](StringRef Aspect) {
4177 SmallString<20> Name = ImplName;
4178 Name += '_';
4179 Name += Aspect;
4180 Function *RelocNoneFn =
4181 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4182 B.CreateCall(RelocNoneFn,
4183 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4184 };
4185
4186 llvm::sort(NeededAspects);
4187 for (StringRef Request : NeededAspects)
4188 ReferenceAspect(Request);
4189
4190 return New;
4191}
4192
4193Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4194 if (!CI->getCalledFunction()) return nullptr;
4195
4196 // Skip optimizing notail and musttail calls so
4197 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4198 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4199 if (CI->isMustTailCall() || CI->isNoTailCall())
4200 return nullptr;
4201
4202 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4203 replaceInstUsesWith(*From, With);
4204 };
4205 auto InstCombineErase = [this](Instruction *I) {
4206 eraseInstFromFunction(*I);
4207 };
4208 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4209 InstCombineRAUW, InstCombineErase);
4210 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4211 ++NumSimplified;
4212 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4213 }
4214 if (Value *With = optimizeModularFormat(CI, Builder)) {
4215 ++NumSimplified;
4216 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4217 }
4218
4219 return nullptr;
4220}
4221
4222 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
4223 // Strip off at most one level of pointer casts, looking for an alloca. This
4224 // is good enough in practice and simpler than handling any number of casts.
4225 Value *Underlying = TrampMem->stripPointerCasts();
4226 if (Underlying != TrampMem &&
4227 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4228 return nullptr;
4229 if (!isa<AllocaInst>(Underlying))
4230 return nullptr;
4231
4232 IntrinsicInst *InitTrampoline = nullptr;
4233 for (User *U : TrampMem->users()) {
4234 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
4235 if (!II)
4236 return nullptr;
4237 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4238 if (InitTrampoline)
4239 // More than one init_trampoline writes to this value. Give up.
4240 return nullptr;
4241 InitTrampoline = II;
4242 continue;
4243 }
4244 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4245 // Allow any number of calls to adjust.trampoline.
4246 continue;
4247 return nullptr;
4248 }
4249
4250 // No call to init.trampoline found.
4251 if (!InitTrampoline)
4252 return nullptr;
4253
4254 // Check that the alloca is being used in the expected way.
4255 if (InitTrampoline->getOperand(0) != TrampMem)
4256 return nullptr;
4257
4258 return InitTrampoline;
4259}
4260
4261 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
4262 Value *TrampMem) {
4263 // Visit all the previous instructions in the basic block, and try to find a
4264 // init.trampoline which has a direct path to the adjust.trampoline.
4265 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4266 E = AdjustTramp->getParent()->begin();
4267 I != E;) {
4268 Instruction *Inst = &*--I;
4269 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
4270 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4271 II->getOperand(0) == TrampMem)
4272 return II;
4273 if (Inst->mayWriteToMemory())
4274 return nullptr;
4275 }
4276 return nullptr;
4277}
4278
4279// Given a call to llvm.adjust.trampoline, find and return the corresponding
4280// call to llvm.init.trampoline if the call to the trampoline can be optimized
4281// to a direct call to a function. Otherwise return NULL.
4282 static IntrinsicInst *findInitTrampoline(Value *Callee) {
4283 Callee = Callee->stripPointerCasts();
4284 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4285 if (!AdjustTramp ||
4286 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4287 return nullptr;
4288
4289 Value *TrampMem = AdjustTramp->getOperand(0);
4290
4291 if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4292 return IT;
4293 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4294 return IT;
4295 return nullptr;
4296}
4297
4298Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4299 const Value *Callee = Call.getCalledOperand();
4300 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4301 if (!IPC || !IPC->isNoopCast(DL))
4302 return nullptr;
4303
4304 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4305 if (!II)
4306 return nullptr;
4307
4308 Intrinsic::ID IIID = II->getIntrinsicID();
4309 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4310 return nullptr;
4311
4312 // Isolate the ptrauth bundle from the others.
4313 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4314 SmallVector<OperandBundleDef, 2> NewBundles;
4315 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4316 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4317 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4318 PtrAuthBundleOrNone = Bundle;
4319 else
4320 NewBundles.emplace_back(Bundle);
4321 }
4322
4323 if (!PtrAuthBundleOrNone)
4324 return nullptr;
4325
4326 Value *NewCallee = nullptr;
4327 switch (IIID) {
4328 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4329 // assuming the call bundle and the sign operands match.
4330 case Intrinsic::ptrauth_resign: {
4331 // Resign result key should match bundle.
4332 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4333 return nullptr;
4334 // Resign result discriminator should match bundle.
4335 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4336 return nullptr;
4337
4338 // Resign input (auth) key should also match: we can't change the key on
4339 // the new call we're generating, because we don't know what keys are valid.
4340 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4341 return nullptr;
4342
4343 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4344 NewBundles.emplace_back("ptrauth", NewBundleOps);
4345 NewCallee = II->getOperand(0);
4346 break;
4347 }
4348
4349 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4350 // assuming the call bundle and the sign operands match.
4351 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4352 case Intrinsic::ptrauth_sign: {
4353 // Sign key should match bundle.
4354 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4355 return nullptr;
4356 // Sign discriminator should match bundle.
4357 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4358 return nullptr;
4359 NewCallee = II->getOperand(0);
4360 break;
4361 }
4362 default:
4363 llvm_unreachable("unexpected intrinsic ID");
4364 }
4365
4366 if (!NewCallee)
4367 return nullptr;
4368
4369 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4370 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4371 NewCall->setCalledOperand(NewCallee);
4372 return NewCall;
4373}
4374
4375Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4376 auto *CPA = dyn_cast<ConstantPtrAuth>(Call.getCalledOperand());
4377 if (!CPA)
4378 return nullptr;
4379
4380 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4381 // If the ptrauth constant isn't based on a function pointer, bail out.
4382 if (!CalleeF)
4383 return nullptr;
4384
4385 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4386 auto PAB = Call.getOperandBundle(LLVMContext::OB_ptrauth);
4387 if (!PAB)
4388 return nullptr;
4389
4390 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4391 Value *Discriminator = PAB->Inputs[1];
4392
4393 // If the bundle doesn't match, this is probably going to fail to auth.
4394 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4395 return nullptr;
4396
4397 // If the bundle matches the constant, proceed in making this a direct call.
4398 auto *NewCall = CallBase::removeOperandBundle(&Call, LLVMContext::OB_ptrauth);
4399 NewCall->setCalledOperand(CalleeF);
4400 return NewCall;
4401}
4402
4403bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4404 const TargetLibraryInfo *TLI) {
4405 // Note: We only handle cases which can't be driven from generic attributes
4406 // here. So, for example, nonnull and noalias (which are common properties
4407 // of some allocation functions) are expected to be handled via annotation
4408 // of the respective allocator declaration with generic attributes.
4409 bool Changed = false;
4410
4411 if (!Call.getType()->isPointerTy())
4412 return Changed;
4413
4414 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4415 if (Size && *Size != 0) {
4416 // TODO: We really should just emit deref_or_null here and then
4417 // let the generic inference code combine that with nonnull.
4418 if (Call.hasRetAttr(Attribute::NonNull)) {
4419 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4420 Call.addRetAttr(Attribute::getWithDereferenceableBytes(
4421 Call.getContext(), Size->getLimitedValue()));
4422 } else {
4423 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4424 Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
4425 Call.getContext(), Size->getLimitedValue()));
4426 }
4427 }
4428
4429 // Add alignment attribute if alignment is a power of two constant.
4430 Value *Alignment = getAllocAlignment(&Call, TLI);
4431 if (!Alignment)
4432 return Changed;
4433
4434 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4435 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4436 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4437 if (llvm::isPowerOf2_64(AlignmentVal)) {
4438 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4439 Align NewAlign = Align(AlignmentVal);
4440 if (NewAlign > ExistingAlign) {
4441 Call.addRetAttr(
4442 Attribute::getWithAlignment(Call.getContext(), NewAlign));
4443 Changed = true;
4444 }
4445 }
4446 }
4447 return Changed;
4448}
4449
4450/// Improvements for call, callbr and invoke instructions.
4451Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4452 bool Changed = annotateAnyAllocSite(Call, &TLI);
4453
4454 // Mark any parameters that are known to be non-null with the nonnull
4455 // attribute. This is helpful for inlining calls to functions with null
4456 // checks on their arguments.
4457 SmallVector<unsigned, 4> ArgNos;
4458 unsigned ArgNo = 0;
4459
4460 for (Value *V : Call.args()) {
4461 if (V->getType()->isPointerTy()) {
4462 // Simplify the nonnull operand if the parameter is known to be nonnull.
4463 // Otherwise, try to infer nonnull for it.
4464 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4465 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4466 (HasDereferenceable &&
4467 !NullPointerIsDefined(Call.getFunction(),
4468 V->getType()->getPointerAddressSpace()))) {
4469 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4470 replaceOperand(Call, ArgNo, Res);
4471 Changed = true;
4472 }
4473 } else if (isKnownNonZero(V,
4474 getSimplifyQuery().getWithInstruction(&Call))) {
4475 ArgNos.push_back(ArgNo);
4476 }
4477 }
4478 ArgNo++;
4479 }
4480
4481 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4482
4483 if (!ArgNos.empty()) {
4484 AttributeList AS = Call.getAttributes();
4485 LLVMContext &Ctx = Call.getContext();
4486 AS = AS.addParamAttribute(Ctx, ArgNos,
4487 Attribute::get(Ctx, Attribute::NonNull));
4488 Call.setAttributes(AS);
4489 Changed = true;
4490 }
4491
4492 // If the callee is a pointer to a function, attempt to move any casts to the
4493 // arguments of the call/callbr/invoke.
4494 Value *Callee = Call.getCalledOperand();
4495 Function *CalleeF = dyn_cast<Function>(Callee);
4496 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4497 transformConstExprCastCall(Call))
4498 return nullptr;
4499
4500 if (CalleeF) {
4501 // Remove the convergent attr on calls when the callee is not convergent.
4502 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4503 !CalleeF->isIntrinsic()) {
4504 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4505 << "\n");
4506 Call.setNotConvergent();
4507 return &Call;
4508 }
4509
4510 // If the call and callee calling conventions don't match, and neither one
4511 // of the calling conventions is compatible with C calling convention
4512 // this call must be unreachable, as the call is undefined.
4513 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4514 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4515 TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
4516 !(Call.getCallingConv() == llvm::CallingConv::C &&
4517 TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
4518 // Only do this for calls to a function with a body. A prototype may
4519 // not actually end up matching the implementation's calling conv for a
4520 // variety of reasons (e.g. it may be written in assembly).
4521 !CalleeF->isDeclaration()) {
4522 Instruction *OldCall = &Call;
4523 CreateNonTerminatorUnreachable(OldCall);
4524 // If OldCall does not return void then replaceInstUsesWith poison.
4525 // This allows ValueHandlers and custom metadata to adjust themselves.
4526 if (!OldCall->getType()->isVoidTy())
4527 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4528 if (isa<CallInst>(OldCall))
4529 return eraseInstFromFunction(*OldCall);
4530
4531 // We cannot remove an invoke or a callbr, because it would change the
4532 // CFG, just change the callee to a null pointer.
4533 cast<CallBase>(OldCall)->setCalledFunction(
4534 CalleeF->getFunctionType(),
4535 Constant::getNullValue(CalleeF->getType()));
4536 return nullptr;
4537 }
4538 }
4539
4540 // Calling a null function pointer is undefined if a null address isn't
4541 // dereferenceable.
4542 if ((isa<ConstantPointerNull>(Callee) &&
4543 !NullPointerIsDefined(Call.getFunction())) ||
4544 isa<UndefValue>(Callee)) {
4545 // If Call does not return void then replaceInstUsesWith poison.
4546 // This allows ValueHandlers and custom metadata to adjust themselves.
4547 if (!Call.getType()->isVoidTy())
4548 replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
4549
4550 if (Call.isTerminator()) {
4551 // Can't remove an invoke or callbr because we cannot change the CFG.
4552 return nullptr;
4553 }
4554
4555 // This instruction is not reachable, just remove it.
4556 CreateNonTerminatorUnreachable(&Call);
4557 return eraseInstFromFunction(Call);
4558 }
4559
4560 if (IntrinsicInst *II = findInitTrampoline(Callee))
4561 return transformCallThroughTrampoline(Call, *II);
4562
4563 // Combine calls involving pointer authentication intrinsics.
4564 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4565 return NewCall;
4566
4567 // Combine calls to ptrauth constants.
4568 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4569 return NewCall;
4570
4571 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4572 InlineAsm *IA = cast<InlineAsm>(Callee);
4573 if (!IA->canThrow()) {
4574 // Normal inline asm calls cannot throw - mark them
4575 // 'nounwind'.
4576 Call.setDoesNotThrow();
4577 Changed = true;
4578 }
4579 }
4580
4581 // Try to optimize the call if possible; we require DataLayout for most of
4582 // this. None of these calls are seen as possibly dead so go ahead and
4583 // delete the instruction now.
4584 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4585 Instruction *I = tryOptimizeCall(CI);
4586 // If we changed something, return the result. Otherwise let
4587 // the fallthrough checks run.
4588 if (I) return eraseInstFromFunction(*I);
4589 }
4590
4591 if (!Call.use_empty() && !Call.isMustTailCall())
4592 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4593 Type *CallTy = Call.getType();
4594 Type *RetArgTy = ReturnedArg->getType();
4595 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4596 return replaceInstUsesWith(
4597 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4598 }
4599
4600 // Drop unnecessary callee_type metadata from calls that were converted
4601 // into direct calls.
4602 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4603 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4604 Changed = true;
4605 }
4606
4607 // Drop unnecessary kcfi operand bundles from calls that were converted
4608 // into direct calls.
4609 auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
4610 if (Bundle && !Call.isIndirectCall()) {
4611 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4612 if (CalleeF) {
4613 ConstantInt *FunctionType = nullptr;
4614 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4615
4616 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4617 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4618
4619 if (FunctionType &&
4620 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4621 dbgs() << Call.getModule()->getName()
4622 << ": warning: kcfi: " << Call.getCaller()->getName()
4623 << ": call to " << CalleeF->getName()
4624 << " using a mismatching function pointer type\n";
4625 }
4626 });
4627
4628 return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
4629 }
4630
4631 if (isRemovableAlloc(&Call, &TLI))
4632 return visitAllocSite(Call);
4633
4634 // Handle intrinsics which can be used in both call and invoke context.
4635 switch (Call.getIntrinsicID()) {
4636 case Intrinsic::experimental_gc_statepoint: {
4637 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4638 SmallPtrSet<Value *, 32> LiveGcValues;
4639 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4640 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4641
4642 // Remove the relocation if unused.
4643 if (GCR.use_empty()) {
4644 eraseInstFromFunction(GCR);
4645 continue;
4646 }
4647
4648 Value *DerivedPtr = GCR.getDerivedPtr();
4649 Value *BasePtr = GCR.getBasePtr();
4650
4651 // Undef is undef, even after relocation.
4652 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4653 replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
4654 eraseInstFromFunction(GCR);
4655 continue;
4656 }
4657
4658 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4659 // The relocation of null will be null for most any collector.
4660 // TODO: provide a hook for this in GCStrategy. There might be some
4661 // weird collector this property does not hold for.
4662 if (isa<ConstantPointerNull>(DerivedPtr)) {
4663 // Use null-pointer of gc_relocate's type to replace it.
4664 replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
4665 eraseInstFromFunction(GCR);
4666 continue;
4667 }
4668
4669 // isKnownNonNull -> nonnull attribute
4670 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4671 isKnownNonZero(DerivedPtr,
4672 getSimplifyQuery().getWithInstruction(&Call))) {
4673 GCR.addRetAttr(Attribute::NonNull);
4674 // We discovered new fact, re-check users.
4675 Worklist.pushUsersToWorkList(GCR);
4676 }
4677 }
4678
4679 // If we have two copies of the same pointer in the statepoint argument
4680 // list, canonicalize to one. This may let us common gc.relocates.
4681 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4682 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4683 auto *OpIntTy = GCR.getOperand(2)->getType();
4684 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4685 }
4686
4687 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4688 // Canonicalize on the type from the uses to the defs
4689
4690 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4691 LiveGcValues.insert(BasePtr);
4692 LiveGcValues.insert(DerivedPtr);
4693 }
4694 std::optional<OperandBundleUse> Bundle =
4695 GCSP.getOperandBundle(LLVMContext::OB_gc_live);
4696 unsigned NumOfGCLives = LiveGcValues.size();
4697 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4698 break;
4699 // We can reduce the size of gc live bundle.
4700 DenseMap<Value *, unsigned> Val2Idx;
4701 std::vector<Value *> NewLiveGc;
4702 for (Value *V : Bundle->Inputs) {
4703 auto [It, Inserted] = Val2Idx.try_emplace(V);
4704 if (!Inserted)
4705 continue;
4706 if (LiveGcValues.count(V)) {
4707 It->second = NewLiveGc.size();
4708 NewLiveGc.push_back(V);
4709 } else
4710 It->second = NumOfGCLives;
4711 }
4712 // Update all gc.relocates
4713 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4714 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4715 Value *BasePtr = GCR.getBasePtr();
4716 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4717 "Missed live gc for base pointer");
4718 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4719 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4720 Value *DerivedPtr = GCR.getDerivedPtr();
4721 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4722 "Missed live gc for derived pointer");
4723 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4724 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4725 }
4726 // Create new statepoint instruction.
4727 OperandBundleDef NewBundle("gc-live", NewLiveGc);
4728 return CallBase::Create(&Call, NewBundle);
4729 }
4730 default: { break; }
4731 }
4732
4733 return Changed ? &Call : nullptr;
4734}
4735
4736/// If the callee is a constexpr cast of a function, attempt to move the cast to
4737/// the arguments of the call/invoke.
4738/// CallBrInst is not supported.
4739bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4740 auto *Callee =
4741 dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
4742 if (!Callee)
4743 return false;
4744
4746 "CallBr's don't have a single point after a def to insert at");
4747
4748 // Don't perform the transform for declarations, which may not be fully
4749 // accurate. For example, void @foo() is commonly used as a placeholder for
4750 // unknown prototypes.
4751 if (Callee->isDeclaration())
4752 return false;
4753
4754 // If this is a call to a thunk function, don't remove the cast. Thunks are
4755 // used to transparently forward all incoming parameters and outgoing return
4756 // values, so it's important to leave the cast in place.
4757 if (Callee->hasFnAttribute("thunk"))
4758 return false;
4759
4760 // If this is a call to a naked function, the assembly might be
4761 // using an argument or otherwise relying on the frame layout, so
4762 // the function prototype will mismatch.
4763 if (Callee->hasFnAttribute(Attribute::Naked))
4764 return false;
4765
4766 // If this is a musttail call, the callee's prototype must match the caller's
4767 // prototype with the exception of pointee types. The code below doesn't
4768 // implement that, so we can't do this transform.
4769 // TODO: Do the transform if it only requires adding pointer casts.
4770 if (Call.isMustTailCall())
4771 return false;
4772
4773 Instruction *Caller = &Call;
4774 const AttributeList &CallerPAL = Call.getAttributes();
4775
4776 // Okay, this is a cast from a function to a different type. Unless doing so
4777 // would cause a type conversion of one of our arguments, change this call to
4778 // be a direct call with arguments casted to the appropriate types.
4779 FunctionType *FT = Callee->getFunctionType();
4780 Type *OldRetTy = Caller->getType();
4781 Type *NewRetTy = FT->getReturnType();
4782
4783 // Check to see if we are changing the return type...
4784 if (OldRetTy != NewRetTy) {
4785
4786 if (NewRetTy->isStructTy())
4787 return false; // TODO: Handle multiple return values.
4788
4789 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4790 if (!Caller->use_empty())
4791 return false; // Cannot transform this return value.
4792 }
4793
4794 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4795 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4796 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
4797 NewRetTy, CallerPAL.getRetAttrs())))
4798 return false; // Attribute not compatible with transformed value.
4799 }
4800
4801 // If the callbase is an invoke instruction, and the return value is
4802 // used by a PHI node in a successor, we cannot change the return type of
4803 // the call because there is no place to put the cast instruction (without
4804 // breaking the critical edge). Bail out in this case.
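// Sketch of the blocked shape (hypothetical IR): %r = invoke ... to label
// %normal unwind label %lpad, where %normal begins with a phi that uses %r.
// The fixup cast of %r would have to live on the edge into %normal, which we
// do not want to split here, so the transform is refused instead.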
4805 if (!Caller->use_empty()) {
4806 BasicBlock *PhisNotSupportedBlock = nullptr;
4807 if (auto *II = dyn_cast<InvokeInst>(Caller))
4808 PhisNotSupportedBlock = II->getNormalDest();
4809 if (PhisNotSupportedBlock)
4810 for (User *U : Caller->users())
4811 if (PHINode *PN = dyn_cast<PHINode>(U))
4812 if (PN->getParent() == PhisNotSupportedBlock)
4813 return false;
4814 }
4815 }
4816
4817 unsigned NumActualArgs = Call.arg_size();
4818 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4819
4820 // Prevent us turning:
4821 // declare void @takes_i32_inalloca(i32* inalloca)
4822 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4823 //
4824 // into:
4825 // call void @takes_i32_inalloca(i32* null)
4826 //
4827 // Similarly, avoid folding away bitcasts of byval calls.
4828 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4829 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
4830 return false;
4831
4832 auto AI = Call.arg_begin();
4833 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4834 Type *ParamTy = FT->getParamType(i);
4835 Type *ActTy = (*AI)->getType();
4836
4837 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4838 return false; // Cannot transform this parameter value.
4839
4840 // Check if there are any incompatible attributes we cannot drop safely.
4841 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
4842 .overlaps(AttributeFuncs::typeIncompatible(
4843 ParamTy, CallerPAL.getParamAttrs(i),
4844 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
4845 return false; // Attribute not compatible with transformed value.
4846
4847 if (Call.isInAllocaArgument(i) ||
4848 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
4849 return false; // Cannot transform to and from inalloca/preallocated.
4850
4851 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
4852 return false;
4853
4854 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
4855 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
4856 return false; // Cannot transform to or from byval.
4857 }
4858
4859 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4860 !CallerPAL.isEmpty()) {
4861 // In this case we have more arguments than the new function type, but we
4862 // won't be dropping them. Check that these extra arguments have attributes
4863 // that are compatible with being a vararg call argument.
4864 unsigned SRetIdx;
4865 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4866 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
4867 return false;
4868 }
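// For instance (illustrative): an sret argument that would now fall into the
// variadic "..." portion makes us bail out, since sret is not a valid
// attribute for a vararg argument.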
4869
4870 // Okay, we decided that this is a safe thing to do: go ahead and start
4871 // inserting cast instructions as necessary.
4872 SmallVector<Value *, 8> Args;
4873 SmallVector<AttributeSet, 8> ArgAttrs;
4874 Args.reserve(NumActualArgs);
4875 ArgAttrs.reserve(NumActualArgs);
4876
4877 // Get any return attributes.
4878 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4879
4880 // If the return value is not being used, the type may not be compatible
4881 // with the existing attributes. Wipe out any problematic attributes.
4882 RAttrs.remove(
4883 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
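// For example (illustrative): if the new return type is no longer a pointer,
// pointer-only attributes such as nonnull or noalias on the old return value
// are dropped here rather than carried onto an incompatible type.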
4884
4885 LLVMContext &Ctx = Call.getContext();
4886 AI = Call.arg_begin();
4887 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4888 Type *ParamTy = FT->getParamType(i);
4889
4890 Value *NewArg = *AI;
4891 if ((*AI)->getType() != ParamTy)
4892 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4893 Args.push_back(NewArg);
4894
4895 // Add any parameter attributes except the ones incompatible with the new
4896 // type. Note that we made sure all incompatible ones are safe to drop.
4897 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
4898 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
4899 ArgAttrs.push_back(
4900 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
4901 }
4902
4903 // If the function takes more arguments than the call was taking, add them
4904 // now.
4905 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4906 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4907 ArgAttrs.push_back(AttributeSet());
4908 }
4909
4910 // If we are removing arguments to the function, emit an obnoxious warning.
4911 if (FT->getNumParams() < NumActualArgs) {
4912 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4913 if (FT->isVarArg()) {
4914 // Add all of the arguments in their promoted form to the arg list.
4915 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4916 Type *PTy = getPromotedType((*AI)->getType());
4917 Value *NewArg = *AI;
4918 if (PTy != (*AI)->getType()) {
4919 // Must promote to pass through va_arg area!
4920 Instruction::CastOps opcode =
4921 CastInst::getCastOpcode(*AI, false, PTy, false);
4922 NewArg = Builder.CreateCast(opcode, *AI, PTy);
4923 }
4924 Args.push_back(NewArg);
4925
4926 // Add any parameter attributes.
4927 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
4928 }
4929 }
4930 }
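// Illustrative effect (assuming getPromotedType applies the usual C-style
// default argument promotions): an i8 or i16 value passed into the variadic
// tail is widened to i32 before being appended, so the rebuilt call still
// passes what the va_arg area expects.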
4931
4932 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
4933
4934 if (NewRetTy->isVoidTy())
4935 Caller->setName(""); // Void type should not have a name.
4936
4937 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4938 "missing argument attributes");
4939 AttributeList NewCallerPAL = AttributeList::get(
4940 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4941
4942 SmallVector<OperandBundleDef, 1> OpBundles;
4943 Call.getOperandBundlesAsDefs(OpBundles);
4944
4945 CallBase *NewCall;
4946 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4947 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
4948 II->getUnwindDest(), Args, OpBundles);
4949 } else {
4950 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
4951 cast<CallInst>(NewCall)->setTailCallKind(
4952 cast<CallInst>(Caller)->getTailCallKind());
4953 }
4954 NewCall->takeName(Caller);
4955 NewCall->setCallingConv(Call.getCallingConv());
4956 NewCall->setAttributes(NewCallerPAL);
4957
4958 // Preserve prof metadata if any.
4959 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
4960
4961 // Insert a cast of the return type as necessary.
4962 Instruction *NC = NewCall;
4963 Value *NV = NC;
4964 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4965 assert(!NV->getType()->isVoidTy());
4966 NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4967 NC->setDebugLoc(Caller->getDebugLoc());
4968
4969 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
4970 assert(OptInsertPt && "No place to insert cast");
4971 InsertNewInstBefore(NC, *OptInsertPt);
4972 Worklist.pushUsersToWorkList(*Caller);
4973 }
4974
4975 if (!Caller->use_empty())
4976 replaceInstUsesWith(*Caller, NV);
4977 else if (Caller->hasValueHandle()) {
4978 if (OldRetTy == NV->getType())
4979 ValueHandleBase::ValueIsRAUWd(Caller, NewCall);
4980 else
4981 // We cannot call ValueIsRAUWd with a different type, and the
4982 // actual tracked value will disappear.
4983 ValueHandleBase::ValueIsDeleted(Caller);
4984 }
4985
4986 eraseInstFromFunction(*Caller);
4987 return true;
4988}
4989
4990/// Turn a call to a function created by init_trampoline / adjust_trampoline
4991/// intrinsic pair into a direct call to the underlying function.
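// Illustrative pattern (hypothetical IR; assumes @f marks its first
// parameter 'nest'):
//   call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %chain)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
//   %r = call i32 %fp(i32 %x)
// becomes a direct call with the chain spliced in:
//   %r = call i32 @f(ptr nest %chain, i32 %x)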
4992 Instruction *
4993 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
4994 IntrinsicInst &Tramp) {
4995 FunctionType *FTy = Call.getFunctionType();
4996 AttributeList Attrs = Call.getAttributes();
4997
4998 // If the call already has the 'nest' attribute somewhere then give up -
4999 // otherwise 'nest' would occur twice after splicing in the chain.
5000 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5001 return nullptr;
5002
5003 Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
5004 FunctionType *NestFTy = NestF->getFunctionType();
5005
5006 AttributeList NestAttrs = NestF->getAttributes();
5007 if (!NestAttrs.isEmpty()) {
5008 unsigned NestArgNo = 0;
5009 Type *NestTy = nullptr;
5010 AttributeSet NestAttr;
5011
5012 // Look for a parameter marked with the 'nest' attribute.
5013 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5014 E = NestFTy->param_end();
5015 I != E; ++NestArgNo, ++I) {
5016 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5017 if (AS.hasAttribute(Attribute::Nest)) {
5018 // Record the parameter type and any other attributes.
5019 NestTy = *I;
5020 NestAttr = AS;
5021 break;
5022 }
5023 }
5024
5025 if (NestTy) {
5026 std::vector<Value*> NewArgs;
5027 std::vector<AttributeSet> NewArgAttrs;
5028 NewArgs.reserve(Call.arg_size() + 1);
5029 NewArgAttrs.reserve(Call.arg_size());
5030
5031 // Insert the nest argument into the call argument list, which may
5032 // mean appending it. Likewise for attributes.
5033
5034 {
5035 unsigned ArgNo = 0;
5036 auto I = Call.arg_begin(), E = Call.arg_end();
5037 do {
5038 if (ArgNo == NestArgNo) {
5039 // Add the chain argument and attributes.
5040 Value *NestVal = Tramp.getArgOperand(2);
5041 if (NestVal->getType() != NestTy)
5042 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5043 NewArgs.push_back(NestVal);
5044 NewArgAttrs.push_back(NestAttr);
5045 }
5046
5047 if (I == E)
5048 break;
5049
5050 // Add the original argument and attributes.
5051 NewArgs.push_back(*I);
5052 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5053
5054 ++ArgNo;
5055 ++I;
5056 } while (true);
5057 }
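// For example (sketch): with NestArgNo == 1 and an original argument list
// (%a, %b), NewArgs becomes { %a, %chain, %b } and NewArgAttrs gets the nest
// parameter's attributes spliced in at the same position.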
5058
5059 // The trampoline may have been bitcast to a bogus type (FTy).
5060 // Handle this by synthesizing a new function type, equal to FTy
5061 // with the chain parameter inserted.
5062
5063 std::vector<Type*> NewTypes;
5064 NewTypes.reserve(FTy->getNumParams()+1);
5065
5066 // Insert the chain's type into the list of parameter types, which may
5067 // mean appending it.
5068 {
5069 unsigned ArgNo = 0;
5070 FunctionType::param_iterator I = FTy->param_begin(),
5071 E = FTy->param_end();
5072
5073 do {
5074 if (ArgNo == NestArgNo)
5075 // Add the chain's type.
5076 NewTypes.push_back(NestTy);
5077
5078 if (I == E)
5079 break;
5080
5081 // Add the original type.
5082 NewTypes.push_back(*I);
5083
5084 ++ArgNo;
5085 ++I;
5086 } while (true);
5087 }
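// For example (sketch): if FTy is "void (i32, float)" and the nest parameter
// sits at index 0 with type ptr, NewTypes ends up as { ptr, i32, float },
// i.e. FTy with the chain type inserted at NestArgNo.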
5088
5089 // Replace the trampoline call with a direct call. Let the generic
5090 // code sort out any function type mismatches.
5091 FunctionType *NewFTy =
5092 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5093 AttributeList NewPAL =
5094 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5095 Attrs.getRetAttrs(), NewArgAttrs);
5096
5097 SmallVector<OperandBundleDef, 1> OpBundles;
5098 Call.getOperandBundlesAsDefs(OpBundles);
5099
5100 Instruction *NewCaller;
5101 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5102 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5103 II->getUnwindDest(), NewArgs, OpBundles);
5104 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5105 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5106 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5107 NewCaller =
5108 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5109 CBI->getIndirectDests(), NewArgs, OpBundles);
5110 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5111 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5112 } else {
5113 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5114 cast<CallInst>(NewCaller)->setTailCallKind(
5115 cast<CallInst>(Call).getTailCallKind());
5116 cast<CallInst>(NewCaller)->setCallingConv(
5117 cast<CallInst>(Call).getCallingConv());
5118 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5119 }
5120 NewCaller->setDebugLoc(Call.getDebugLoc());
5121
5122 return NewCaller;
5123 }
5124 }
5125
5126 // Replace the trampoline call with a direct call. Since there is no 'nest'
5127 // parameter, there is no need to adjust the argument list. Let the generic
5128 // code sort out any function type mismatches.
5129 Call.setCalledFunction(FTy, NestF);
5130 return &Call;
5131}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Value * simplifyNeonTbl1(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Convert a table lookup to shufflevector if the mask is constant.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp, ZeroUndef),...
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
bool isNegative() const
Definition APFloat.h:1431
void clearSign()
Definition APFloat.h:1280
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2036
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:312
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:304
This class represents any memset intrinsic.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:236
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:279
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:240
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
bool hasOperandBundles() const
Return true if this User has any operand bundles.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:811
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange multiply(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setNoSignedZeros(bool B=true)
Definition FMF.h:84
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:610
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:594
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:249
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associate statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc reloactes linked to this statepoint May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2212
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
SimplifyQuery SQ
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:104
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:269
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:128
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:147
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1226
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1279
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:226
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:829
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
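The PatternMatch helpers above compose into match() calls. A minimal usage sketch follows (not taken from this file; the function name isAddOfConstant is illustrative only):
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;
// Returns true and binds X and C when V is 'add X, C' with a constant-int
// (or splat-constant) right-hand side.
static bool isAddOfConstant(Value *V, Value *&X, const APInt *&C) {
  return match(V, m_Add(m_Value(X), m_APInt(C)));
}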
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
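A short sketch of how the OverflowResult values above are typically consumed; describeOverflow is an illustrative name, and the analysis call that produces the result is omitted:
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
// Maps each OverflowResult to the action a combine would usually take.
static const char *describeOverflow(OverflowResult OR) {
  switch (OR) {
  case OverflowResult::NeverOverflows:
    return "nuw/nsw flags may be added";
  case OverflowResult::AlwaysOverflowsHigh:
    return "always overflows toward the max value";
  case OverflowResult::AlwaysOverflowsLow:
    return "always overflows toward the min value";
  case OverflowResult::MayOverflow:
    return "no overflow fact can be inferred";
  }
  llvm_unreachable("covered switch over OverflowResult");
}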
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
Canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1625
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
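A minimal sketch of ConstantFoldCompareInstOperands deciding whether two constants compare equal (constantsAreEqual is an illustrative name):
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
// Folds 'icmp eq A, B'; returns true only when the fold succeeds and yields 1.
static bool constantsAreEqual(Constant *A, Constant *B, const DataLayout &DL) {
  Constant *Folded =
      ConstantFoldCompareInstOperands(CmpInst::ICMP_EQ, A, B, DL);
  return Folded && Folded->isOneValue();
}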
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
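A sketch of the typical getKnownAlignment pattern, here raising a load's alignment when the analysis proves more than is currently recorded (tightenLoadAlign is an illustrative name, not an LLVM API):
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
// Record the proven alignment on the load if it is stronger than the current one.
static void tightenLoadAlign(LoadInst &LI, const DataLayout &DL) {
  Align Known = getKnownAlignment(LI.getPointerOperand(), DL, &LI);
  if (Known > LI.getAlign())
    LI.setAlignment(Known);
}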
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1580
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
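A sketch showing matchSelectPattern together with the SelectPatternFlavor values listed above (isAbsLike is an illustrative name):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Value.h"
using namespace llvm;
// True when V is a select implementing abs(X) or -abs(X).
static bool isAbsLike(Value *V) {
  Value *LHS, *RHS;
  SelectPatternFlavor SPF = matchSelectPattern(V, LHS, RHS).Flavor;
  return SPF == SPF_ABS || SPF == SPF_NABS;
}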
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1775
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1561
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Like isSafeToSpeculativelyExecute, but doesn't use information from the instruction's non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
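A sketch of getAllocSize in use, checking that an allocation call is provably at least a given size (allocIsAtLeast is illustrative; CB is assumed to be a call recognized by TLI):
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include <cstdint>
#include <optional>
using namespace llvm;
// Returns true only when the allocation size folds to a constant >= Bytes.
static bool allocIsAtLeast(const CallBase *CB, const TargetLibraryInfo *TLI,
                           uint64_t Bytes) {
  if (std::optional<APInt> Size = getAllocSize(CB, TLI))
    return Size->uge(Bytes);
  return false;
}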
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
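A sketch tying the mask helpers above together when simplifying a masked load (classifyMask and MaskedLoadAction are illustrative names, not LLVM APIs):
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"
using namespace llvm;
enum class MaskedLoadAction { MakePlainLoad, UsePassthru, Keep };
// All-ones mask: every lane is read, so an ordinary load suffices.
// All-zeros mask: no lane is read, so the passthru operand is the result.
static MaskedLoadAction classifyMask(Value *Mask) {
  if (maskIsAllOneOrUndef(Mask))
    return MaskedLoadAction::MakePlainLoad;
  if (maskIsAllZeroOrUndef(Mask))
    return MaskedLoadAction::UsePassthru;
  return MaskedLoadAction::Keep;
}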
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1598
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
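A sketch using getUnderlyingObject to test whether a pointer is based on a local alloca (isBasedOnAlloca is an illustrative name):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// getUnderlyingObject strips GEPs and casts before the base object is inspected.
static bool isBasedOnAlloca(const Value *Ptr) {
  return isa<AllocaInst>(getUnderlyingObject(Ptr));
}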
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:289
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:111
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:286
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
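A sketch combining computeKnownBits (listed earlier) with the KnownBits accessors above to prove that a value is a non-zero multiple of 8 (isNonZeroMultipleOf8 is an illustrative name):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
// Three known-zero trailing bits imply divisibility by 8.
static bool isNonZeroMultipleOf8(const Value *V, const DataLayout &DL) {
  if (!V->getType()->isIntOrIntVectorTy())
    return false;
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, DL);
  return Known.isNonZero() && Known.countMinTrailingZeros() >= 3;
}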
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
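A small sketch of the Align/MaybeAlign helpers above in combination (encodeAlign is an illustrative name):
#include "llvm/Support/Alignment.h"
using namespace llvm;
// Treat an unset alignment as 1, then store its log2 compactly.
static unsigned encodeAlign(MaybeAlign MA) {
  Align A = MA.valueOrOne();
  return Log2(A);
}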
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represents one piece of information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const