LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/DebugInfo.h"
40#include "llvm/IR/Function.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/InstrTypes.h"
44#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/IR/IntrinsicsAArch64.h"
49#include "llvm/IR/IntrinsicsAMDGPU.h"
50#include "llvm/IR/IntrinsicsARM.h"
51#include "llvm/IR/IntrinsicsHexagon.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Statepoint.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/User.h"
59#include "llvm/IR/Value.h"
60#include "llvm/IR/ValueHandle.h"
65#include "llvm/Support/Debug.h"
76#include <algorithm>
77#include <cassert>
78#include <cstdint>
79#include <optional>
80#include <utility>
81#include <vector>
82
83#define DEBUG_TYPE "instcombine"
85
86using namespace llvm;
87using namespace PatternMatch;
88
89STATISTIC(NumSimplified, "Number of library calls simplified");
90
92 "instcombine-guard-widening-window",
93 cl::init(3),
94 cl::desc("How wide an instruction window to bypass looking for "
95 "another guard"));
96
97/// Return the specified type promoted as it would be to pass through a va_arg
98/// area.
100 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
101 if (ITy->getBitWidth() < 32)
102 return Type::getInt32Ty(Ty->getContext());
103 }
104 return Ty;
105}
106
107/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
108/// TODO: This should probably be integrated with visitAllocSites, but that
109/// requires a deeper change to allow either unread or unwritten objects.
111 auto *Src = MI->getRawSource();
112 while (isa<GetElementPtrInst>(Src)) {
113 if (!Src->hasOneUse())
114 return false;
115 Src = cast<Instruction>(Src)->getOperand(0);
116 }
117 return isa<AllocaInst>(Src) && Src->hasOneUse();
118}
119
121 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
122 MaybeAlign CopyDstAlign = MI->getDestAlign();
123 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
124 MI->setDestAlignment(DstAlign);
125 return MI;
126 }
127
128 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
129 MaybeAlign CopySrcAlign = MI->getSourceAlign();
130 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
131 MI->setSourceAlignment(SrcAlign);
132 return MI;
133 }
134
135 // If we have a store to a location which is known constant, we can conclude
136 // that the store must be storing the constant value (else the memory
137 // wouldn't be constant), and this must be a noop.
138 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
139 // Set the size of the copy to 0, it will be deleted on the next iteration.
140 MI->setLength((uint64_t)0);
141 return MI;
142 }
143
144 // If the source is provably undef, the memcpy/memmove doesn't do anything
145 // (unless the transfer is volatile).
146 if (hasUndefSource(MI) && !MI->isVolatile()) {
147 // Set the size of the copy to 0, it will be deleted on the next iteration.
148 MI->setLength((uint64_t)0);
149 return MI;
150 }
151
152 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
153 // load/store.
154 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
155 if (!MemOpLength) return nullptr;
156
157 // Source and destination pointer types are always "i8*" for intrinsic. See
158 // if the size is something we can handle with a single primitive load/store.
159 // A single load+store correctly handles overlapping memory in the memmove
160 // case.
161 uint64_t Size = MemOpLength->getLimitedValue();
162 assert(Size && "0-sized memory transferring should be removed already.");
163
164 if (Size > 8 || (Size&(Size-1)))
165 return nullptr; // If not 1/2/4/8 bytes, exit.
166
167 // If it is an atomic and alignment is less than the size then we will
168 // introduce the unaligned memory access which will be later transformed
169 // into libcall in CodeGen. This is not evident performance gain so disable
170 // it now.
171 if (MI->isAtomic())
172 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
173 return nullptr;
174
175 // Use an integer load+store unless we can find something better.
176 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
177
178 // If the memcpy has metadata describing the members, see if we can get the
179 // TBAA, scope and noalias tags describing our copy.
180 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
181
182 Value *Src = MI->getArgOperand(1);
183 Value *Dest = MI->getArgOperand(0);
184 LoadInst *L = Builder.CreateLoad(IntType, Src);
185 // Alignment from the mem intrinsic will be better, so use it.
186 L->setAlignment(*CopySrcAlign);
187 L->setAAMetadata(AACopyMD);
188 MDNode *LoopMemParallelMD =
189 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
190 if (LoopMemParallelMD)
191 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
192 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
193 if (AccessGroupMD)
194 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
195
196 StoreInst *S = Builder.CreateStore(L, Dest);
197 // Alignment from the mem intrinsic will be better, so use it.
198 S->setAlignment(*CopyDstAlign);
199 S->setAAMetadata(AACopyMD);
200 if (LoopMemParallelMD)
201 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
202 if (AccessGroupMD)
203 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
204 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
205
206 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
207 // non-atomics can be volatile
208 L->setVolatile(MT->isVolatile());
209 S->setVolatile(MT->isVolatile());
210 }
211 if (MI->isAtomic()) {
212 // atomics have to be unordered
213 L->setOrdering(AtomicOrdering::Unordered);
215 }
216
217 // Set the size of the copy to 0, it will be deleted on the next iteration.
218 MI->setLength((uint64_t)0);
219 return MI;
220}
221
223 const Align KnownAlignment =
224 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
225 MaybeAlign MemSetAlign = MI->getDestAlign();
226 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
227 MI->setDestAlignment(KnownAlignment);
228 return MI;
229 }
230
231 // If we have a store to a location which is known constant, we can conclude
232 // that the store must be storing the constant value (else the memory
233 // wouldn't be constant), and this must be a noop.
234 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
235 // Set the size of the copy to 0, it will be deleted on the next iteration.
236 MI->setLength((uint64_t)0);
237 return MI;
238 }
239
240 // Remove memset with an undef value.
241 // FIXME: This is technically incorrect because it might overwrite a poison
242 // value. Change to PoisonValue once #52930 is resolved.
243 if (isa<UndefValue>(MI->getValue())) {
244 // Set the size of the copy to 0, it will be deleted on the next iteration.
245 MI->setLength((uint64_t)0);
246 return MI;
247 }
248
249 // Extract the length and alignment and fill if they are constant.
250 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
251 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
252 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
253 return nullptr;
254 const uint64_t Len = LenC->getLimitedValue();
255 assert(Len && "0-sized memory setting should be removed already.");
256 const Align Alignment = MI->getDestAlign().valueOrOne();
257
258 // If it is an atomic and alignment is less than the size then we will
259 // introduce the unaligned memory access which will be later transformed
260 // into libcall in CodeGen. This is not evident performance gain so disable
261 // it now.
262 if (MI->isAtomic() && Alignment < Len)
263 return nullptr;
264
265 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
266 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
267 Value *Dest = MI->getDest();
268
269 // Extract the fill value and store.
270 Constant *FillVal = ConstantInt::get(
271 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
272 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
273 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
274 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
275 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
276 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
277 }
278
279 S->setAlignment(Alignment);
280 if (MI->isAtomic())
282
283 // Set the size of the copy to 0, it will be deleted on the next iteration.
284 MI->setLength((uint64_t)0);
285 return MI;
286 }
287
288 return nullptr;
289}
290
291// TODO, Obvious Missing Transforms:
292// * Narrow width by halves excluding zero/undef lanes
/// Try to replace the masked-load intrinsic call \p II with simpler IR.
///
/// Call operand 0 is used as the load address (its parameter alignment, or
/// the valueOrOne() fallback, becomes the alignment of any load created
/// here), operand 1 as the mask and operand 2 as the value selected for
/// lanes the mask disables.
///
/// \returns the value computed by the replacement IR, or nullptr when none of
/// the patterns below applies.
293Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
294 Value *LoadPtr = II.getArgOperand(0);
295 const Align Alignment = II.getParamAlign(0).valueOrOne();
296 Value *Mask = II.getArgOperand(1);
297
298 // If the mask is all ones or poison, this is a plain vector load of the 1st
299 // argument.
300 if (match(Mask, m_AllOnesOrPoison())) {
301 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
302 "unmaskedload");
 // Carry the intrinsic call's metadata over to the plain load.
303 L->copyMetadata(II);
304 return L;
305 }
306
307 // If we can unconditionally load from this address, replace with a
308 // load/select idiom.
 // NOTE(review): the remaining arguments of this isDereferenceablePointer
 // call (listing line 310) are absent from this copy of the file - confirm
 // against the upstream source before editing this condition.
309 if (isDereferenceablePointer(LoadPtr, II.getType(),
311 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
312 "unmaskedload");
313 LI->copyMetadata(II);
 // The select takes the loaded lane where the mask (operand 1) is set and
 // operand 2 elsewhere.
314 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
315 }
316
317 return nullptr;
318}
319
320// TODO, Obvious Missing Transforms:
321// * Single constant active lane -> store
322// * Narrow width by halves excluding zero/undef lanes
/// Try to simplify the masked-store intrinsic call \p II.
///
/// Call operand 0 is the value being stored, operand 1 the destination
/// pointer (its parameter alignment, or the valueOrOne() fallback, is used
/// for any store created here) and operand 2 the mask. Nothing is done unless
/// the mask is a Constant.
///
/// \returns a replacement instruction or the result of replaceOperand() when
/// a simplification was made, nullptr otherwise.
323Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
324 Value *StorePtr = II.getArgOperand(1);
325 Align Alignment = II.getParamAlign(1).valueOrOne();
326 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
327 if (!ConstMask)
328 return nullptr;
329
330 // If the mask is all zeros or poison, this instruction does nothing.
 // NOTE(review): the statement controlled by this `if` (listing line 332) is
 // absent from this copy of the file - confirm against the upstream source.
331 if (match(ConstMask, m_ZeroOrPoison()))
333
334 // If the mask is all ones or poison, this is a plain vector store of the 1st
335 // argument.
336 if (match(ConstMask, m_AllOnesOrPoison())) {
 // The new store is created unattached (no insertion point is passed) and
 // handed back to the caller; `false` is the volatile flag.
337 StoreInst *S =
338 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
339 S->copyMetadata(II);
340 return S;
341 }
342
 // The demanded-elements logic below is skipped for scalable-vector masks.
343 if (isa<ScalableVectorType>(ConstMask->getType()))
344 return nullptr;
345
346 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
347 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
348 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
349 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
350 PoisonElts))
351 return replaceOperand(II, 0, V);
352
353 return nullptr;
354}
355
356// TODO, Obvious Missing Transforms:
357// * Single constant active lane load -> load
358// * Dereferenceable address & few lanes -> scalarize speculative load/selects
359// * Adjacent vector addresses -> masked.load
360// * Narrow width by halves excluding zero/undef lanes
361// * Vector incrementing address -> vector masked load
/// Try to simplify the masked-gather intrinsic call \p II.
///
/// Call operand 0 is the vector of addresses and operand 1 the mask. Nothing
/// is done unless the mask is a Constant. The only pattern handled here is an
/// all-ones mask combined with an address operand for which getSplatValue()
/// finds a splat value: one scalar load plus a vector splat is built for it.
///
/// \returns nullptr on every path visible in this listing (see the review
/// note inside the splat branch).
362Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
363 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
364 if (!ConstMask)
365 return nullptr;
366
367 // Vector splat address w/known mask -> scalar load
368 // Fold the gather to load the source vector first lane
369 // because it is reloading the same value each time
370 if (ConstMask->isAllOnesValue())
371 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
372 auto *VecTy = cast<VectorType>(II.getType());
 // Alignment comes from the address operand's parameter attribute.
373 const Align Alignment = II.getParamAlign(0).valueOrOne();
374 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
375 Alignment, "load.scalar");
 // Broadcast the single loaded element to the gather's element count.
376 Value *Shuf =
377 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
 // NOTE(review): the statement that consumes Shuf (listing line 378) is
 // absent from this copy of the file - confirm against the upstream
 // source.
379 }
380
381 return nullptr;
382}
383
384// TODO, Obvious Missing Transforms:
385// * Single constant active lane -> store
386// * Adjacent vector addresses -> masked.store
387// * Narrow store width by halves excluding zero/undef lanes
388// * Vector incrementing address -> vector masked store
/// Try to simplify the masked-scatter intrinsic call \p II.
///
/// Call operand 0 is the vector of values, operand 1 the vector of
/// destination addresses (its parameter alignment, or the valueOrOne()
/// fallback, is used for any store created here) and operand 2 the mask.
/// Nothing is done unless the mask is a Constant.
///
/// \returns a replacement store or the result of replaceOperand() when a
/// simplification was made, nullptr otherwise.
389Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
390 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
391 if (!ConstMask)
392 return nullptr;
393
394 // If the mask is all zeros or poison, a scatter does nothing.
 // NOTE(review): the statement controlled by this `if` (listing line 396) is
 // absent from this copy of the file - confirm against the upstream source.
395 if (match(ConstMask, m_ZeroOrPoison()))
397
398 // Vector splat address -> scalar store
399 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
400 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
401 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
402 if (maskContainsAllOneOrUndef(ConstMask)) {
403 Align Alignment = II.getParamAlign(1).valueOrOne();
404 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
405 Alignment);
406 S->copyMetadata(II);
407 return S;
408 }
409 }
410 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
411 // lastlane), ptr
412 if (ConstMask->isAllOnesValue()) {
413 Align Alignment = II.getParamAlign(1).valueOrOne();
414 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
415 ElementCount VF = WideLoadTy->getElementCount();
 // LastLane = (element count of the address vector) - 1, built as an i32
 // value through the IR builder.
416 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
417 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
418 Value *Extract =
419 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
420 StoreInst *S =
421 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
422 S->copyMetadata(II);
423 return S;
424 }
425 }
 // The demanded-elements logic below is skipped for scalable-vector masks.
426 if (isa<ScalableVectorType>(ConstMask->getType()))
427 return nullptr;
428
429 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
430 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
431 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
 // Try the value operand first, then the address operand; each successful
 // simplification returns immediately.
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 0, V);
435 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
436 PoisonElts))
437 return replaceOperand(II, 1, V);
438
439 return nullptr;
440}
441
442/// This function transforms launder.invariant.group and strip.invariant.group
443/// like:
444/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
445/// launder(strip(%x)) -> launder(%x)
446/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
447/// strip(launder(%x)) -> strip(%x)
448/// This is legal because it preserves the most recent information about
449/// the presence or absence of invariant.group.
451 InstCombinerImpl &IC) {
452 auto *Arg = II.getArgOperand(0);
453 auto *StrippedArg = Arg->stripPointerCasts();
454 auto *StrippedInvariantGroupsArg = StrippedArg;
455 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
456 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
457 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
458 break;
459 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
460 }
461 if (StrippedArg == StrippedInvariantGroupsArg)
462 return nullptr; // No launders/strips to remove.
463
464 Value *Result = nullptr;
465
466 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
467 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
468 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
469 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
470 else
472 "simplifyInvariantGroupIntrinsic only handles launder and strip");
473 if (Result->getType()->getPointerAddressSpace() !=
474 II.getType()->getPointerAddressSpace())
475 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
476
477 return cast<Instruction>(Result);
478}
479
481 assert((II.getIntrinsicID() == Intrinsic::cttz ||
482 II.getIntrinsicID() == Intrinsic::ctlz) &&
483 "Expected cttz or ctlz intrinsic");
484 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
485 Value *Op0 = II.getArgOperand(0);
486 Value *Op1 = II.getArgOperand(1);
487 Value *X;
488 // ctlz(bitreverse(x)) -> cttz(x)
489 // cttz(bitreverse(x)) -> ctlz(x)
490 if (match(Op0, m_BitReverse(m_Value(X)))) {
491 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
492 Function *F =
493 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
494 return CallInst::Create(F, {X, II.getArgOperand(1)});
495 }
496
497 if (II.getType()->isIntOrIntVectorTy(1)) {
498 // ctlz/cttz i1 Op0 --> not Op0
499 if (match(Op1, m_Zero()))
500 return BinaryOperator::CreateNot(Op0);
501 // If zero is poison, then the input can be assumed to be "true", so the
502 // instruction simplifies to "false".
503 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
504 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
505 }
506
507 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
508 if (II.hasOneUse() && match(Op1, m_Zero()) &&
509 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
510 II.dropUBImplyingAttrsAndMetadata();
511 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
512 }
513
514 Constant *C;
515
516 if (IsTZ) {
517 // cttz(-x) -> cttz(x)
518 if (match(Op0, m_Neg(m_Value(X))))
519 return IC.replaceOperand(II, 0, X);
520
521 // cttz(-x & x) -> cttz(x)
522 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
523 return IC.replaceOperand(II, 0, X);
524
525 // cttz(sext(x)) -> cttz(zext(x))
526 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
527 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
528 auto *CttzZext =
529 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
530 return IC.replaceInstUsesWith(II, CttzZext);
531 }
532
533 // Zext doesn't change the number of trailing zeros, so narrow:
534 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
535 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
536 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
537 IC.Builder.getTrue());
538 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
539 return IC.replaceInstUsesWith(II, ZextCttz);
540 }
541
542 // cttz(abs(x)) -> cttz(x)
543 // cttz(nabs(x)) -> cttz(x)
544 Value *Y;
546 if (SPF == SPF_ABS || SPF == SPF_NABS)
547 return IC.replaceOperand(II, 0, X);
548
550 return IC.replaceOperand(II, 0, X);
551
552 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
553 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
554 match(Op1, m_One())) {
555 Value *ConstCttz =
556 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
557 return BinaryOperator::CreateAdd(ConstCttz, X);
558 }
559
560 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
561 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
562 match(Op1, m_One())) {
563 Value *ConstCttz =
564 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
565 return BinaryOperator::CreateSub(ConstCttz, X);
566 }
567
568 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
569 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
570 Value *Width =
571 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
572 return BinaryOperator::CreateSub(Width, X);
573 }
574 } else {
575 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
576 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
577 match(Op1, m_One())) {
578 Value *ConstCtlz =
579 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
580 return BinaryOperator::CreateAdd(ConstCtlz, X);
581 }
582
583 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
584 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
585 match(Op1, m_One())) {
586 Value *ConstCtlz =
587 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
588 return BinaryOperator::CreateSub(ConstCtlz, X);
589 }
590
591 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
592 if (Op0->hasOneUse() &&
593 match(Op0,
595 Type *Ty = II.getType();
596 unsigned BitWidth = Ty->getScalarSizeInBits();
597 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
598 {X, IC.Builder.getFalse()});
599 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
600 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
601 }
602 }
603
604 // cttz(Pow2) -> Log2(Pow2)
605 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
606 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
607 if (IsTZ)
608 return IC.replaceInstUsesWith(II, R);
609 BinaryOperator *BO = BinaryOperator::CreateSub(
610 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
611 R);
612 BO->setHasNoSignedWrap();
614 return BO;
615 }
616
617 KnownBits Known = IC.computeKnownBits(Op0, &II);
618
619 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
620 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
621 : Known.countMaxLeadingZeros();
622 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
623 : Known.countMinLeadingZeros();
624
625 // If all bits above (ctlz) or below (cttz) the first known one are known
626 // zero, this value is constant.
627 // FIXME: This should be in InstSimplify because we're replacing an
628 // instruction with a constant.
629 if (PossibleZeros == DefiniteZeros) {
630 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
631 return IC.replaceInstUsesWith(II, C);
632 }
633
634 // If the input to cttz/ctlz is known to be non-zero,
635 // then change the 'ZeroIsPoison' parameter to 'true'
636 // because we know the zero behavior can't affect the result.
637 if (!Known.One.isZero() ||
639 if (!match(II.getArgOperand(1), m_One()))
640 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
641 }
642
643 // Add range attribute since known bits can't completely reflect what we know.
644 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
645 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
646 !II.getMetadata(LLVMContext::MD_range)) {
647 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
648 APInt(BitWidth, PossibleZeros + 1));
649 II.addRangeRetAttr(Range);
650 return &II;
651 }
652
653 return nullptr;
654}
655
657 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
658 "Expected ctpop intrinsic");
659 Type *Ty = II.getType();
660 unsigned BitWidth = Ty->getScalarSizeInBits();
661 Value *Op0 = II.getArgOperand(0);
662 Value *X, *Y;
663
664 // ctpop(bitreverse(x)) -> ctpop(x)
665 // ctpop(bswap(x)) -> ctpop(x)
666 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
667 return IC.replaceOperand(II, 0, X);
668
669 // ctpop(rot(x)) -> ctpop(x)
670 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
671 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
672 X == Y)
673 return IC.replaceOperand(II, 0, X);
674
675 // ctpop(x | -x) -> bitwidth - cttz(x, false)
676 if (Op0->hasOneUse() &&
677 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
678 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
679 {X, IC.Builder.getFalse()});
680 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
681 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
682 }
683
684 // ctpop(~x & (x - 1)) -> cttz(x, false)
685 if (match(Op0,
687 Function *F =
688 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
689 return CallInst::Create(F, {X, IC.Builder.getFalse()});
690 }
691
692 // Zext doesn't change the number of set bits, so narrow:
693 // ctpop (zext X) --> zext (ctpop X)
694 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
695 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
696 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
697 }
698
699 KnownBits Known(BitWidth);
700 IC.computeKnownBits(Op0, Known, &II);
701
702 // If all bits are zero except for exactly one fixed bit, then the result
703 // must be 0 or 1, and we can get that answer by shifting to LSB:
704 // ctpop (X & 32) --> (X & 32) >> 5
705 // TODO: Investigate removing this as its likely unnecessary given the below
706 // `isKnownToBeAPowerOfTwo` check.
707 if ((~Known.Zero).isPowerOf2())
708 return BinaryOperator::CreateLShr(
709 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
710
711 // More generally we can also handle non-constant power of 2 patterns such as
712 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
713 // ctpop(Pow2OrZero) --> icmp ne X, 0
714 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
715 return CastInst::Create(Instruction::ZExt,
718 Ty);
719
720 // Add range attribute since known bits can't completely reflect what we know.
721 if (BitWidth != 1) {
722 ConstantRange OldRange =
723 II.getRange().value_or(ConstantRange::getFull(BitWidth));
724
725 unsigned Lower = Known.countMinPopulation();
726 unsigned Upper = Known.countMaxPopulation() + 1;
727
728 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
730 Lower = 1;
731
733 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
734
735 if (Range != OldRange) {
736 II.addRangeRetAttr(Range);
737 return &II;
738 }
739 }
740
741 return nullptr;
742}
743
744/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
745/// at most two source operands are actually referenced.
747 bool IsExtension) {
748 // Bail out if the mask is not a constant.
749 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
750 if (!C)
751 return nullptr;
752
753 auto *RetTy = cast<FixedVectorType>(II.getType());
754 unsigned NumIndexes = RetTy->getNumElements();
755
756 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
757 if (!RetTy->getElementType()->isIntegerTy(8) ||
758 (NumIndexes != 8 && NumIndexes != 16))
759 return nullptr;
760
761 // For tbx instructions, the first argument is the "fallback" vector, which
762 // has the same length as the mask and return type.
763 unsigned int StartIndex = (unsigned)IsExtension;
764 auto *SourceTy =
765 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
766 // Note that the element count of each source vector does *not* need to be the
767 // same as the element count of the return type and mask! All source vectors
768 // must have the same element count as each other, though.
769 unsigned NumElementsPerSource = SourceTy->getNumElements();
770
771 // There are no tbl/tbx intrinsics for which the destination size exceeds the
772 // source size. However, our definitions of the intrinsics, at least in
773 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
774 // *could* technically happen.
775 if (NumIndexes > NumElementsPerSource)
776 return nullptr;
777
778 // The tbl/tbx intrinsics take several source operands followed by a mask
779 // operand.
780 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
781
782 // Map input operands to shuffle indices. This also helpfully deduplicates the
783 // input arguments, in case the same value is passed as an argument multiple
784 // times.
785 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
786 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
787 PoisonValue::get(SourceTy)};
788
789 int Indexes[16];
790 for (unsigned I = 0; I < NumIndexes; ++I) {
791 Constant *COp = C->getAggregateElement(I);
792
793 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
794 return nullptr;
795
796 if (isa<UndefValue>(COp)) {
797 Indexes[I] = -1;
798 continue;
799 }
800
801 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
802 // The index of the input argument that this index references (0 = first
803 // source argument, etc).
804 unsigned SourceOperandIndex = Index / NumElementsPerSource;
805 // The index of the element at that source operand.
806 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
807
808 Value *SourceOperand;
809 if (SourceOperandIndex >= NumSourceOperands) {
810 // This index is out of bounds. Map it to index into either the fallback
811 // vector (tbx) or vector of zeroes (tbl).
812 SourceOperandIndex = NumSourceOperands;
813 if (IsExtension) {
814 // For out-of-bounds indices in tbx, choose the `I`th element of the
815 // fallback.
816 SourceOperand = II.getArgOperand(0);
817 SourceOperandElementIndex = I;
818 } else {
819 // Otherwise, choose some element from the dummy vector of zeroes (we'll
820 // always choose the first).
821 SourceOperand = Constant::getNullValue(SourceTy);
822 SourceOperandElementIndex = 0;
823 }
824 } else {
825 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
826 }
827
828 // The source operand may be the fallback vector, which may not have the
829 // same number of elements as the source vector. In that case, we *could*
830 // choose to extend its length with another shufflevector, but it's simpler
831 // to just bail instead.
832 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
833 NumElementsPerSource)
834 return nullptr;
835
836 // We now know the source operand referenced by this index. Make it a
837 // shufflevector operand, if it isn't already.
838 unsigned NumSlots = ValueToShuffleSlot.size();
839 // This shuffle references more than two sources, and hence cannot be
840 // represented as a shufflevector.
841 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
842 return nullptr;
843
844 auto [It, Inserted] =
845 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
846 if (Inserted)
847 ShuffleOperands[It->getSecond()] = SourceOperand;
848
849 unsigned RemappedIndex =
850 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
851 Indexes[I] = RemappedIndex;
852 }
853
855 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
856 return IC.replaceInstUsesWith(II, Shuf);
857}
858
859// Returns true iff the 2 intrinsics have the same operands, limiting the
860// comparison to the first NumOperands.
861static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
862 unsigned NumOperands) {
863 assert(I.arg_size() >= NumOperands && "Not enough operands");
864 assert(E.arg_size() >= NumOperands && "Not enough operands");
865 for (unsigned i = 0; i < NumOperands; i++)
866 if (I.getArgOperand(i) != E.getArgOperand(i))
867 return false;
868 return true;
869}
870
871// Remove trivially empty start/end intrinsic ranges, i.e. a start
872// immediately followed by an end (ignoring debuginfo or other
873// start/end intrinsics in between). As this handles only the most trivial
874// cases, tracking the nesting level is not needed:
875//
876// call @llvm.foo.start(i1 0)
877// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
878// call @llvm.foo.end(i1 0)
879// call @llvm.foo.end(i1 0) ; &I
880static bool
882 std::function<bool(const IntrinsicInst &)> IsStart) {
883 // We start from the end intrinsic and scan backwards, so that InstCombine
884 // has already processed (and potentially removed) all the instructions
885 // before the end intrinsic.
886 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
887 for (; BI != BE; ++BI) {
888 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
889 if (I->isDebugOrPseudoInst() ||
890 I->getIntrinsicID() == EndI.getIntrinsicID())
891 continue;
892 if (IsStart(*I)) {
893 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
895 IC.eraseInstFromFunction(EndI);
896 return true;
897 }
898 // Skip start intrinsics that don't pair with this end intrinsic.
899 continue;
900 }
901 }
902 break;
903 }
904
905 return false;
906}
907
909 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
910 // Bail out on the case where the source va_list of a va_copy is destroyed
911 // immediately by a follow-up va_end.
912 return II.getIntrinsicID() == Intrinsic::vastart ||
913 (II.getIntrinsicID() == Intrinsic::vacopy &&
914 I.getArgOperand(0) != II.getArgOperand(1));
915 });
916 return nullptr;
917}
918
920 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
921 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
922 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
923 Call.setArgOperand(0, Arg1);
924 Call.setArgOperand(1, Arg0);
925 return &Call;
926 }
927 return nullptr;
928}
929
930/// Creates a result tuple for an overflow intrinsic \p II with a given
931/// \p Result and a constant \p Overflow value.
933 Constant *Overflow) {
934 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
935 StructType *ST = cast<StructType>(II->getType());
936 Constant *Struct = ConstantStruct::get(ST, V);
937 return InsertValueInst::Create(Struct, Result, 0);
938}
939
941InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
942 WithOverflowInst *WO = cast<WithOverflowInst>(II);
943 Value *OperationResult = nullptr;
944 Constant *OverflowResult = nullptr;
945 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
946 WO->getRHS(), *WO, OperationResult, OverflowResult))
947 return createOverflowTuple(WO, OperationResult, OverflowResult);
948
949 // See whether we can optimize the overflow check with assumption information.
950 for (User *U : WO->users()) {
951 if (!match(U, m_ExtractValue<1>(m_Value())))
952 continue;
953
954 for (auto &AssumeVH : AC.assumptionsFor(U)) {
955 if (!AssumeVH)
956 continue;
957 CallInst *I = cast<CallInst>(AssumeVH);
958 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
959 continue;
960 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
961 /*AllowEphemerals=*/true))
962 continue;
963 Value *Result =
964 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
965 Result->takeName(WO);
966 if (auto *Inst = dyn_cast<Instruction>(Result)) {
967 if (WO->isSigned())
968 Inst->setHasNoSignedWrap();
969 else
970 Inst->setHasNoUnsignedWrap();
971 }
972 return createOverflowTuple(WO, Result,
973 ConstantInt::getFalse(U->getType()));
974 }
975 }
976
977 return nullptr;
978}
979
980static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
981 Ty = Ty->getScalarType();
982 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
983}
984
985static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
986 Ty = Ty->getScalarType();
987 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
988}
989
990/// \returns the compare predicate type if the test performed by
991/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
992/// floating-point environment assumed for \p F for type \p Ty
994 const Function &F, Type *Ty) {
995 switch (static_cast<unsigned>(Mask)) {
996 case fcZero:
997 if (inputDenormalIsIEEE(F, Ty))
998 return FCmpInst::FCMP_OEQ;
999 break;
1000 case fcZero | fcSubnormal:
1001 if (inputDenormalIsDAZ(F, Ty))
1002 return FCmpInst::FCMP_OEQ;
1003 break;
1004 case fcPositive | fcNegZero:
1005 if (inputDenormalIsIEEE(F, Ty))
1006 return FCmpInst::FCMP_OGE;
1007 break;
1009 if (inputDenormalIsDAZ(F, Ty))
1010 return FCmpInst::FCMP_OGE;
1011 break;
1013 if (inputDenormalIsIEEE(F, Ty))
1014 return FCmpInst::FCMP_OGT;
1015 break;
1016 case fcNegative | fcPosZero:
1017 if (inputDenormalIsIEEE(F, Ty))
1018 return FCmpInst::FCMP_OLE;
1019 break;
1021 if (inputDenormalIsDAZ(F, Ty))
1022 return FCmpInst::FCMP_OLE;
1023 break;
1025 if (inputDenormalIsIEEE(F, Ty))
1026 return FCmpInst::FCMP_OLT;
1027 break;
1028 case fcPosNormal | fcPosInf:
1029 if (inputDenormalIsDAZ(F, Ty))
1030 return FCmpInst::FCMP_OGT;
1031 break;
1032 case fcNegNormal | fcNegInf:
1033 if (inputDenormalIsDAZ(F, Ty))
1034 return FCmpInst::FCMP_OLT;
1035 break;
1036 case ~fcZero & ~fcNan:
1037 if (inputDenormalIsIEEE(F, Ty))
1038 return FCmpInst::FCMP_ONE;
1039 break;
1040 case ~(fcZero | fcSubnormal) & ~fcNan:
1041 if (inputDenormalIsDAZ(F, Ty))
1042 return FCmpInst::FCMP_ONE;
1043 break;
1044 default:
1045 break;
1046 }
1047
1049}
1050
1051Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1052 Value *Src0 = II.getArgOperand(0);
1053 Value *Src1 = II.getArgOperand(1);
1054 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1055 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1056 const bool IsUnordered = (Mask & fcNan) == fcNan;
1057 const bool IsOrdered = (Mask & fcNan) == fcNone;
1058 const FPClassTest OrderedMask = Mask & ~fcNan;
1059 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1060
1061 const bool IsStrict =
1062 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1063
1064 Value *FNegSrc;
1065 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1066 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1067
1068 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1069 return replaceOperand(II, 0, FNegSrc);
1070 }
1071
1072 Value *FAbsSrc;
1073 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1074 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1075 return replaceOperand(II, 0, FAbsSrc);
1076 }
1077
1078 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1079 (IsOrdered || IsUnordered) && !IsStrict) {
1080 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1081 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1082 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1083 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1085 FCmpInst::Predicate Pred =
1086 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1087 if (OrderedInvertedMask == fcInf)
1088 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1089
1090 Value *Fabs = Builder.CreateFAbs(Src0);
1091 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1092 CmpInf->takeName(&II);
1093 return replaceInstUsesWith(II, CmpInf);
1094 }
1095
1096 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1097 (IsOrdered || IsUnordered) && !IsStrict) {
1098 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1099 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1100 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1101 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1102 Constant *Inf =
1103 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1104 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1105 : Builder.CreateFCmpOEQ(Src0, Inf);
1106
1107 EqInf->takeName(&II);
1108 return replaceInstUsesWith(II, EqInf);
1109 }
1110
1111 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1112 (IsOrdered || IsUnordered) && !IsStrict) {
1113 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1114 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1115 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1116 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1118 OrderedInvertedMask == fcNegInf);
1119 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1120 : Builder.CreateFCmpONE(Src0, Inf);
1121 NeInf->takeName(&II);
1122 return replaceInstUsesWith(II, NeInf);
1123 }
1124
1125 if (Mask == fcNan && !IsStrict) {
1126 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1127 // exceptions.
1128 Value *IsNan =
1129 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1130 IsNan->takeName(&II);
1131 return replaceInstUsesWith(II, IsNan);
1132 }
1133
1134 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1135 // Equivalent of !isnan. Replace with standard fcmp.
1136 Value *FCmp =
1137 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1138 FCmp->takeName(&II);
1139 return replaceInstUsesWith(II, FCmp);
1140 }
1141
1143
1144 // Try to replace with an fcmp with 0
1145 //
1146 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1147 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1148 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1149 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1150 //
1151 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1152 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1153 //
1154 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1155 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1156 //
1157 if (!IsStrict && (IsOrdered || IsUnordered) &&
1158 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1159 Src0->getType())) !=
1162 // Equivalent of == 0.
1163 Value *FCmp = Builder.CreateFCmp(
1164 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1165 Src0, Zero);
1166
1167 FCmp->takeName(&II);
1168 return replaceInstUsesWith(II, FCmp);
1169 }
1170
1171 KnownFPClass Known =
1172 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1173
1174 // Clear test bits we know must be false from the source value.
1175 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1176 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1177 if ((Mask & Known.KnownFPClasses) != Mask) {
1178 II.setArgOperand(
1179 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1180 return &II;
1181 }
1182
1183 // If none of the tests which can return false are possible, fold to true.
1184 // fp_class (nnan x), ~(qnan|snan) -> true
1185 // fp_class (ninf x), ~(ninf|pinf) -> true
1186 if (Mask == Known.KnownFPClasses)
1187 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1188
1189 return nullptr;
1190}
1191
1192static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1193 KnownBits Known = computeKnownBits(Op, SQ);
1194 if (Known.isNonNegative())
1195 return false;
1196 if (Known.isNegative())
1197 return true;
1198
1199 Value *X, *Y;
1200 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1202
1203 return std::nullopt;
1204}
1205
1206static std::optional<bool> getKnownSignOrZero(Value *Op,
1207 const SimplifyQuery &SQ) {
1208 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1209 return Sign;
1210
1211 Value *X, *Y;
1212 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1214
1215 return std::nullopt;
1216}
1217
1218/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1219static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1220 const SimplifyQuery &SQ) {
1221 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1222 if (!Known1)
1223 return false;
1224 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1225 if (!Known0)
1226 return false;
1227 return *Known0 == *Known1;
1228}
1229
1230// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1231//
1232// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1233// produce 0 or inf.
1234static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1235 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
1236 if (!APFloat::semanticsHasInf(FltSem))
1237 return false;
1238
1239 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1240 // reasonable fp type (for example, `double` only has 11 exponent bits).
1241 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1242 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1243 int SignedMin = static_cast<int>(minIntN(ExpBits));
1244 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
1246 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
1248 return ScaledUp.isInfinity() && ScaledDown.isZero();
1249}
1250
1251/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1252/// can trigger other combines.
1254 InstCombiner::BuilderTy &Builder) {
1255 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1256 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1257 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1258 "Expected a min or max intrinsic");
1259
1260 // TODO: Match vectors with undef elements, but undef may not propagate.
1261 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1262 Value *X;
1263 const APInt *C0, *C1;
1264 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1265 !match(Op1, m_APInt(C1)))
1266 return nullptr;
1267
1268 // Check for necessary no-wrap and overflow constraints.
1269 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1270 auto *Add = cast<BinaryOperator>(Op0);
1271 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1272 (!IsSigned && !Add->hasNoUnsignedWrap()))
1273 return nullptr;
1274
1275 // If the constant difference overflows, then instsimplify should reduce the
1276 // min/max to the add or C1.
1277 bool Overflow;
1278 APInt CDiff =
1279 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1280 assert(!Overflow && "Expected simplify of min/max");
1281
1282 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1283 // Note: the "mismatched" no-overflow setting does not propagate.
1284 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1285 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1286 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1287 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1288}
1289/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1290Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1291 Type *Ty = MinMax1.getType();
1292
1293 // We are looking for a tree of:
1294 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1295 // Where the min and max could be reversed
1296 Instruction *MinMax2;
1297 BinaryOperator *AddSub;
1298 const APInt *MinValue, *MaxValue;
1299 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1300 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1301 return nullptr;
1302 } else if (match(&MinMax1,
1303 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1304 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1305 return nullptr;
1306 } else
1307 return nullptr;
1308
1309 // Check that the constants clamp a saturate, and that the new type would be
1310 // sensible to convert to.
1311 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1312 return nullptr;
1313 // In what bitwidth can this be treated as saturating arithmetics?
1314 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1315 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1316 // good first approximation for what should be done there.
1317 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1318 return nullptr;
1319
1320 // Also make sure that the inner min/max and the add/sub have one use.
1321 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1322 return nullptr;
1323
1324 // Create the new type (which can be a vector type)
1325 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1326
1327 Intrinsic::ID IntrinsicID;
1328 if (AddSub->getOpcode() == Instruction::Add)
1329 IntrinsicID = Intrinsic::sadd_sat;
1330 else if (AddSub->getOpcode() == Instruction::Sub)
1331 IntrinsicID = Intrinsic::ssub_sat;
1332 else
1333 return nullptr;
1334
1335 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1336 // is usually achieved via a sext from a smaller type.
1337 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1338 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1339 return nullptr;
1340
1341 // Finally create and return the sat intrinsic, truncated to the new type
1342 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1343 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1344 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1345 return CastInst::Create(Instruction::SExt, Sat, Ty);
1346}
1347
1348
1349/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1350/// can only be one of two possible constant values -- turn that into a select
1351/// of constants.
1353 InstCombiner::BuilderTy &Builder) {
1354 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1355 Value *X;
1356 const APInt *C0, *C1;
1357 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1358 return nullptr;
1359
1361 switch (II->getIntrinsicID()) {
1362 case Intrinsic::smax:
1363 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1364 Pred = ICmpInst::ICMP_SGT;
1365 break;
1366 case Intrinsic::smin:
1367 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1368 Pred = ICmpInst::ICMP_SLT;
1369 break;
1370 case Intrinsic::umax:
1371 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1372 Pred = ICmpInst::ICMP_UGT;
1373 break;
1374 case Intrinsic::umin:
1375 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1376 Pred = ICmpInst::ICMP_ULT;
1377 break;
1378 default:
1379 llvm_unreachable("Expected min/max intrinsic");
1380 }
1381 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1382 return nullptr;
1383
1384 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1385 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1386 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1387 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1388}
1389
1390/// If this min/max has a constant operand and an operand that is a matching
1391/// min/max with a constant operand, constant-fold the 2 constant operands.
1393 IRBuilderBase &Builder,
1394 const SimplifyQuery &SQ) {
1395 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1396 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1397 if (!LHS)
1398 return nullptr;
1399
1400 Constant *C0, *C1;
1401 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1402 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1403 return nullptr;
1404
1405 // max (max X, C0), C1 --> max X, (max C0, C1)
1406 // min (min X, C0), C1 --> min X, (min C0, C1)
1407 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1408 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1409 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1410 if (InnerMinMaxID != MinMaxID &&
1411 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1412 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1413 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1414 return nullptr;
1415
1417 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1418 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1419 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1420 {LHS->getArgOperand(0), NewC});
1421}
1422
1423/// If this min/max has a matching min/max operand with a constant, try to push
1424/// the constant operand into this instruction. This can enable more folds.
1425static Instruction *
1427 InstCombiner::BuilderTy &Builder) {
1428 // Match and capture a min/max operand candidate.
1429 Value *X, *Y;
1430 Constant *C;
1431 Instruction *Inner;
1433 m_Instruction(Inner),
1435 m_Value(Y))))
1436 return nullptr;
1437
1438 // The inner op must match. Check for constants to avoid infinite loops.
1439 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1440 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1441 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1443 return nullptr;
1444
1445 // max (max X, C), Y --> max (max X, Y), C
1447 MinMaxID, II->getType());
1448 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1449 NewInner->takeName(Inner);
1450 return CallInst::Create(MinMax, {NewInner, C});
1451}
1452
1453/// Reduce a sequence of min/max intrinsics with a common operand.
1455 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1456 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1457 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1458 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1459 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1460 RHS->getIntrinsicID() != MinMaxID ||
1461 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1462 return nullptr;
1463
1464 Value *A = LHS->getArgOperand(0);
1465 Value *B = LHS->getArgOperand(1);
1466 Value *C = RHS->getArgOperand(0);
1467 Value *D = RHS->getArgOperand(1);
1468
1469 // Look for a common operand.
1470 Value *MinMaxOp = nullptr;
1471 Value *ThirdOp = nullptr;
1472 if (LHS->hasOneUse()) {
1473 // If the LHS is only used in this chain and the RHS is used outside of it,
1474 // reuse the RHS min/max because that will eliminate the LHS.
1475 if (D == A || C == A) {
1476 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1477 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1478 MinMaxOp = RHS;
1479 ThirdOp = B;
1480 } else if (D == B || C == B) {
1481 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1482 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1483 MinMaxOp = RHS;
1484 ThirdOp = A;
1485 }
1486 } else {
1487 assert(RHS->hasOneUse() && "Expected one-use operand");
1488 // Reuse the LHS. This will eliminate the RHS.
1489 if (D == A || D == B) {
1490 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1491 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1492 MinMaxOp = LHS;
1493 ThirdOp = C;
1494 } else if (C == A || C == B) {
1495 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1496 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1497 MinMaxOp = LHS;
1498 ThirdOp = D;
1499 }
1500 }
1501
1502 if (!MinMaxOp || !ThirdOp)
1503 return nullptr;
1504
1505 Module *Mod = II->getModule();
1506 Function *MinMax =
1507 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1508 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1509}
1510
1511/// If all arguments of the intrinsic are unary shuffles with the same mask,
1512/// try to shuffle after the intrinsic.
1515 if (!II->getType()->isVectorTy() ||
1516 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1517 !II->getCalledFunction()->isSpeculatable())
1518 return nullptr;
1519
1520 Value *X;
1521 Constant *C;
1522 ArrayRef<int> Mask;
1523 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1524 return isa<Constant>(Arg.get()) ||
1525 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1526 Arg.getOperandNo(), nullptr);
1527 });
1528 if (!NonConstArg ||
1529 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1530 return nullptr;
1531
1532 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1533 // instructions.
1534 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1535 return nullptr;
1536
1537 // See if all arguments are shuffled with the same mask.
1539 Type *SrcTy = X->getType();
1540 for (Use &Arg : II->args()) {
1541 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1542 Arg.getOperandNo(), nullptr))
1543 NewArgs.push_back(Arg);
1544 else if (match(&Arg,
1545 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1546 X->getType() == SrcTy)
1547 NewArgs.push_back(X);
1548 else if (match(&Arg, m_ImmConstant(C))) {
1549 // If it's a constant, try to find the constant that would be shuffled to C.
1550 if (Constant *ShuffledC =
1551 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1552 NewArgs.push_back(ShuffledC);
1553 else
1554 return nullptr;
1555 } else
1556 return nullptr;
1557 }
1558
1559 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1560 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1561 // Result type might be a different vector width.
1562 // TODO: Check that the result type isn't widened?
1563 VectorType *ResTy =
1564 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1565 Value *NewIntrinsic =
1566 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1567 return new ShuffleVectorInst(NewIntrinsic, Mask);
1568}
1569
1570/// If all arguments of the intrinsic are reverses, try to pull the reverse
1571/// after the intrinsic.
1573 if (!II->getType()->isVectorTy() ||
1574 !isTriviallyVectorizable(II->getIntrinsicID()))
1575 return nullptr;
1576
1577 // At least 1 operand must be a reverse with 1 use because we are creating 2
1578 // instructions.
1579 if (none_of(II->args(), [](Value *V) {
1580 return match(V, m_OneUse(m_VecReverse(m_Value())));
1581 }))
1582 return nullptr;
1583
1584 Value *X;
1585 Constant *C;
1586 SmallVector<Value *> NewArgs;
1587 for (Use &Arg : II->args()) {
1588 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1589 Arg.getOperandNo(), nullptr))
1590 NewArgs.push_back(Arg);
1591 else if (match(&Arg, m_VecReverse(m_Value(X))))
1592 NewArgs.push_back(X);
1593 else if (isSplatValue(Arg))
1594 NewArgs.push_back(Arg);
1595 else if (match(&Arg, m_ImmConstant(C)))
1596 NewArgs.push_back(Builder.CreateVectorReverse(C));
1597 else
1598 return nullptr;
1599 }
1600
1601 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1602 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1603 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1604 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1605 return Builder.CreateVectorReverse(NewIntrinsic);
1606}
1607
1608 /// Folds the following cases and accepts bswap and bitreverse intrinsics:
1609/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1610/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1611template <Intrinsic::ID IntrID>
1613 InstCombiner::BuilderTy &Builder) {
1614 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1615 "This helper only supports BSWAP and BITREVERSE intrinsics");
1616
1617 Value *X, *Y;
1618 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1619 // don't match ConstantExpr that aren't meaningful for this transform.
1622 Value *OldReorderX, *OldReorderY;
1624
1625 // If both X and Y are bswap/bitreverse, the transform reduces the number
1626 // of instructions even if there's multiuse.
1627 // If only one operand is bswap/bitreverse, we need to ensure that operand
1628 // has only one use.
1629 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1630 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1631 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1632 }
1633
1634 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1635 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1636 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1637 }
1638
1639 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1640 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1641 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1642 }
1643 }
1644 return nullptr;
1645}
1646
1647/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1648/// `f(f(x, y), y) == f(x, y)` holds.
1650 switch (IID) {
1651 case Intrinsic::smax:
1652 case Intrinsic::smin:
1653 case Intrinsic::umax:
1654 case Intrinsic::umin:
1655 case Intrinsic::maximum:
1656 case Intrinsic::minimum:
1657 case Intrinsic::maximumnum:
1658 case Intrinsic::minimumnum:
1659 case Intrinsic::maxnum:
1660 case Intrinsic::minnum:
1661 return true;
1662 default:
1663 return false;
1664 }
1665}
1666
1667/// Attempt to simplify value-accumulating recurrences of kind:
1668/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1669/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1670/// And let the idempotent binary intrinsic be hoisted, when the operands are
1671/// known to be loop-invariant.
1673 IntrinsicInst *II) {
1674 PHINode *PN;
1675 Value *Init, *OtherOp;
1676
1677 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1678 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1679 auto IID = II->getIntrinsicID();
1680 if (!isIdempotentBinaryIntrinsic(IID) ||
1682 !IC.getDominatorTree().dominates(OtherOp, PN))
1683 return nullptr;
1684
1685 auto *InvariantBinaryInst =
1686 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1687 if (isa<FPMathOperator>(InvariantBinaryInst))
1688 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1689 return InvariantBinaryInst;
1690}
1691
1692static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1693 if (!CanReorderLanes)
1694 return nullptr;
1695
1696 Value *V;
1697 if (match(Arg, m_VecReverse(m_Value(V))))
1698 return V;
1699
1700 ArrayRef<int> Mask;
1701 if (!isa<FixedVectorType>(Arg->getType()) ||
1702 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1703 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1704 return nullptr;
1705
1706 int Sz = Mask.size();
1707 SmallBitVector UsedIndices(Sz);
1708 for (int Idx : Mask) {
1709 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1710 return nullptr;
1711 UsedIndices.set(Idx);
1712 }
1713
1714 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1715 // other changes.
1716 return UsedIndices.all() ? V : nullptr;
1717}
1718
1719/// Fold an unsigned minimum of trailing or leading zero bits counts:
1720/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1721/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1722/// >> ConstOp))
1723/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1724/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
1725template <Intrinsic::ID IntrID>
1726static Value *
1728 const DataLayout &DL,
1729 InstCombiner::BuilderTy &Builder) {
1730 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1731 "This helper only supports cttz and ctlz intrinsics");
1732
1733 Value *CtOp1, *CtOp2;
1734 Value *ZeroUndef1, *ZeroUndef2;
1735 if (!match(I0, m_OneUse(
1736 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1737 return nullptr;
1738
1739 if (match(I1,
1740 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1741 return Builder.CreateBinaryIntrinsic(
1742 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1743 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1744
1745 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1746 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1747 if (!match(I1, m_CheckedInt(LessBitWidth)))
1748 // We have a constant >= BitWidth (which can be handled by CVP)
1749 // or a non-splat vector with elements < and >= BitWidth
1750 return nullptr;
1751
1752 Type *Ty = I1->getType();
1754 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1755 IntrID == Intrinsic::cttz
1756 ? ConstantInt::get(Ty, 1)
1757 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1758 cast<Constant>(I1), DL);
1759 return Builder.CreateBinaryIntrinsic(
1760 IntrID, Builder.CreateOr(CtOp1, NewConst),
1761 ConstantInt::getTrue(ZeroUndef1->getType()));
1762}
1763
1764/// Return whether "X LOp (Y ROp Z)" is always equal to
1765/// "(X LOp Y) ROp (X LOp Z)".
///
/// LOp is a binary opcode and ROp is an intrinsic ID; HasNUW / HasNSW are the
/// no-unsigned-wrap / no-signed-wrap facts the caller has established for the
/// binary operation. The answer is true only for:
///   - ROp in {umax, umin} with HasNUW and LOp in {Add, Shl}
///   - ROp in {smax, smin} with HasNSW and LOp == Add
/// Every other combination returns false.
///
/// NOTE(review): listing line 1766 (the start of the signature, carrying the
/// function name and the LOp and HasNUW parameters) is absent from this copy.
1767 bool HasNSW, Intrinsic::ID ROp) {
1768 switch (ROp) {
// Unsigned min/max: requires the no-unsigned-wrap guarantee.
1769 case Intrinsic::umax:
1770 case Intrinsic::umin:
1771 if (HasNUW && LOp == Instruction::Add)
1772 return true;
1773 if (HasNUW && LOp == Instruction::Shl)
1774 return true;
1775 return false;
// Signed min/max: only Add with the no-signed-wrap guarantee qualifies.
1776 case Intrinsic::smax:
1777 case Intrinsic::smin:
1778 return HasNSW && LOp == Instruction::Add;
1779 default:
1780 return false;
1781 }
1782}
1783
1784/// Return whether "(X ROp Y) LOp Z" is always equal to
1785/// "(X LOp Z) ROp (Y LOp Z)".
///
/// Commutative opcodes and Shl are answered by leftDistributesOverRight with
/// the same arguments. Beyond those, only Sub qualifies: with HasNUW for
/// umax/umin and with HasNSW for smax/smin. Everything else returns false.
///
/// NOTE(review): listing line 1786 (the start of the signature, carrying the
/// function name and the LOp and HasNUW parameters) is absent from this copy.
1787 bool HasNSW, Intrinsic::ID ROp) {
// Delegate the cases that leftDistributesOverRight already decides.
1788 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1789 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
// Remaining opcodes: subtraction of a common right-hand operand.
1790 switch (ROp) {
1791 case Intrinsic::umax:
1792 case Intrinsic::umin:
1793 return HasNUW && LOp == Instruction::Sub;
1794 case Intrinsic::smax:
1795 case Intrinsic::smin:
1796 return HasNSW && LOp == Instruction::Sub;
1797 default:
1798 return false;
1799 }
1800}
1801
1802// Attempts to factorise a common term
1803// in an instruction that has the form "(A op' B) op (C op' D)"
1804// where op is an intrinsic and op' is a binop
//
// Requirements checked below: both intrinsic operands are binary operators
// with the same opcode, and each has exactly one use. On success the result is
// a freshly built binary operator of the form "A op' (B op D)" or
// "(A op C) op' B"; otherwise nullptr is returned.
//
// NOTE(review): listing line 1806 (the function name and its first parameter,
// referred to as II in the body) is absent from this copy.
1805static Value *
1807 InstCombiner::BuilderTy &Builder) {
1808 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1809 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1810
// NOTE(review): listing lines 1811-1812 are absent from this copy. They
// presumably declare Op0 and Op1 from LHS and RHS; the uses below
// (getOpcode, hasNoUnsignedWrap, isCommutative) suggest BinaryOperator
// pointers that may be null -- confirm against the upstream file.
1813
1814 if (!Op0 || !Op1)
1815 return nullptr;
1816
1817 if (Op0->getOpcode() != Op1->getOpcode())
1818 return nullptr;
1819
// Both inner operations are replaced by the fold, so neither may have other
// users.
1820 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1821 return nullptr;
1822
// A wrap flag is only usable when it is present on both inner operations.
1823 Instruction::BinaryOps InnerOpcode =
1824 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1825 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1826 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1827
1828 Value *A = Op0->getOperand(0);
1829 Value *B = Op0->getOperand(1);
1830 Value *C = Op1->getOperand(0);
1831 Value *D = Op1->getOperand(1);
1832
1833 // Attempts to swap variables such that A equals C or B equals D,
1834 // if the inner operation is commutative.
1835 if (Op0->isCommutative() && A != C && B != D) {
1836 if (A == D || B == C)
1837 std::swap(C, D);
1838 else
1839 return nullptr;
1840 }
1841
1842 BinaryOperator *NewBinop;
// Common left operand: A op' B, A op' D  ==>  A op' (B op D).
1843 if (A == C &&
1844 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1845 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1846 NewBinop =
1847 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
// Common right operand: A op' B, C op' B  ==>  (A op C) op' B.
1848 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1849 TopLevelOpcode)) {
1850 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1851 NewBinop =
1852 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1853 } else {
1854 return nullptr;
1855 }
1856
// The new binary operator receives the wrap flags shared by both originals.
1857 NewBinop->setHasNoUnsignedWrap(HasNUW);
1858 NewBinop->setHasNoSignedWrap(HasNSW);
1859
1860 return NewBinop;
1861}
1862
// Rewrites a two-operand shift intrinsic whose shift amount (operand 1) is a
// constant into a plain IR shift of operand 0:
//   - all amounts non-negative and below the element width -> shl
//   - all amounts negative and above -(element width)      -> ashr or lshr
//     by the negated amount (ashr for arm_neon_vshifts and aarch64_neon_sshl,
//     lshr for any other intrinsic ID)
// Returns the result of IC.replaceInstUsesWith on success, or nullptr when the
// amount is not a constant, mixes the two sign classes, or is out of range.
//
// NOTE(review): listing line 1863 (the signature) is absent from this copy, so
// the function name and return type are not visible here. The body uses II
// (the intrinsic call) and IC (the combiner providing Builder and
// replaceInstUsesWith).
1864 Value *Arg0 = II->getArgOperand(0);
1865 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1866 if (!ShiftConst)
1867 return nullptr;
1868
1869 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1870 bool AllPositive = true;
1871 bool AllNegative = true;
1872
// Validates one shift-amount element and records its sign class. Zero counts
// as non-negative. Fails for a null or non-ConstantInt element, for an amount
// outside the open range (-ElemBits, ElemBits), and as soon as a negative and
// a non-negative element have both been seen.
1873 auto Check = [&](Constant *C) -> bool {
1874 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1875 const APInt &V = CI->getValue();
1876 if (V.isNonNegative()) {
1877 AllNegative = false;
1878 return AllPositive && V.ult(ElemBits);
1879 }
1880 AllPositive = false;
1881 return AllNegative && V.sgt(-ElemBits);
1882 }
1883 return false;
1884 };
1885
// Fixed-width vectors are checked element by element; any other type is
// checked as a single constant.
1886 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1887 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1888 if (!Check(ShiftConst->getAggregateElement(I)))
1889 return nullptr;
1890 }
1891
1892 } else if (!Check(ShiftConst))
1893 return nullptr;
1894
1895 IRBuilderBase &B = IC.Builder;
// Non-negative amounts: an ordinary left shift.
1896 if (AllPositive)
1897 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1898
// Negative amounts: shift right by the negated amount, arithmetic for the
// signed intrinsic IDs listed below and logical otherwise.
1899 Value *NegAmt = B.CreateNeg(ShiftConst);
1900 Intrinsic::ID IID = II->getIntrinsicID();
1901 const bool IsSigned =
1902 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1903 Value *Result =
1904 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1905 return IC.replaceInstUsesWith(*II, Result);
1906}
1907
1908/// CallInst simplification. This mostly only handles folding of intrinsic
1909/// instructions. For normal calls, it allows visitCallBase to do the heavy
1910/// lifting.
1912 // Don't try to simplify calls without uses. It will not do anything useful,
1913 // but will result in the following folds being skipped.
1914 if (!CI.use_empty()) {
1915 SmallVector<Value *, 8> Args(CI.args());
1916 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1917 SQ.getWithInstruction(&CI)))
1918 return replaceInstUsesWith(CI, V);
1919 }
1920
1921 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1922 return visitFree(CI, FreedOp);
1923
1924 // If the caller function (i.e. us, the function that contains this CallInst)
1925 // is nounwind, mark the call as nounwind, even if the callee isn't.
1926 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1927 CI.setDoesNotThrow();
1928 return &CI;
1929 }
1930
1932 if (!II)
1933 return visitCallBase(CI);
1934
1935 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1936 // instead of in visitCallBase.
1937 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1938 if (auto NumBytes = MI->getLengthInBytes()) {
1939 // memmove/cpy/set of zero bytes is a noop.
1940 if (NumBytes->isZero())
1941 return eraseInstFromFunction(CI);
1942
1943 // For atomic unordered mem intrinsics if len is not a positive or
1944 // not a multiple of element size then behavior is undefined.
1945 if (MI->isAtomic() &&
1946 (NumBytes->isNegative() ||
1947 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1949 assert(MI->getType()->isVoidTy() &&
1950 "non void atomic unordered mem intrinsic");
1951 return eraseInstFromFunction(*MI);
1952 }
1953 }
1954
1955 // No other transformations apply to volatile transfers.
1956 if (MI->isVolatile())
1957 return nullptr;
1958
1960 // memmove(x,x,size) -> noop.
1961 if (MTI->getSource() == MTI->getDest())
1962 return eraseInstFromFunction(CI);
1963 }
1964
1965 auto IsPointerUndefined = [MI](Value *Ptr) {
1966 return isa<ConstantPointerNull>(Ptr) &&
1968 MI->getFunction(),
1969 cast<PointerType>(Ptr->getType())->getAddressSpace());
1970 };
1971 bool SrcIsUndefined = false;
1972 // If we can determine a pointer alignment that is bigger than currently
1973 // set, update the alignment.
1974 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1976 return I;
1977 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1978 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1979 if (Instruction *I = SimplifyAnyMemSet(MSI))
1980 return I;
1981 }
1982
1983 // If src/dest is null, this memory intrinsic must be a noop.
1984 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1985 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1986 return eraseInstFromFunction(CI);
1987 }
1988
1989 // If we have a memmove and the source operation is a constant global,
1990 // then the source and dest pointers can't alias, so we can change this
1991 // into a call to memcpy.
1992 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1993 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1994 if (GVSrc->isConstant()) {
1995 Module *M = CI.getModule();
1996 Intrinsic::ID MemCpyID =
1997 MMI->isAtomic()
1998 ? Intrinsic::memcpy_element_unordered_atomic
1999 : Intrinsic::memcpy;
2000 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2001 CI.getArgOperand(1)->getType(),
2002 CI.getArgOperand(2)->getType() };
2004 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2005 return II;
2006 }
2007 }
2008 }
2009
2010 // For fixed width vector result intrinsics, use the generic demanded vector
2011 // support.
2012 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2013 auto VWidth = IIFVTy->getNumElements();
2014 APInt PoisonElts(VWidth, 0);
2015 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2016 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2017 if (V != II)
2018 return replaceInstUsesWith(*II, V);
2019 return II;
2020 }
2021 }
2022
2023 if (II->isCommutative()) {
2024 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2025 replaceOperand(*II, 0, Pair->first);
2026 replaceOperand(*II, 1, Pair->second);
2027 return II;
2028 }
2029
2030 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2031 return NewCall;
2032 }
2033
2034 // Unused constrained FP intrinsic calls may have declared side effect, which
2035 // prevents it from being removed. In some cases however the side effect is
2036 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2037 // returns a replacement, the call may be removed.
2038 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2039 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2040 return eraseInstFromFunction(CI);
2041 }
2042
2043 Intrinsic::ID IID = II->getIntrinsicID();
2044 switch (IID) {
2045 case Intrinsic::objectsize: {
2046 SmallVector<Instruction *> InsertedInstructions;
2047 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2048 &InsertedInstructions)) {
2049 for (Instruction *Inserted : InsertedInstructions)
2050 Worklist.add(Inserted);
2051 return replaceInstUsesWith(CI, V);
2052 }
2053 return nullptr;
2054 }
2055 case Intrinsic::abs: {
2056 Value *IIOperand = II->getArgOperand(0);
2057 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2058
2059 // abs(-x) -> abs(x)
2060 Value *X;
2061 if (match(IIOperand, m_Neg(m_Value(X)))) {
2062 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2063 replaceOperand(*II, 1, Builder.getTrue());
2064 return replaceOperand(*II, 0, X);
2065 }
2066 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2067 return replaceOperand(*II, 0, X);
2068
2069 Value *Y;
2070 // abs(a * abs(b)) -> abs(a * b)
2071 if (match(IIOperand,
2074 bool NSW =
2075 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2076 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2077 return replaceOperand(*II, 0, XY);
2078 }
2079
2080 if (std::optional<bool> Known =
2081 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2082 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2083 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2084 if (!*Known)
2085 return replaceInstUsesWith(*II, IIOperand);
2086
2087 // abs(x) -> -x if x < 0
2088 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2089 if (IntMinIsPoison)
2090 return BinaryOperator::CreateNSWNeg(IIOperand);
2091 return BinaryOperator::CreateNeg(IIOperand);
2092 }
2093
2094 // abs (sext X) --> zext (abs X*)
2095 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2096 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2097 Value *NarrowAbs =
2098 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2099 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2100 }
2101
2102 // Match a complicated way to check if a number is odd/even:
2103 // abs (srem X, 2) --> and X, 1
2104 const APInt *C;
2105 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2106 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2107
2108 break;
2109 }
2110 case Intrinsic::umin: {
2111 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2112 // umin(x, 1) == zext(x != 0)
2113 if (match(I1, m_One())) {
2114 assert(II->getType()->getScalarSizeInBits() != 1 &&
2115 "Expected simplify of umin with max constant");
2116 Value *Zero = Constant::getNullValue(I0->getType());
2117 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2118 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2119 }
2120 // umin(cttz(x), const) --> cttz(x | (1 << const))
2121 if (Value *FoldedCttz =
2123 I0, I1, DL, Builder))
2124 return replaceInstUsesWith(*II, FoldedCttz);
2125 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2126 if (Value *FoldedCtlz =
2128 I0, I1, DL, Builder))
2129 return replaceInstUsesWith(*II, FoldedCtlz);
2130 [[fallthrough]];
2131 }
2132 case Intrinsic::umax: {
2133 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2134 Value *X, *Y;
2135 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2136 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2137 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2138 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2139 }
2140 Constant *C;
2141 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2142 I0->hasOneUse()) {
2143 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2144 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2145 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2146 }
2147 }
2148 // If C is not 0:
2149 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2150 // If C is not 0 or 1:
2151 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2152 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2153 const APInt *C;
2154 Value *X;
2155 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2156 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2157 return nullptr;
2158 if (C->isZero())
2159 return nullptr;
2160 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2161 return nullptr;
2162
2163 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2164 Value *NewSelect = nullptr;
2165 NewSelect = Builder.CreateSelectWithUnknownProfile(
2166 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2167 return replaceInstUsesWith(*II, NewSelect);
2168 };
2169
2170 if (IID == Intrinsic::umax) {
2171 if (Instruction *I = foldMaxMulShift(I0, I1))
2172 return I;
2173 if (Instruction *I = foldMaxMulShift(I1, I0))
2174 return I;
2175 }
2176
2177 // If both operands of unsigned min/max are sign-extended, it is still ok
2178 // to narrow the operation.
2179 [[fallthrough]];
2180 }
2181 case Intrinsic::smax:
2182 case Intrinsic::smin: {
2183 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2184 Value *X, *Y;
2185 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2186 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2187 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2188 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2189 }
2190
2191 Constant *C;
2192 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2193 I0->hasOneUse()) {
2194 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2195 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2196 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2197 }
2198 }
2199
2200 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2201 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2202 const APInt *MinC, *MaxC;
2203 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2204 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2205 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2206 Value *NewMax = Builder.CreateBinaryIntrinsic(
2207 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2208 return replaceInstUsesWith(
2209 *II, Builder.CreateBinaryIntrinsic(
2210 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2211 };
2212 if (IID == Intrinsic::smax &&
2214 m_APInt(MinC)))) &&
2215 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2216 return CreateCanonicalClampForm(true);
2217 if (IID == Intrinsic::umax &&
2219 m_APInt(MinC)))) &&
2220 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2221 return CreateCanonicalClampForm(false);
2222
2223 // umin(i1 X, i1 Y) -> and i1 X, Y
2224 // smax(i1 X, i1 Y) -> and i1 X, Y
2225 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2226 II->getType()->isIntOrIntVectorTy(1)) {
2227 return BinaryOperator::CreateAnd(I0, I1);
2228 }
2229
2230 // umax(i1 X, i1 Y) -> or i1 X, Y
2231 // smin(i1 X, i1 Y) -> or i1 X, Y
2232 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2233 II->getType()->isIntOrIntVectorTy(1)) {
2234 return BinaryOperator::CreateOr(I0, I1);
2235 }
2236
2237 // smin(smax(X, -1), 1) -> scmp(X, 0)
2238 // smax(smin(X, 1), -1) -> scmp(X, 0)
2239 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2240 // And i1's have been changed to and/ors
2241 // So we only need to check for smin
2242 if (IID == Intrinsic::smin) {
2243 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2244 match(I1, m_One())) {
2245 Value *Zero = ConstantInt::get(X->getType(), 0);
2246 return replaceInstUsesWith(
2247 CI,
2248 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2249 }
2250 }
2251
2252 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2253 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2254 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2255 // TODO: Canonicalize neg after min/max if I1 is constant.
2256 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2257 (I0->hasOneUse() || I1->hasOneUse())) {
2259 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2260 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2261 }
2262 }
2263
2264 // (umax X, (xor X, Pow2))
2265 // -> (or X, Pow2)
2266 // (umin X, (xor X, Pow2))
2267 // -> (and X, ~Pow2)
2268 // (smax X, (xor X, Pos_Pow2))
2269 // -> (or X, Pos_Pow2)
2270 // (smin X, (xor X, Pos_Pow2))
2271 // -> (and X, ~Pos_Pow2)
2272 // (smax X, (xor X, Neg_Pow2))
2273 // -> (and X, ~Neg_Pow2)
2274 // (smin X, (xor X, Neg_Pow2))
2275 // -> (or X, Neg_Pow2)
2276 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2277 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2278 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2279 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2280 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2281
2282 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2283 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2284 if (KnownSign == std::nullopt) {
2285 UseOr = false;
2286 UseAndN = false;
2287 } else if (*KnownSign /* true is Signed. */) {
2288 UseOr ^= true;
2289 UseAndN ^= true;
2290 Type *Ty = I0->getType();
2291 // Negative power of 2 must be IntMin. It's possible to be able to
2292 // prove negative / power of 2 without actually having known bits, so
2293 // just get the value by hand.
2295 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2296 }
2297 }
2298 if (UseOr)
2299 return BinaryOperator::CreateOr(I0, X);
2300 else if (UseAndN)
2301 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2302 }
2303
2304 // If we can eliminate ~A and Y is free to invert:
2305 // max ~A, Y --> ~(min A, ~Y)
2306 //
2307 // Examples:
2308 // max ~A, ~Y --> ~(min A, Y)
2309 // max ~A, C --> ~(min A, ~C)
2310 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2311 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2312 Value *A;
2313 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2314 !isFreeToInvert(A, A->hasOneUse())) {
2315 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2317 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2318 return BinaryOperator::CreateNot(InvMaxMin);
2319 }
2320 }
2321 return nullptr;
2322 };
2323
2324 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2325 return I;
2326 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2327 return I;
2328
2330 return I;
2331
2332 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2333 const APInt *RHSC;
2334 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2335 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2336 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2337 ConstantInt::get(II->getType(), *RHSC));
2338
2339 // smax(X, -X) --> abs(X)
2340 // smin(X, -X) --> -abs(X)
2341 // umax(X, -X) --> -abs(X)
2342 // umin(X, -X) --> abs(X)
2343 if (isKnownNegation(I0, I1)) {
2344 // We can choose either operand as the input to abs(), but if we can
2345 // eliminate the only use of a value, that's better for subsequent
2346 // transforms/analysis.
2347 if (I0->hasOneUse() && !I1->hasOneUse())
2348 std::swap(I0, I1);
2349
2350 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2351 // operation and potentially its negation.
2352 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2353 Value *Abs = Builder.CreateBinaryIntrinsic(
2354 Intrinsic::abs, I0,
2355 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2356
2357 // We don't have a "nabs" intrinsic, so negate if needed based on the
2358 // max/min operation.
2359 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2360 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2361 return replaceInstUsesWith(CI, Abs);
2362 }
2363
2365 return Sel;
2366
2367 if (Instruction *SAdd = matchSAddSubSat(*II))
2368 return SAdd;
2369
2370 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2371 return replaceInstUsesWith(*II, NewMinMax);
2372
2374 return R;
2375
2376 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2377 return NewMinMax;
2378
2379 // Try to fold minmax with constant RHS based on range information
2380 if (match(I1, m_APIntAllowPoison(RHSC))) {
2381 ICmpInst::Predicate Pred =
2383 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2385 I0, IsSigned, SQ.getWithInstruction(II));
2386 if (!LHS_CR.isFullSet()) {
2387 if (LHS_CR.icmp(Pred, *RHSC))
2388 return replaceInstUsesWith(*II, I0);
2389 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2390 return replaceInstUsesWith(*II,
2391 ConstantInt::get(II->getType(), *RHSC));
2392 }
2393 }
2394
2396 return replaceInstUsesWith(*II, V);
2397
2398 break;
2399 }
2400 case Intrinsic::scmp: {
2401 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2402 Value *LHS, *RHS;
2403 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2404 return replaceInstUsesWith(
2405 CI,
2406 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2407 break;
2408 }
2409 case Intrinsic::bitreverse: {
2410 Value *IIOperand = II->getArgOperand(0);
2411 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2412 Value *X;
2413 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2414 X->getType()->isIntOrIntVectorTy(1)) {
2415 Type *Ty = II->getType();
2416 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2417 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2419 }
2420
2421 if (Instruction *crossLogicOpFold =
2423 return crossLogicOpFold;
2424
2425 break;
2426 }
2427 case Intrinsic::bswap: {
2428 Value *IIOperand = II->getArgOperand(0);
2429
2430 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2431 // inverse-shift-of-bswap:
2432 // bswap (shl X, Y) --> lshr (bswap X), Y
2433 // bswap (lshr X, Y) --> shl (bswap X), Y
2434 Value *X, *Y;
2435 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2436 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2438 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2439 BinaryOperator::BinaryOps InverseShift =
2440 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2441 ? Instruction::LShr
2442 : Instruction::Shl;
2443 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2444 }
2445 }
2446
2447 KnownBits Known = computeKnownBits(IIOperand, II);
2448 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2449 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2450 unsigned BW = Known.getBitWidth();
2451
2452 // bswap(x) -> shift(x) if x has exactly one "active byte"
2453 if (BW - LZ - TZ == 8) {
2454 assert(LZ != TZ && "active byte cannot be in the middle");
2455 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2456 return BinaryOperator::CreateNUWShl(
2457 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2458 // -> lshr(x) if the "active byte" is in the high part of x
2459 return BinaryOperator::CreateExactLShr(
2460 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2461 }
2462
2463 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2464 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2465 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2466 Value *CV = ConstantInt::get(X->getType(), C);
2467 Value *V = Builder.CreateLShr(X, CV);
2468 return new TruncInst(V, IIOperand->getType());
2469 }
2470
2471 if (Instruction *crossLogicOpFold =
2473 return crossLogicOpFold;
2474 }
2475
2476 // Try to fold into bitreverse if bswap is the root of the expression tree.
2477 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2478 /*MatchBitReversals*/ true))
2479 return BitOp;
2480 break;
2481 }
2482 case Intrinsic::masked_load:
2483 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2484 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2485 break;
2486 case Intrinsic::masked_store:
2487 return simplifyMaskedStore(*II);
2488 case Intrinsic::masked_gather:
2489 return simplifyMaskedGather(*II);
2490 case Intrinsic::masked_scatter:
2491 return simplifyMaskedScatter(*II);
2492 case Intrinsic::launder_invariant_group:
2493 case Intrinsic::strip_invariant_group:
2494 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2495 return replaceInstUsesWith(*II, SkippedBarrier);
2496 break;
2497 case Intrinsic::powi:
2498 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2499 // 0 and 1 are handled in instsimplify
2500 // powi(x, -1) -> 1/x
2501 if (Power->isMinusOne())
2502 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2503 II->getArgOperand(0), II);
2504 // powi(x, 2) -> x*x
2505 if (Power->equalsInt(2))
2506 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2507 II->getArgOperand(0), II);
2508
2509 if (!Power->getValue()[0]) {
2510 Value *X;
2511 // If power is even:
2512 // powi(-x, p) -> powi(x, p)
2513 // powi(fabs(x), p) -> powi(x, p)
2514 // powi(copysign(x, y), p) -> powi(x, p)
2515 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2516 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2517 match(II->getArgOperand(0),
2519 return replaceOperand(*II, 0, X);
2520 }
2521 }
2522 break;
2523
2524 case Intrinsic::cttz:
2525 case Intrinsic::ctlz:
2526 if (auto *I = foldCttzCtlz(*II, *this))
2527 return I;
2528 break;
2529
2530 case Intrinsic::ctpop:
2531 if (auto *I = foldCtpop(*II, *this))
2532 return I;
2533 break;
2534
2535 case Intrinsic::fshl:
2536 case Intrinsic::fshr: {
2537 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2538 Type *Ty = II->getType();
2539 unsigned BitWidth = Ty->getScalarSizeInBits();
2540 Constant *ShAmtC;
2541 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2542 // Canonicalize a shift amount constant operand to modulo the bit-width.
2543 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2544 Constant *ModuloC =
2545 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2546 if (!ModuloC)
2547 return nullptr;
2548 if (ModuloC != ShAmtC)
2549 return replaceOperand(*II, 2, ModuloC);
2550
2552 ShAmtC, DL),
2553 m_One()) &&
2554 "Shift amount expected to be modulo bitwidth");
2555
2556 // Canonicalize funnel shift right by constant to funnel shift left. This
2557 // is not entirely arbitrary. For historical reasons, the backend may
2558 // recognize rotate left patterns but miss rotate right patterns.
2559 if (IID == Intrinsic::fshr) {
2560 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2561 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2562 return nullptr;
2563
2564 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2565 Module *Mod = II->getModule();
2566 Function *Fshl =
2567 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2568 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2569 }
2570 assert(IID == Intrinsic::fshl &&
2571 "All funnel shifts by simple constants should go left");
2572
2573 // fshl(X, 0, C) --> shl X, C
2574 // fshl(X, undef, C) --> shl X, C
2575 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2576 return BinaryOperator::CreateShl(Op0, ShAmtC);
2577
2578 // fshl(0, X, C) --> lshr X, (BW-C)
2579 // fshl(undef, X, C) --> lshr X, (BW-C)
2580 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2581 return BinaryOperator::CreateLShr(Op1,
2582 ConstantExpr::getSub(WidthC, ShAmtC));
2583
2584 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2585 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2586 Module *Mod = II->getModule();
2587 Function *Bswap =
2588 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2589 return CallInst::Create(Bswap, { Op0 });
2590 }
2591 if (Instruction *BitOp =
2592 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2593 /*MatchBitReversals*/ true))
2594 return BitOp;
2595
2596 // R = fshl(X, X, C2)
2597 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2598 Value *InnerOp;
2599 const APInt *ShAmtInnerC, *ShAmtOuterC;
2600 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2601 m_APInt(ShAmtInnerC))) &&
2602 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2603 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2604 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2605 if (Modulo.isZero())
2606 return replaceInstUsesWith(*II, InnerOp);
2607 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2609 {InnerOp, InnerOp, ModuloC});
2610 }
2611 }
2612
2613 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2614 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2615 // if BitWidth is a power-of-2
2616 Value *Y;
2617 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2618 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2619 Module *Mod = II->getModule();
2621 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2622 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2623 }
2624
2625 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2626 // power-of-2
2627 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2628 match(Op1, m_ZeroInt())) {
2629 Value *Op2 = II->getArgOperand(2);
2630 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2631 return BinaryOperator::CreateShl(Op0, And);
2632 }
2633
2634 // Left or right might be masked.
2636 return &CI;
2637
2638 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2639 // so only the low bits of the shift amount are demanded if the bitwidth is
2640 // a power-of-2.
2641 if (!isPowerOf2_32(BitWidth))
2642 break;
2644 KnownBits Op2Known(BitWidth);
2645 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2646 return &CI;
2647 break;
2648 }
2649 case Intrinsic::ptrmask: {
2650 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2651 KnownBits Known(BitWidth);
2653 return II;
2654
2655 Value *InnerPtr, *InnerMask;
2656 bool Changed = false;
2657 // Combine:
2658 // (ptrmask (ptrmask p, A), B)
2659 // -> (ptrmask p, (and A, B))
2660 if (match(II->getArgOperand(0),
2662 m_Value(InnerMask))))) {
2663 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2664 "Mask types must match");
2665 // TODO: If InnerMask == Op1, we could copy attributes from inner
2666 // callsite -> outer callsite.
2667 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2668 replaceOperand(CI, 0, InnerPtr);
2669 replaceOperand(CI, 1, NewMask);
2670 Changed = true;
2671 }
2672
2673 // See if we can deduce non-null.
2674 if (!CI.hasRetAttr(Attribute::NonNull) &&
2675 (Known.isNonZero() ||
2676 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2677 CI.addRetAttr(Attribute::NonNull);
2678 Changed = true;
2679 }
2680
2681 unsigned NewAlignmentLog =
2683 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2684 // Known bits will capture if we had alignment information associated with
2685 // the pointer argument.
2686 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2688 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2689 Changed = true;
2690 }
2691 if (Changed)
2692 return &CI;
2693 break;
2694 }
2695 case Intrinsic::uadd_with_overflow:
2696 case Intrinsic::sadd_with_overflow: {
2697 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2698 return I;
2699
2700 // Given 2 constant operands whose sum does not overflow:
2701 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2702 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2703 Value *X;
2704 const APInt *C0, *C1;
2705 Value *Arg0 = II->getArgOperand(0);
2706 Value *Arg1 = II->getArgOperand(1);
2707 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2708 bool HasNWAdd = IsSigned
2709 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2710 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2711 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2712 bool Overflow;
2713 APInt NewC =
2714 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2715 if (!Overflow)
2716 return replaceInstUsesWith(
2717 *II, Builder.CreateBinaryIntrinsic(
2718 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2719 }
2720 break;
2721 }
2722
2723 case Intrinsic::umul_with_overflow:
2724 case Intrinsic::smul_with_overflow:
2725 case Intrinsic::usub_with_overflow:
2726 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2727 return I;
2728 break;
2729
2730 case Intrinsic::ssub_with_overflow: {
2731 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2732 return I;
2733
2734 Constant *C;
2735 Value *Arg0 = II->getArgOperand(0);
2736 Value *Arg1 = II->getArgOperand(1);
2737 // Given a constant C that is not the minimum signed value
2738 // for an integer of a given bit width:
2739 //
2740 // ssubo X, C -> saddo X, -C
2741 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2742 Value *NegVal = ConstantExpr::getNeg(C);
2743 // Build a saddo call that is equivalent to the discovered
2744 // ssubo call.
2745 return replaceInstUsesWith(
2746 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2747 Arg0, NegVal));
2748 }
2749
2750 break;
2751 }
2752
2753 case Intrinsic::uadd_sat:
2754 case Intrinsic::sadd_sat:
2755 case Intrinsic::usub_sat:
2756 case Intrinsic::ssub_sat: {
2758 Type *Ty = SI->getType();
2759 Value *Arg0 = SI->getLHS();
2760 Value *Arg1 = SI->getRHS();
2761
2762 // Make use of known overflow information.
2763 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2764 Arg0, Arg1, SI);
2765 switch (OR) {
2767 break;
2769 if (SI->isSigned())
2770 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2771 else
2772 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2774 unsigned BitWidth = Ty->getScalarSizeInBits();
2775 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2776 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2777 }
2779 unsigned BitWidth = Ty->getScalarSizeInBits();
2780 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2781 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2782 }
2783 }
2784
2785 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2786 // which after that:
2787 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2788 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2789 Constant *C, *C1;
2790 Value *A;
2791 if (IID == Intrinsic::usub_sat &&
2792 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2793 match(Arg1, m_ImmConstant(C1))) {
2794 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2795 auto *NewSub =
2796 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2797 return replaceInstUsesWith(*SI, NewSub);
2798 }
2799
2800 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2801 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2802 C->isNotMinSignedValue()) {
2803 Value *NegVal = ConstantExpr::getNeg(C);
2804 return replaceInstUsesWith(
2805 *II, Builder.CreateBinaryIntrinsic(
2806 Intrinsic::sadd_sat, Arg0, NegVal));
2807 }
2808
2809 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2810 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2811 // if Val and Val2 have the same sign
2812 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2813 Value *X;
2814 const APInt *Val, *Val2;
2815 APInt NewVal;
2816 bool IsUnsigned =
2817 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2818 if (Other->getIntrinsicID() == IID &&
2819 match(Arg1, m_APInt(Val)) &&
2820 match(Other->getArgOperand(0), m_Value(X)) &&
2821 match(Other->getArgOperand(1), m_APInt(Val2))) {
2822 if (IsUnsigned)
2823 NewVal = Val->uadd_sat(*Val2);
2824 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2825 bool Overflow;
2826 NewVal = Val->sadd_ov(*Val2, Overflow);
2827 if (Overflow) {
2828 // Both adds together may add more than SignedMaxValue
2829 // without saturating the final result.
2830 break;
2831 }
2832 } else {
2833 // Cannot fold saturated addition with different signs.
2834 break;
2835 }
2836
2837 return replaceInstUsesWith(
2838 *II, Builder.CreateBinaryIntrinsic(
2839 IID, X, ConstantInt::get(II->getType(), NewVal)));
2840 }
2841 }
2842 break;
2843 }
2844
2845 case Intrinsic::minnum:
2846 case Intrinsic::maxnum:
2847 case Intrinsic::minimumnum:
2848 case Intrinsic::maximumnum:
2849 case Intrinsic::minimum:
2850 case Intrinsic::maximum: {
2851 Value *Arg0 = II->getArgOperand(0);
2852 Value *Arg1 = II->getArgOperand(1);
2853 Value *X, *Y;
2854 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2855 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2856 // If both operands are negated, invert the call and negate the result:
2857 // min(-X, -Y) --> -(max(X, Y))
2858 // max(-X, -Y) --> -(min(X, Y))
2859 Intrinsic::ID NewIID;
2860 switch (IID) {
2861 case Intrinsic::maxnum:
2862 NewIID = Intrinsic::minnum;
2863 break;
2864 case Intrinsic::minnum:
2865 NewIID = Intrinsic::maxnum;
2866 break;
2867 case Intrinsic::maximumnum:
2868 NewIID = Intrinsic::minimumnum;
2869 break;
2870 case Intrinsic::minimumnum:
2871 NewIID = Intrinsic::maximumnum;
2872 break;
2873 case Intrinsic::maximum:
2874 NewIID = Intrinsic::minimum;
2875 break;
2876 case Intrinsic::minimum:
2877 NewIID = Intrinsic::maximum;
2878 break;
2879 default:
2880 llvm_unreachable("unexpected intrinsic ID");
2881 }
2882 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2883 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2884 FNeg->copyIRFlags(II);
2885 return FNeg;
2886 }
2887
2888 // m(m(X, C2), C1) -> m(X, C)
2889 const APFloat *C1, *C2;
2890 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2891 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2892 ((match(M->getArgOperand(0), m_Value(X)) &&
2893 match(M->getArgOperand(1), m_APFloat(C2))) ||
2894 (match(M->getArgOperand(1), m_Value(X)) &&
2895 match(M->getArgOperand(0), m_APFloat(C2))))) {
2896 APFloat Res(0.0);
2897 switch (IID) {
2898 case Intrinsic::maxnum:
2899 Res = maxnum(*C1, *C2);
2900 break;
2901 case Intrinsic::minnum:
2902 Res = minnum(*C1, *C2);
2903 break;
2904 case Intrinsic::maximumnum:
2905 Res = maximumnum(*C1, *C2);
2906 break;
2907 case Intrinsic::minimumnum:
2908 Res = minimumnum(*C1, *C2);
2909 break;
2910 case Intrinsic::maximum:
2911 Res = maximum(*C1, *C2);
2912 break;
2913 case Intrinsic::minimum:
2914 Res = minimum(*C1, *C2);
2915 break;
2916 default:
2917 llvm_unreachable("unexpected intrinsic ID");
2918 }
2919 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2920 // was a simplification (so Arg0 and its original flags could
2921 // propagate?)
2922 Value *V = Builder.CreateBinaryIntrinsic(
2923 IID, X, ConstantFP::get(Arg0->getType(), Res),
2925 return replaceInstUsesWith(*II, V);
2926 }
2927 }
2928
2929 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2930 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2931 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2932 X->getType() == Y->getType()) {
2933 Value *NewCall =
2934 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2935 return new FPExtInst(NewCall, II->getType());
2936 }
2937
2938 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2939 Constant *C;
2940 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2941 match(Arg1, m_ImmConstant(C))) {
2942 if (Constant *TruncC =
2943 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2944 Value *NewCall =
2945 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2946 return new FPExtInst(NewCall, II->getType());
2947 }
2948 }
2949
2950 // max X, -X --> fabs X
2951 // min X, -X --> -(fabs X)
2952 // TODO: Remove one-use limitation? That is obviously better for max,
2953 // hence why we don't check for one-use for that. However,
2954 // it would be an extra instruction for min (fnabs), but
2955 // that is still likely better for analysis and codegen.
2956 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2957 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2958 return Op0->hasOneUse() ||
2959 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2960 IID != Intrinsic::minimumnum);
2961 return false;
2962 };
2963
2964 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2965 Value *R = Builder.CreateFAbs(X, II);
2966 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2967 IID == Intrinsic::minimumnum)
2968 R = Builder.CreateFNegFMF(R, II);
2969 return replaceInstUsesWith(*II, R);
2970 }
2971
2972 break;
2973 }
2974 case Intrinsic::matrix_multiply: {
2975 // Optimize negation in matrix multiplication.
2976
2977 // -A * -B -> A * B
2978 Value *A, *B;
2979 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2980 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2981 replaceOperand(*II, 0, A);
2982 replaceOperand(*II, 1, B);
2983 return II;
2984 }
2985
2986 Value *Op0 = II->getOperand(0);
2987 Value *Op1 = II->getOperand(1);
2988 Value *OpNotNeg, *NegatedOp;
2989 unsigned NegatedOpArg, OtherOpArg;
2990 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2991 NegatedOp = Op0;
2992 NegatedOpArg = 0;
2993 OtherOpArg = 1;
2994 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2995 NegatedOp = Op1;
2996 NegatedOpArg = 1;
2997 OtherOpArg = 0;
2998 } else
2999 // Multiplication doesn't have a negated operand.
3000 break;
3001
3002 // Only optimize if the negated operand has only one use.
3003 if (!NegatedOp->hasOneUse())
3004 break;
3005
3006 Value *OtherOp = II->getOperand(OtherOpArg);
3007 VectorType *RetTy = cast<VectorType>(II->getType());
3008 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3009 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3010 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3011 ElementCount OtherCount = OtherOpTy->getElementCount();
3012 ElementCount RetCount = RetTy->getElementCount();
3013 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3014 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3015 ElementCount::isKnownLT(OtherCount, RetCount)) {
3016 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3017 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3018 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3019 return II;
3020 }
3021 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3022 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3023 SmallVector<Value *, 5> NewArgs(II->args());
3024 NewArgs[NegatedOpArg] = OpNotNeg;
3025 Instruction *NewMul =
3026 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3027 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3028 }
3029 break;
3030 }
3031 case Intrinsic::fmuladd: {
3032 // Try to simplify the underlying FMul.
3033 if (Value *V =
3034 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3035 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3036 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3037 II->getFastMathFlags());
3038
3039 [[fallthrough]];
3040 }
3041 case Intrinsic::fma: {
3042 // fma fneg(x), fneg(y), z -> fma x, y, z
3043 Value *Src0 = II->getArgOperand(0);
3044 Value *Src1 = II->getArgOperand(1);
3045 Value *Src2 = II->getArgOperand(2);
3046 Value *X, *Y;
3047 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3048 replaceOperand(*II, 0, X);
3049 replaceOperand(*II, 1, Y);
3050 return II;
3051 }
3052
3053 // fma fabs(x), fabs(x), z -> fma x, x, z
3054 if (match(Src0, m_FAbs(m_Value(X))) &&
3055 match(Src1, m_FAbs(m_Specific(X)))) {
3056 replaceOperand(*II, 0, X);
3057 replaceOperand(*II, 1, X);
3058 return II;
3059 }
3060
3061 // Try to simplify the underlying FMul. We can only apply simplifications
3062 // that do not require rounding.
3063 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3064 SQ.getWithInstruction(II)))
3065 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3066
3067 // fma x, y, 0 -> fmul x, y
3068 // This is always valid for -0.0, but requires nsz for +0.0 as
3069 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3070 if (match(Src2, m_NegZeroFP()) ||
3071 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3072 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3073
3074 // fma x, -1.0, y -> fsub y, x
3075 if (match(Src1, m_SpecificFP(-1.0)))
3076 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3077
3078 break;
3079 }
3080 case Intrinsic::copysign: {
3081 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3082 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3083 Sign, getSimplifyQuery().getWithInstruction(II))) {
3084 if (*KnownSignBit) {
3085 // If we know that the sign argument is negative, reduce to FNABS:
3086 // copysign Mag, -Sign --> fneg (fabs Mag)
3087 Value *Fabs = Builder.CreateFAbs(Mag, II);
3088 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3089 }
3090
3091 // If we know that the sign argument is positive, reduce to FABS:
3092 // copysign Mag, +Sign --> fabs Mag
3093 Value *Fabs = Builder.CreateFAbs(Mag, II);
3094 return replaceInstUsesWith(*II, Fabs);
3095 }
3096
3097 // Propagate sign argument through nested calls:
3098 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3099 Value *X;
3101 Value *CopySign =
3102 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3103 return replaceInstUsesWith(*II, CopySign);
3104 }
3105
3106 // Clear sign-bit of constant magnitude:
3107 // copysign -MagC, X --> copysign MagC, X
3108 // TODO: Support constant folding for fabs
3109 const APFloat *MagC;
3110 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3111 APFloat PosMagC = *MagC;
3112 PosMagC.clearSign();
3113 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3114 }
3115
3116 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3117 // copysign (fabs X), Sign --> copysign X, Sign
3118 // copysign (fneg X), Sign --> copysign X, Sign
3119 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3120 return replaceOperand(*II, 0, X);
3121
3122 Type *SignEltTy = Sign->getType()->getScalarType();
3123
3124 Value *CastSrc;
3125 if (match(Sign,
3127 CastSrc->getType()->isIntOrIntVectorTy() &&
3129 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3131 APInt::getSignMask(Known.getBitWidth()), Known,
3132 SQ))
3133 return II;
3134 }
3135
3136 break;
3137 }
3138 case Intrinsic::fabs: {
3139 Value *Cond, *TVal, *FVal;
3140 Value *Arg = II->getArgOperand(0);
3141 Value *X;
3142 // fabs (-X) --> fabs (X)
3143 if (match(Arg, m_FNeg(m_Value(X)))) {
3144 Value *Fabs = Builder.CreateFAbs(X, II);
3145 return replaceInstUsesWith(CI, Fabs);
3146 }
3147
3148 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3149 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3150 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3151 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3152 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3153 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3154 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3155 SI->setFastMathFlags(II->getFastMathFlags() |
3156 cast<SelectInst>(Arg)->getFastMathFlags());
3157 // Can't copy nsz to select, as even with the nsz flag the fabs result
3158 // always has the sign bit unset.
3159 SI->setHasNoSignedZeros(false);
3160 return SI;
3161 }
3162 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3163 if (match(TVal, m_FNeg(m_Specific(FVal))))
3164 return replaceOperand(*II, 0, FVal);
3165 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3166 if (match(FVal, m_FNeg(m_Specific(TVal))))
3167 return replaceOperand(*II, 0, TVal);
3168 }
3169
3170 Value *Magnitude, *Sign;
3171 if (match(II->getArgOperand(0),
3172 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3173 // fabs (copysign x, y) -> (fabs x)
3174 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3175 return replaceInstUsesWith(*II, AbsSign);
3176 }
3177
3178 [[fallthrough]];
3179 }
3180 case Intrinsic::ceil:
3181 case Intrinsic::floor:
3182 case Intrinsic::round:
3183 case Intrinsic::roundeven:
3184 case Intrinsic::nearbyint:
3185 case Intrinsic::rint:
3186 case Intrinsic::trunc: {
3187 Value *ExtSrc;
3188 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3189 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3190 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3191 return new FPExtInst(NarrowII, II->getType());
3192 }
3193 break;
3194 }
3195 case Intrinsic::cos:
3196 case Intrinsic::amdgcn_cos:
3197 case Intrinsic::cosh: {
3198 Value *X, *Sign;
3199 Value *Src = II->getArgOperand(0);
3200 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3201 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3202 // f(-x) --> f(x)
3203 // f(fabs(x)) --> f(x)
3204 // f(copysign(x, y)) --> f(x)
3205 // for f in {cos, cosh}
3206 return replaceOperand(*II, 0, X);
3207 }
3208 break;
3209 }
3210 case Intrinsic::sin:
3211 case Intrinsic::amdgcn_sin:
3212 case Intrinsic::sinh:
3213 case Intrinsic::tan:
3214 case Intrinsic::tanh: {
3215 Value *X;
3216 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3217 // f(-x) --> -f(x)
3218 // for f in {sin, sinh, tan, tanh}
3219 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3220 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3221 }
3222 break;
3223 }
3224 case Intrinsic::ldexp: {
3225 Value *Src = II->getArgOperand(0);
3226 Value *Exp = II->getArgOperand(1);
3227
3228 // ldexp(x, K) -> fmul x, 2^K
3229 uint64_t ConstExp;
3230 if (match(Exp, m_ConstantInt(ConstExp))) {
3231 const fltSemantics &FPTy =
3232 Src->getType()->getScalarType()->getFltSemantics();
3233
3234 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3236 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3237 // Skip overflow and underflow cases.
3238 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3239 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3240 }
3241 }
3242
3243 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3244 //
3245 // A danger is if the first ldexp would overflow to infinity or underflow to
3246 // zero, but the combined exponent avoids it.
3247 //
3248 // We ignore this with reassoc, or if we know both exponents have the same
3249 // sign (since then we'd just double down on the over/underflow which would
3250 // occur anyway).
3251 //
3252 // ldexp can take arbitrary integer types, so we also need to ensure that
3253 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3254 // then ldexp at the saturated exponent saturates to inf or zero as well.
3255 //
3256 // TODO: Could do better if we had range tracking for the input value
3257 // exponent. Also could broaden sign check to cover == 0 case.
3258 Value *InnerSrc;
3259 Value *InnerExp;
3261 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3262 Exp->getType() == InnerExp->getType()) {
3263 FastMathFlags FMF = II->getFastMathFlags();
3264 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3265
3266 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3267 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3268 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3269 Value *NewExp =
3270 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3271 II->setArgOperand(1, NewExp);
3272 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3273 return replaceOperand(*II, 0, InnerSrc);
3274 }
3275 }
3276
3277 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3278 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3279 Value *ExtSrc;
3280 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3281 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3282 Value *Select =
3283 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3284 ConstantFP::get(II->getType(), 1.0));
3286 }
3287 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3288 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3289 Value *Select =
3290 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3291 ConstantFP::get(II->getType(), 1.0));
3293 }
3294
3295 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3296 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3297 //
3298 // TODO: If we cared, should insert a canonicalize for x
3299 Value *SelectCond, *SelectLHS, *SelectRHS;
3300 if (match(II->getArgOperand(1),
3301 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3302 m_Value(SelectRHS))))) {
3303 Value *NewLdexp = nullptr;
3304 Value *Select = nullptr;
3305 if (match(SelectRHS, m_ZeroInt())) {
3306 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3307 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3308 } else if (match(SelectLHS, m_ZeroInt())) {
3309 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3310 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3311 }
3312
3313 if (NewLdexp) {
3314 Select->takeName(II);
3315 return replaceInstUsesWith(*II, Select);
3316 }
3317 }
3318
3319 break;
3320 }
3321 case Intrinsic::ptrauth_auth:
3322 case Intrinsic::ptrauth_resign: {
3323 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3324 // sign+auth component if the key and discriminator match.
3325 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3326 Value *Ptr = II->getArgOperand(0);
3327 Value *Key = II->getArgOperand(1);
3328 Value *Disc = II->getArgOperand(2);
3329 Value *DS = nullptr;
3330 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3331 DS = Bundle->Inputs[0];
3332
3333 // AuthKey will be the key we need to end up authenticating against in
3334 // whatever we replace this sequence with.
3335 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3336 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3337 Value *OtherDS = nullptr;
3338 if (auto Bundle =
3340 OtherDS = Bundle->Inputs[0];
3341 if (DS != OtherDS)
3342 break;
3343
3344 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3345 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3346 break;
3347 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3348 // The resign intrinsic does not support deactivation symbols.
3349 assert(!DS);
3350 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3351 break;
3352 AuthKey = CI->getArgOperand(1);
3353 AuthDisc = CI->getArgOperand(2);
3354 } else
3355 break;
3356 BasePtr = CI->getArgOperand(0);
3357 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3358 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3359 // our purposes, so check for that too.
3360 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3361 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3362 break;
3363
3364 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3365 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3366 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3367 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3368 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3369 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3370 SignDisc, /*AddrDisc=*/Null,
3371 /*DeactivationSymbol=*/Null);
3373 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3374 return eraseInstFromFunction(*II);
3375 }
3376
3377 // auth(ptrauth(p,k,d),k,d) -> p
3378 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3379 } else
3380 break;
3381
3382 unsigned NewIntrin;
3383 if (AuthKey && NeedSign) {
3384 // resign(0,1) + resign(1,2) = resign(0, 2)
3385 NewIntrin = Intrinsic::ptrauth_resign;
3386 } else if (AuthKey) {
3387 // resign(0,1) + auth(1) = auth(0)
3388 NewIntrin = Intrinsic::ptrauth_auth;
3389 } else if (NeedSign) {
3390 // sign(0) + resign(0, 1) = sign(1)
3391 NewIntrin = Intrinsic::ptrauth_sign;
3392 } else {
3393 // sign(0) + auth(0) = nop
3394 replaceInstUsesWith(*II, BasePtr);
3395 return eraseInstFromFunction(*II);
3396 }
3397
3398 SmallVector<Value *, 4> CallArgs;
3399 CallArgs.push_back(BasePtr);
3400 if (AuthKey) {
3401 CallArgs.push_back(AuthKey);
3402 CallArgs.push_back(AuthDisc);
3403 }
3404
3405 if (NeedSign) {
3406 CallArgs.push_back(II->getArgOperand(3));
3407 CallArgs.push_back(II->getArgOperand(4));
3408 }
3409
3410 std::vector<OperandBundleDef> Bundles;
3411 if (DS)
3412 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3413
3414 Function *NewFn =
3415 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3416 return CallInst::Create(NewFn, CallArgs, Bundles);
3417 }
3418 case Intrinsic::arm_neon_vtbl1:
3419 case Intrinsic::arm_neon_vtbl2:
3420 case Intrinsic::arm_neon_vtbl3:
3421 case Intrinsic::arm_neon_vtbl4:
3422 case Intrinsic::aarch64_neon_tbl1:
3423 case Intrinsic::aarch64_neon_tbl2:
3424 case Intrinsic::aarch64_neon_tbl3:
3425 case Intrinsic::aarch64_neon_tbl4:
3426 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3427 case Intrinsic::arm_neon_vtbx1:
3428 case Intrinsic::arm_neon_vtbx2:
3429 case Intrinsic::arm_neon_vtbx3:
3430 case Intrinsic::arm_neon_vtbx4:
3431 case Intrinsic::aarch64_neon_tbx1:
3432 case Intrinsic::aarch64_neon_tbx2:
3433 case Intrinsic::aarch64_neon_tbx3:
3434 case Intrinsic::aarch64_neon_tbx4:
3435 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3436
3437 case Intrinsic::arm_neon_vmulls:
3438 case Intrinsic::arm_neon_vmullu:
3439 case Intrinsic::aarch64_neon_smull:
3440 case Intrinsic::aarch64_neon_umull: {
3441 Value *Arg0 = II->getArgOperand(0);
3442 Value *Arg1 = II->getArgOperand(1);
3443
3444 // Handle mul by zero first:
3446 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3447 }
3448
3449 // Check for constant LHS & RHS - in this case we just simplify.
3450 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3451 IID == Intrinsic::aarch64_neon_umull);
3452 VectorType *NewVT = cast<VectorType>(II->getType());
3453 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3454 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3455 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3456 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3457 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3458 }
3459
3460 // Couldn't simplify - canonicalize constant to the RHS.
3461 std::swap(Arg0, Arg1);
3462 }
3463
3464 // Handle mul by one:
3465 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3466 if (ConstantInt *Splat =
3467 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3468 if (Splat->isOne())
3469 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3470 /*isSigned=*/!Zext);
3471
3472 break;
3473 }
3474 case Intrinsic::arm_neon_aesd:
3475 case Intrinsic::arm_neon_aese:
3476 case Intrinsic::aarch64_crypto_aesd:
3477 case Intrinsic::aarch64_crypto_aese:
3478 case Intrinsic::aarch64_sve_aesd:
3479 case Intrinsic::aarch64_sve_aese: {
3480 Value *DataArg = II->getArgOperand(0);
3481 Value *KeyArg = II->getArgOperand(1);
3482
3483 // Accept zero on either operand.
3484 if (!match(KeyArg, m_ZeroInt()))
3485 std::swap(KeyArg, DataArg);
3486
3487 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3488 Value *Data, *Key;
3489 if (match(KeyArg, m_ZeroInt()) &&
3490 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3491 replaceOperand(*II, 0, Data);
3492 replaceOperand(*II, 1, Key);
3493 return II;
3494 }
3495 break;
3496 }
3497 case Intrinsic::arm_neon_vshifts:
3498 case Intrinsic::arm_neon_vshiftu:
3499 case Intrinsic::aarch64_neon_sshl:
3500 case Intrinsic::aarch64_neon_ushl:
3501 return foldNeonShift(II, *this);
3502 case Intrinsic::hexagon_V6_vandvrt:
3503 case Intrinsic::hexagon_V6_vandvrt_128B: {
3504 // Simplify Q -> V -> Q conversion.
3505 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3506 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3507 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3508 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3509 break;
3510 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3511 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3512 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3513 // Check if every byte has common bits in Bytes and Mask.
3514 uint64_t C = Bytes1 & Mask1;
3515 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3516 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3517 }
3518 break;
3519 }
3520 case Intrinsic::stackrestore: {
3521 enum class ClassifyResult {
3522 None,
3523 Alloca,
3524 StackRestore,
3525 CallWithSideEffects,
3526 };
3527 auto Classify = [](const Instruction *I) {
3528 if (isa<AllocaInst>(I))
3529 return ClassifyResult::Alloca;
3530
3531 if (auto *CI = dyn_cast<CallInst>(I)) {
3532 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3533 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3534 return ClassifyResult::StackRestore;
3535
3536 if (II->mayHaveSideEffects())
3537 return ClassifyResult::CallWithSideEffects;
3538 } else {
3539 // Consider all non-intrinsic calls to be side effects
3540 return ClassifyResult::CallWithSideEffects;
3541 }
3542 }
3543
3544 return ClassifyResult::None;
3545 };
3546
3547 // If the stacksave and the stackrestore are in the same BB, and there is
3548 // no intervening call, alloca, or stackrestore of a different stacksave,
3549 // remove the restore. This can happen when variable allocas are DCE'd.
3550 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3551 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3552 SS->getParent() == II->getParent()) {
3553 BasicBlock::iterator BI(SS);
3554 bool CannotRemove = false;
3555 for (++BI; &*BI != II; ++BI) {
3556 switch (Classify(&*BI)) {
3557 case ClassifyResult::None:
3558 // So far so good, look at next instructions.
3559 break;
3560
3561 case ClassifyResult::StackRestore:
3562 // If we found an intervening stackrestore for a different
3563 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3564 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3565 CannotRemove = true;
3566 break;
3567
3568 case ClassifyResult::Alloca:
3569 case ClassifyResult::CallWithSideEffects:
3570 // If we found an alloca, a non-intrinsic call, or an intrinsic
3571 // call with side effects, we can't remove the stackrestore.
3572 CannotRemove = true;
3573 break;
3574 }
3575 if (CannotRemove)
3576 break;
3577 }
3578
3579 if (!CannotRemove)
3580 return eraseInstFromFunction(CI);
3581 }
3582 }
3583
3584 // Scan down this block to see if there is another stack restore in the
3585 // same block without an intervening call/alloca.
3587 Instruction *TI = II->getParent()->getTerminator();
3588 bool CannotRemove = false;
3589 for (++BI; &*BI != TI; ++BI) {
3590 switch (Classify(&*BI)) {
3591 case ClassifyResult::None:
3592 // So far so good, look at next instructions.
3593 break;
3594
3595 case ClassifyResult::StackRestore:
3596 // If there is a stackrestore below this one, remove this one.
3597 return eraseInstFromFunction(CI);
3598
3599 case ClassifyResult::Alloca:
3600 case ClassifyResult::CallWithSideEffects:
3601 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3602 // with side effects (such as llvm.stacksave and llvm.read_register),
3603 // we can't remove the stack restore.
3604 CannotRemove = true;
3605 break;
3606 }
3607 if (CannotRemove)
3608 break;
3609 }
3610
3611 // If the stack restore is in a return, resume, or unwind block and if there
3612 // are no allocas or calls between the restore and the return, nuke the
3613 // restore.
3614 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3615 return eraseInstFromFunction(CI);
3616 break;
3617 }
3618 case Intrinsic::lifetime_end:
3619 // Asan needs to poison memory to detect invalid access which is possible
3620 // even for empty lifetime range.
3621 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3622 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3623 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3624 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3625 break;
3626
3627 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3628 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3629 }))
3630 return nullptr;
3631 break;
3632 case Intrinsic::assume: {
3633 Value *IIOperand = II->getArgOperand(0);
3634
3635 // Canonicalize assume(a && b) -> assume(a); assume(b);
3636 // Note: New assumption intrinsics created here are registered by
3637 // the InstCombineIRInserter object.
3638 Value *A, *B;
3639 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3640 Builder.CreateAssumption(A);
3641 Builder.CreateAssumption(B);
3642 return eraseInstFromFunction(*II);
3643 }
3644 // assume(!(a || b)) -> assume(!a); assume(!b);
3645 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3646 Builder.CreateAssumption(Builder.CreateNot(A));
3647 Builder.CreateAssumption(Builder.CreateNot(B));
3648 return eraseInstFromFunction(*II);
3649 }
3650
3651 for (auto [Idx, OBU] : llvm::enumerate(II->operand_bundles())) {
3652 switch (getBundleAttrFromOBU(OBU)) {
3653 case BundleAttr::None:
3654 llvm_unreachable("Unexpected Attribute");
3655 case BundleAttr::Align: {
3656 // Try to remove redundant alignment assumptions.
3657 auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU);
3658
3659 if (!Alignment || !Offset || *Offset != 0)
3660 break;
3661
3662 // Remove align 1 and non-power-of-two bundles; they don't add any
3663 // useful information.
3664 if (*Alignment == 1 || !isPowerOf2_64(*Alignment))
3666
3667 // Don't try to remove align assumptions for pointers derived from
3668 // arguments. We might lose information if the function gets inlined and
3669 // the align argument attribute disappears.
3670 Value *UO = getUnderlyingObject(Ptr);
3671 if (!UO || isa<Argument>(UO))
3672 break;
3673
3674 // Compute known bits for the pointer and drop the assume if the
3675 // known alignment isn't increased by it.
3676 if (computeKnownBits(Ptr, II).countMinTrailingZeros() <
3677 Log2_64(*Alignment))
3678 continue;
3680 }
3681
3682 case BundleAttr::Dereferenceable: {
3683 auto [Ptr, _, Count] = getAssumeDereferenceableInfo(OBU);
3684
3685 if (Count && *Count == 0)
3687 break;
3688 }
3689
3690 case BundleAttr::NonNull: {
3691 auto [Ptr] = llvm::getAssumeNonNullInfo(OBU);
3692
3693 // Drop assume if we can prove nonnull without it
3694 if (isKnownNonZero(Ptr, getSimplifyQuery().getWithInstruction(II)))
3696
3697 // Fold the assume into metadata if it's valid at the load
3698 if (auto *LI = dyn_cast<LoadInst>(Ptr);
3699 LI &&
3700 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3701 MDNode *MD = MDNode::get(II->getContext(), {});
3702 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3703 LI->setMetadata(LLVMContext::MD_noundef, MD);
3705 }
3706
3707 // TODO: apply nonnull return attributes to calls and invokes
3708 break;
3709 }
3710
3711 case BundleAttr::SeparateStorage: {
3712 auto [Ptr1, Ptr2] = getAssumeSeparateStorageInfo(OBU);
3713 // Separate storage assumptions apply to the underlying allocations, not
3714 // any particular pointer within them. When evaluating the hints for AA
3715 // purposes we getUnderlyingObject them; by precomputing the answers
3716 // here we can avoid having to do so repeatedly there.
3717 auto MaybeSimplifyHint = [&](const Use &U) {
3718 Value *Hint = U.get();
3719 // Not having a limit is safe because InstCombine removes unreachable
3720 // code.
3721 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3722 if (Hint != UnderlyingObject)
3723 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3724 };
3725 MaybeSimplifyHint(Ptr1);
3726 MaybeSimplifyHint(Ptr2);
3727 } break;
3728
3729 // TODO: Drop these assumes when they are redundant
3730 case BundleAttr::DereferenceableOrNull:
3731 case BundleAttr::Ignore:
3732 case BundleAttr::NoUndef:
3733 break;
3734
3735 // This cannot be simplified
3736 case BundleAttr::Cold:
3737 break;
3738 }
3739 }
3740
3741 // Convert nonnull assume like:
3742 // %A = icmp ne i32* %PTR, null
3743 // call void @llvm.assume(i1 %A)
3744 // into
3745 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3746 if (match(IIOperand,
3748 A->getType()->isPointerTy()) {
3749 Builder.CreateNonnullAssumption(A);
3750 return eraseInstFromFunction(*II);
3751 }
3752
3753 // Convert alignment assume like:
3754 // %B = ptrtoint i32* %A to i64
3755 // %C = and i64 %B, Constant
3756 // %D = icmp eq i64 %C, 0
3757 // call void @llvm.assume(i1 %D)
3758 // into
3759 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3760 uint64_t AlignMask = 1;
3761 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3762 match(IIOperand,
3764 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3765 m_Zero())))) {
3766 if (isPowerOf2_64(AlignMask + 1)) {
3767 uint64_t Offset = 0;
3769 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3770 /// Note: this doesn't preserve the offset information but merges
3771 /// offset and alignment.
3772 /// TODO: we can generate a GEP instead of merging the alignment with
3773 /// the offset.
3774 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3775 MinAlign(Offset, AlignMask + 1));
3776 return eraseInstFromFunction(*II);
3777 }
3778 }
3779 }
3780
3781 /// Canonicalize Knowledge in operand bundles.
3782 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3783 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3784 auto &BOI = II->bundle_op_info_begin()[Idx];
3787 if (BOI.End - BOI.Begin > 2)
3788 continue; // Prevent reducing knowledge in an align with offset since
3789 // extracting a RetainedKnowledge from them loses offset
3790 // information
3791 RetainedKnowledge CanonRK =
3794 &getDominatorTree());
3795 if (CanonRK == RK)
3796 continue;
3797 if (!CanonRK) {
3798 if (BOI.End - BOI.Begin > 0) {
3799 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3800 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3801 }
3802 continue;
3803 }
3804 assert(RK.AttrKind == CanonRK.AttrKind);
3805 if (BOI.End - BOI.Begin > 0)
3806 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3807 if (BOI.End - BOI.Begin > 1)
3808 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3809 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3810 if (RK.WasOn)
3811 Worklist.pushValue(RK.WasOn);
3812 return II;
3813 }
3814 }
3815
3816 // If there is a dominating assume with the same condition as this one,
3817 // then this one is redundant, and should be removed.
3818 KnownBits Known(1);
3819 computeKnownBits(IIOperand, Known, II);
3821 return eraseInstFromFunction(*II);
3822
3823 // assume(false) is unreachable.
3824 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3826 return eraseInstFromFunction(*II);
3827 }
3828
3829 // Update the cache of affected values for this assumption (we might be
3830 // here because we just simplified the condition).
3831 AC.updateAffectedValues(cast<AssumeInst>(II));
3832 break;
3833 }
3834 case Intrinsic::experimental_guard: {
3835 // Is this guard followed by another guard? We scan forward over a small
3836 // fixed window of instructions to handle common cases with conditions
3837 // computed between guards.
3838 Instruction *NextInst = II->getNextNode();
3839 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3840 // Note: Using context-free form to avoid compile time blow up
3841 if (!isSafeToSpeculativelyExecute(NextInst))
3842 break;
3843 NextInst = NextInst->getNextNode();
3844 }
3845 Value *NextCond = nullptr;
3846 if (match(NextInst,
3848 Value *CurrCond = II->getArgOperand(0);
3849
3850 // Remove a guard that is immediately preceded by an identical guard.
3851 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3852 if (CurrCond != NextCond) {
3853 Instruction *MoveI = II->getNextNode();
3854 while (MoveI != NextInst) {
3855 auto *Temp = MoveI;
3856 MoveI = MoveI->getNextNode();
3857 Temp->moveBefore(II->getIterator());
3858 }
3859 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3860 }
3861 eraseInstFromFunction(*NextInst);
3862 return II;
3863 }
3864 break;
3865 }
3866 case Intrinsic::vector_insert: {
3867 Value *Vec = II->getArgOperand(0);
3868 Value *SubVec = II->getArgOperand(1);
3869 Value *Idx = II->getArgOperand(2);
3870 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3871 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3872 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3873
3874 // Only canonicalize if the destination vector, Vec, and SubVec are all
3875 // fixed vectors.
3876 if (DstTy && VecTy && SubVecTy) {
3877 unsigned DstNumElts = DstTy->getNumElements();
3878 unsigned VecNumElts = VecTy->getNumElements();
3879 unsigned SubVecNumElts = SubVecTy->getNumElements();
3880 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3881
3882 // An insert that entirely overwrites Vec with SubVec is a nop.
3883 if (VecNumElts == SubVecNumElts)
3884 return replaceInstUsesWith(CI, SubVec);
3885
3886 // Widen SubVec into a vector of the same width as Vec, since
3887 // shufflevector requires the two input vectors to be the same width.
3888 // Elements beyond the bounds of SubVec within the widened vector are
3889 // undefined.
3890 SmallVector<int, 8> WidenMask;
3891 unsigned i;
3892 for (i = 0; i != SubVecNumElts; ++i)
3893 WidenMask.push_back(i);
3894 for (; i != VecNumElts; ++i)
3895 WidenMask.push_back(PoisonMaskElem);
3896
3897 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3898
3900 for (unsigned i = 0; i != IdxN; ++i)
3901 Mask.push_back(i);
3902 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3903 Mask.push_back(i);
3904 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3905 Mask.push_back(i);
3906
3907 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3908 return replaceInstUsesWith(CI, Shuffle);
3909 }
3910 break;
3911 }
3912 case Intrinsic::vector_extract: {
3913 Value *Vec = II->getArgOperand(0);
3914 Value *Idx = II->getArgOperand(1);
3915
3916 Type *ReturnType = II->getType();
3917 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3918 // ExtractIdx)
3919 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3920 Value *InsertTuple, *InsertIdx, *InsertValue;
3922 m_Value(InsertValue),
3923 m_Value(InsertIdx))) &&
3924 InsertValue->getType() == ReturnType) {
3925 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3926 // Case where we get the same index right after setting it.
3927 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3928 // InsertValue
3929 if (ExtractIdx == Index)
3930 return replaceInstUsesWith(CI, InsertValue);
3931 // If we are getting a different index than what was set in the
3932 // insert.vector intrinsic. We can just set the input tuple to the one up
3933 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3934 // InsertIndex), ExtractIndex)
3935 // --> extract.vector(InsertTuple, ExtractIndex)
3936 else
3937 return replaceOperand(CI, 0, InsertTuple);
3938 }
3939
3940 ConstantInt *ALMUpperBound;
3942 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3943 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3944 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3945 unsigned ScaleFactor =
3946 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
3947 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3948 return replaceInstUsesWith(CI,
3949 ConstantVector::getNullValue(ReturnType));
3950 }
3951
3952 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3953 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3954
3955 if (DstTy && VecTy) {
3956 auto DstEltCnt = DstTy->getElementCount();
3957 auto VecEltCnt = VecTy->getElementCount();
3958 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3959
3960 // Extracting the entirety of Vec is a nop.
3961 if (DstEltCnt == VecTy->getElementCount()) {
3962 replaceInstUsesWith(CI, Vec);
3963 return eraseInstFromFunction(CI);
3964 }
3965
3966 // Only canonicalize to shufflevector if the destination vector and
3967 // Vec are fixed vectors.
3968 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3969 break;
3970
3972 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3973 Mask.push_back(IdxN + i);
3974
3975 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3976 return replaceInstUsesWith(CI, Shuffle);
3977 }
3978 break;
3979 }
3980 case Intrinsic::experimental_vp_reverse: {
3981 Value *X;
3982 Value *Vec = II->getArgOperand(0);
3983 Value *Mask = II->getArgOperand(1);
3984 if (!match(Mask, m_AllOnes()))
3985 break;
3986 Value *EVL = II->getArgOperand(2);
3987 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3988 // rev(unop rev(X)) --> unop X
3989 if (match(Vec,
3991 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3992 auto *OldUnOp = cast<UnaryOperator>(Vec);
3994 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3995 II->getIterator());
3996 return replaceInstUsesWith(CI, NewUnOp);
3997 }
3998 break;
3999 }
4000 case Intrinsic::vector_reduce_or:
4001 case Intrinsic::vector_reduce_and: {
4002 // Canonicalize logical or/and reductions:
4003 // Or reduction for i1 is represented as:
4004 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4005 // %res = cmp ne iReduxWidth %val, 0
4006 // And reduction for i1 is represented as:
4007 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4008 // %res = cmp eq iReduxWidth %val, 11111
4009 Value *Arg = II->getArgOperand(0);
4010 Value *Vect;
4011
4012 if (Value *NewOp =
4013 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4014 replaceUse(II->getOperandUse(0), NewOp);
4015 return II;
4016 }
4017
4018 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4019 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4020 if (FTy->getElementType() == Builder.getInt1Ty()) {
4021 Value *Res = Builder.CreateBitCast(
4022 Vect, Builder.getIntNTy(FTy->getNumElements()));
4023 if (IID == Intrinsic::vector_reduce_and) {
4024 Res = Builder.CreateICmpEQ(
4026 } else {
4027 assert(IID == Intrinsic::vector_reduce_or &&
4028 "Expected or reduction.");
4029 Res = Builder.CreateIsNotNull(Res);
4030 }
4031 if (Arg != Vect)
4032 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4033 II->getType());
4034 return replaceInstUsesWith(CI, Res);
4035 }
4036 }
4037 [[fallthrough]];
4038 }
4039 case Intrinsic::vector_reduce_add: {
4040 if (IID == Intrinsic::vector_reduce_add) {
4041 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4042 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4043 // Convert vector_reduce_add(SExt(<n x i1>)) to
4044 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4045 // Convert vector_reduce_add(<n x i1>) to
4046 // Trunc(ctpop(bitcast <n x i1> to in)).
4047 Value *Arg = II->getArgOperand(0);
4048 Value *Vect;
4049
4050 if (Value *NewOp =
4051 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4052 replaceUse(II->getOperandUse(0), NewOp);
4053 return II;
4054 }
4055
4056 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4057 if (Value *Splat = getSplatValue(Arg)) {
4058 ElementCount VecToReduceCount =
4059 cast<VectorType>(Arg->getType())->getElementCount();
4060 if (VecToReduceCount.isFixed()) {
4061 unsigned VectorSize = VecToReduceCount.getFixedValue();
4062 return BinaryOperator::CreateMul(
4063 Splat,
4064 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4065 /*ImplicitTrunc=*/true));
4066 }
4067 }
4068
4069 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4070 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4071 if (FTy->getElementType() == Builder.getInt1Ty()) {
4072 Value *V = Builder.CreateBitCast(
4073 Vect, Builder.getIntNTy(FTy->getNumElements()));
4074 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4075 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4076 if (Arg != Vect &&
4077 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4078 Res = Builder.CreateNeg(Res);
4079 return replaceInstUsesWith(CI, Res);
4080 }
4081 }
4082 }
4083 [[fallthrough]];
4084 }
4085 case Intrinsic::vector_reduce_xor: {
4086 if (IID == Intrinsic::vector_reduce_xor) {
4087 // Exclusive disjunction reduction over the vector with
4088 // (potentially-extended) i1 element type is actually a
4089 // (potentially-extended) arithmetic `add` reduction over the original
4090 // non-extended value:
4091 // vector_reduce_xor(?ext(<n x i1>))
4092 // -->
4093 // ?ext(vector_reduce_add(<n x i1>))
4094 Value *Arg = II->getArgOperand(0);
4095 Value *Vect;
4096
4097 if (Value *NewOp =
4098 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4099 replaceUse(II->getOperandUse(0), NewOp);
4100 return II;
4101 }
4102
4103 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4104 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4105 if (VTy->getElementType() == Builder.getInt1Ty()) {
4106 Value *Res = Builder.CreateAddReduce(Vect);
4107 if (Arg != Vect)
4108 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4109 II->getType());
4110 return replaceInstUsesWith(CI, Res);
4111 }
4112 }
4113 }
4114 [[fallthrough]];
4115 }
4116 case Intrinsic::vector_reduce_mul: {
4117 if (IID == Intrinsic::vector_reduce_mul) {
4118 Value *Arg = II->getArgOperand(0);
4119
4120 if (Value *NewOp =
4121 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4122 replaceUse(II->getOperandUse(0), NewOp);
4123 return II;
4124 }
4125
4126 // vector_reduce_mul(zext(<n x i1>)), or
4127 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4128 // zext(vector_reduce_and(<n x i1>)).
4129 // (The sext case doesn't work if n is odd because multiplying an odd
4130 // number of -1's produces -1, not 1.)
4131 Value *Vect;
4132 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4133 Vect->getType()->isIntOrIntVectorTy(1);
4134 bool IsSext =
4135 match(Arg, m_SExt(m_Value(Vect))) &&
4136 Vect->getType()->isIntOrIntVectorTy(1) &&
4137 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4138 if (IsZext || IsSext) {
4139 Value *Res = Builder.CreateAndReduce(Vect);
4140 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4141 }
4142
4143 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4144 if (Arg->getType()->isIntOrIntVectorTy(1))
4145 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4146 }
4147 [[fallthrough]];
4148 }
4149 case Intrinsic::vector_reduce_umin:
4150 case Intrinsic::vector_reduce_umax: {
4151 if (IID == Intrinsic::vector_reduce_umin ||
4152 IID == Intrinsic::vector_reduce_umax) {
4153 // UMin/UMax reduction over the vector with (potentially-extended)
4154 // i1 element type is actually a (potentially-extended)
4155 // logical `and`/`or` reduction over the original non-extended value:
4156 // vector_reduce_u{min,max}(?ext(<n x i1>))
4157 // -->
4158 // ?ext(vector_reduce_{and,or}(<n x i1>))
4159 Value *Arg = II->getArgOperand(0);
4160 Value *Vect;
4161
4162 if (Value *NewOp =
4163 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4164 replaceUse(II->getOperandUse(0), NewOp);
4165 return II;
4166 }
4167
4168 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4169 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4170 if (VTy->getElementType() == Builder.getInt1Ty()) {
4171 Value *Res = IID == Intrinsic::vector_reduce_umin
4172 ? Builder.CreateAndReduce(Vect)
4173 : Builder.CreateOrReduce(Vect);
4174 if (Arg != Vect)
4175 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4176 II->getType());
4177 return replaceInstUsesWith(CI, Res);
4178 }
4179 }
4180 }
4181 [[fallthrough]];
4182 }
4183 case Intrinsic::vector_reduce_smin:
4184 case Intrinsic::vector_reduce_smax: {
4185 if (IID == Intrinsic::vector_reduce_smin ||
4186 IID == Intrinsic::vector_reduce_smax) {
4187 // SMin/SMax reduction over the vector with (potentially-extended)
4188 // i1 element type is actually a (potentially-extended)
4189 // logical `and`/`or` reduction over the original non-extended value:
4190 // vector_reduce_s{min,max}(<n x i1>)
4191 // -->
4192 // vector_reduce_{or,and}(<n x i1>)
4193 // and
4194 // vector_reduce_s{min,max}(sext(<n x i1>))
4195 // -->
4196 // sext(vector_reduce_{or,and}(<n x i1>))
4197 // and
4198 // vector_reduce_s{min,max}(zext(<n x i1>))
4199 // -->
4200 // zext(vector_reduce_{and,or}(<n x i1>))
4201 Value *Arg = II->getArgOperand(0);
4202 Value *Vect;
4203
4204 if (Value *NewOp =
4205 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4206 replaceUse(II->getOperandUse(0), NewOp);
4207 return II;
4208 }
4209
4210 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4211 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4212 if (VTy->getElementType() == Builder.getInt1Ty()) {
4213 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4214 if (Arg != Vect)
4215 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4216 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4217 (ExtOpc == Instruction::CastOps::ZExt))
4218 ? Builder.CreateAndReduce(Vect)
4219 : Builder.CreateOrReduce(Vect);
4220 if (Arg != Vect)
4221 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4222 return replaceInstUsesWith(CI, Res);
4223 }
4224 }
4225 }
4226 [[fallthrough]];
4227 }
4228 case Intrinsic::vector_reduce_fmax:
4229 case Intrinsic::vector_reduce_fmin:
4230 case Intrinsic::vector_reduce_fadd:
4231 case Intrinsic::vector_reduce_fmul: {
4232 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4233 IID != Intrinsic::vector_reduce_fmul) ||
4234 II->hasAllowReassoc();
4235 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4236 IID == Intrinsic::vector_reduce_fmul)
4237 ? 1
4238 : 0;
4239 Value *Arg = II->getArgOperand(ArgIdx);
4240 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4241 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4242 return nullptr;
4243 }
4244 break;
4245 }
4246 case Intrinsic::is_fpclass: {
4247 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4248 return I;
4249 break;
4250 }
4251 case Intrinsic::threadlocal_address: {
4252 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4253 MaybeAlign Align = II->getRetAlign();
4254 if (MinAlign > Align.valueOrOne()) {
4255 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4256 return II;
4257 }
4258 break;
4259 }
4260 case Intrinsic::fptoui_sat:
4261 case Intrinsic::fptosi_sat:
4262 if (Instruction *I = foldItoFPtoI(*II))
4263 return I;
4264 break;
4265 case Intrinsic::frexp: {
4266 // frexp(frexp(x).fract) -> { frexp(x).fract, 0 }: the fraction operand is
4267 // already normalized, so the first result is idempotent and the second is
4268 // zero.
4269 if (match(II->getArgOperand(0),
4271 Value *Res = Builder.CreateInsertValue(PoisonValue::get(II->getType()),
4272 II->getArgOperand(0), 0);
4273 Res = Builder.CreateInsertValue(
4274 Res, Constant::getNullValue(II->getType()->getStructElementType(1)),
4275 1);
4276 return replaceInstUsesWith(*II, Res);
4277 }
4278 break;
4279 }
4280 case Intrinsic::get_active_lane_mask: {
4281 const APInt *Op0, *Op1;
4282 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4283 match(II->getOperand(1), m_APInt(Op1))) {
4284 Type *OpTy = II->getOperand(0)->getType();
4285 return replaceInstUsesWith(
4286 *II, Builder.CreateIntrinsic(
4287 II->getType(), Intrinsic::get_active_lane_mask,
4288 {Constant::getNullValue(OpTy),
4289 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4290 }
4291 break;
4292 }
4293 case Intrinsic::experimental_get_vector_length: {
4294 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4295 unsigned BitWidth =
4296 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4297 II->getType()->getScalarSizeInBits());
4298 ConstantRange Cnt =
4299 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4300 SQ.getWithInstruction(II))
4302 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4303 ->getValue()
4304 .zextOrTrunc(Cnt.getBitWidth());
4305 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4306 MaxLanes = MaxLanes.multiply(
4307 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4308
4309 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4310 return replaceInstUsesWith(
4311 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4312 return nullptr;
4313 }
4314 default: {
4315 // Handle target specific intrinsics
4316 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4317 if (V)
4318 return *V;
4319 break;
4320 }
4321 }
4322
4323 // Try to fold intrinsic into select/phi operands. This is legal if:
4324 // * The intrinsic is speculatable.
4325 // * The operand is one of the following:
4326 // - a phi.
4327 // - a select with a scalar condition.
4328 // - a select with a vector condition and II is not a cross lane operation.
4330 for (Value *Op : II->args()) {
4331 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4332 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4333 if (IsVectorCond &&
4334 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4335 continue;
4336 // Don't replace a scalar select with a more expensive vector select if
4337 // we can't simplify both arms of the select.
4338 bool SimplifyBothArms =
4339 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4341 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4342 return R;
4343 }
4344 if (auto *Phi = dyn_cast<PHINode>(Op))
4345 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4346 return R;
4347 }
4348 }
4349
4351 return Shuf;
4352
4354 return replaceInstUsesWith(*II, Reverse);
4355
4357 return replaceInstUsesWith(*II, Res);
4358
4359 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4360 // context, so it is handled in visitCallBase and we should trigger it.
4361 return visitCallBase(*II);
4362}
4363
4364// Fence instruction simplification
4366 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4367 // This check is solely here to handle arbitrary target-dependent syncscopes.
4368 // TODO: Can remove if does not matter in practice.
4369 if (NFI && FI.isIdenticalTo(NFI))
4370 return eraseInstFromFunction(FI);
4371
4372 // Returns true if FI1 is identical or stronger fence than FI2.
4373 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4374 auto FI1SyncScope = FI1->getSyncScopeID();
4375 // Consider same scope, where scope is global or single-thread.
4376 if (FI1SyncScope != FI2->getSyncScopeID() ||
4377 (FI1SyncScope != SyncScope::System &&
4378 FI1SyncScope != SyncScope::SingleThread))
4379 return false;
4380
4381 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4382 };
4383 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4384 return eraseInstFromFunction(FI);
4385
4386 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4387 if (isIdenticalOrStrongerFence(PFI, &FI))
4388 return eraseInstFromFunction(FI);
4389 return nullptr;
4390}
4391
4392// InvokeInst simplification
4394 return visitCallBase(II);
4395}
4396
4397// CallBrInst simplification
4399 return visitCallBase(CBI);
4400}
4401
4403 if (!CI->hasFnAttr("modular-format"))
4404 return nullptr;
4405
4407 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4408 // TODO: Make use of the first two arguments
4409 unsigned FirstArgIdx;
4410 [[maybe_unused]] bool Error;
4411 Error = Args[2].getAsInteger(10, FirstArgIdx);
4412 assert(!Error && "invalid first arg index");
4413 if (FirstArgIdx == 0)
4414 return nullptr;
4415 --FirstArgIdx;
4416 StringRef FnName = Args[3];
4417 StringRef ImplName = Args[4];
4419
4420 if (AllAspects.empty())
4421 return nullptr;
4422
4423 SmallVector<StringRef> NeededAspects;
4424 for (StringRef Aspect : AllAspects) {
4425 if (Aspect == "float") {
4426 if (llvm::any_of(
4427 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4428 CI->arg_end()),
4429 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4430 NeededAspects.push_back("float");
4431 } else {
4432 // Unknown aspects are always considered to be needed.
4433 NeededAspects.push_back(Aspect);
4434 }
4435 }
4436
4437 if (NeededAspects.size() == AllAspects.size())
4438 return nullptr;
4439
4440 Module *M = CI->getModule();
4441 LLVMContext &Ctx = M->getContext();
4442 Function *Callee = CI->getCalledFunction();
4443 FunctionCallee ModularFn = M->getOrInsertFunction(
4444 FnName, Callee->getFunctionType(),
4445 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4446 CallInst *New = cast<CallInst>(CI->clone());
4447 New->setCalledFunction(ModularFn);
4448 New->removeFnAttr("modular-format");
4449 B.Insert(New);
4450
4451 const auto ReferenceAspect = [&](StringRef Aspect) {
4452 SmallString<20> Name = ImplName;
4453 Name += '_';
4454 Name += Aspect;
4455 Function *RelocNoneFn =
4456 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4457 B.CreateCall(RelocNoneFn,
4458 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4459 };
4460
4461 llvm::sort(NeededAspects);
4462 for (StringRef Request : NeededAspects)
4463 ReferenceAspect(Request);
4464
4465 return New;
4466}
4467
/// Try to simplify the call \p CI, first with LibCallSimplifier and, if that
/// makes no change, with optimizeModularFormat.
///
/// \returns nullptr when \p CI has no called function, is a musttail/notail
/// call, or neither simplifier produced a replacement. Otherwise returns \p CI
/// itself when it has no uses, or the result of replaceInstUsesWith(*CI, With).
/// visitCallBase erases whatever instruction is returned here.
4468Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4469 if (!CI->getCalledFunction()) return nullptr;
4470
4471 // Skip optimizing notail and musttail calls so
4472 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4473 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4474 if (CI->isMustTailCall() || CI->isNoTailCall())
4475 return nullptr;
4476
// Callbacks handed to LibCallSimplifier so that its replacements and erasures
// go through InstCombine's own helpers.
4477 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4478 replaceInstUsesWith(*From, With);
4479 };
// NOTE(review): the body of this lambda (listing line 4481) is missing from
// this copy; presumably it erases I via eraseInstFromFunction — confirm
// against upstream.
4480 auto InstCombineErase = [this](Instruction *I) {
4482 };
4483 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4484 InstCombineRAUW, InstCombineErase);
4485 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4486 ++NumSimplified;
// An unused call has nothing to replace; hand CI itself back to the caller.
4487 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4488 }
// Second chance: the modular-format rewrite, counted under the same statistic.
4489 if (Value *With = optimizeModularFormat(CI, Builder)) {
4490 ++NumSimplified;
4491 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4492 }
4493
4494 return nullptr;
4495}
4495}
4496
// Body of findInitTrampolineFromAlloca(Value *TrampMem).
// NOTE(review): the signature line (listing line 4497) is missing from this
// copy.
//
// Returns the unique init_trampoline intrinsic among the users of TrampMem, or
// nullptr. It succeeds only if TrampMem is (a single-use cast of) an alloca,
// every user of TrampMem is an init_trampoline or adjust_trampoline intrinsic,
// exactly one of them is init_trampoline, and that call's operand 0 is
// TrampMem.
4498 // Strip off at most one level of pointer casts, looking for an alloca. This
4499 // is good enough in practice and simpler than handling any number of casts.
4500 Value *Underlying = TrampMem->stripPointerCasts();
// If something was stripped, the underlying value must be used only by
// TrampMem; otherwise other users of the memory would go unexamined.
4501 if (Underlying != TrampMem &&
4502 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4503 return nullptr;
4504 if (!isa<AllocaInst>(Underlying))
4505 return nullptr;
4506
4507 IntrinsicInst *InitTrampoline = nullptr;
4508 for (User *U : TrampMem->users()) {
// NOTE(review): the declaration of II (listing line 4509) is missing from this
// copy; presumably a dyn_cast<IntrinsicInst> of U — confirm against upstream.
4510 if (!II)
4511 return nullptr;
4512 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4513 if (InitTrampoline)
4514 // More than one init_trampoline writes to this value. Give up.
4515 return nullptr;
4516 InitTrampoline = II;
4517 continue;
4518 }
4519 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4520 // Allow any number of calls to adjust.trampoline.
4521 continue;
// Any other intrinsic user defeats the analysis.
4522 return nullptr;
4523 }
4524
4525 // No call to init.trampoline found.
4526 if (!InitTrampoline)
4527 return nullptr;
4528
4529 // Check that the alloca is being used in the expected way.
4530 if (InitTrampoline->getOperand(0) != TrampMem)
4531 return nullptr;
4532
4533 return InitTrampoline;
4534}
4535
4537 Value *TrampMem) {
// NOTE(review): the first signature line (listing line 4536) is missing from
// this copy; this function is called as
// findInitTrampolineFromBB(AdjustTramp, TrampMem).
//
// Scans backwards from AdjustTramp to the start of its basic block and returns
// the first init_trampoline whose operand 0 is TrampMem. Returns nullptr if an
// instruction that may write to memory is met first, or if the block start is
// reached without a match.
4538 // Visit all the previous instructions in the basic block, and try to find a
4539 // init.trampoline which has a direct path to the adjust.trampoline.
4540 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4541 E = AdjustTramp->getParent()->begin();
4542 I != E;) {
// Pre-decrement: the walk starts at the instruction just before AdjustTramp.
4543 Instruction *Inst = &*--I;
// NOTE(review): the line introducing II (listing line 4544) is missing from
// this copy; presumably `if (auto *II = dyn_cast<IntrinsicInst>(Inst))` —
// confirm against upstream.
4545 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4546 II->getOperand(0) == TrampMem)
4547 return II;
// Stop at any instruction that may write memory before a match is found.
4548 if (Inst->mayWriteToMemory())
4549 return nullptr;
4550 }
4551 return nullptr;
4552}
4553
4554// Given a call to llvm.adjust.trampoline, find and return the corresponding
4555// call to llvm.init.trampoline if the call to the trampoline can be optimized
4556// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the signature line (listing line 4557) is missing from this
// copy; the function is called as findInitTrampoline(Callee) from
// visitCallBase.
// Pointer casts on the callee are looked through before matching the intrinsic.
4558 Callee = Callee->stripPointerCasts();
4559 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4560 if (!AdjustTramp ||
4561 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4562 return nullptr;
4563
4564 Value *TrampMem = AdjustTramp->getOperand(0);
4565
// Two lookups are tried in order; the first hit wins.
// NOTE(review): the condition guarding the first `return IT` (listing line
// 4566) is missing from this copy; presumably it calls
// findInitTrampolineFromAlloca(TrampMem) — confirm against upstream.
4567 return IT;
4568 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4569 return IT;
4570 return nullptr;
4571}
4572
/// Fold a call whose callee is a no-op inttoptr of an llvm.ptrauth.sign or
/// llvm.ptrauth.resign result and which carries a "ptrauth" operand bundle.
///
/// \returns a newly created call based on \p Call with the rewritten callee and
/// operand bundles, or nullptr when the pattern does not match or the intrinsic
/// operands disagree with the bundle.
4573Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4574 const Value *Callee = Call.getCalledOperand();
4575 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4576 if (!IPC || !IPC->isNoopCast(DL))
4577 return nullptr;
4578
4579 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4580 if (!II)
4581 return nullptr;
4582
4583 Intrinsic::ID IIID = II->getIntrinsicID();
4584 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4585 return nullptr;
4586
4587 // Isolate the ptrauth bundle from the others.
4588 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
// NOTE(review): the declaration of NewBundles (listing line 4589) is missing
// from this copy; it collects every non-ptrauth bundle for the new call.
4590 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4591 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4592 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4593 PtrAuthBundleOrNone = Bundle;
4594 else
4595 NewBundles.emplace_back(Bundle);
4596 }
4597
// Without a ptrauth bundle on the call there is nothing to fold.
4598 if (!PtrAuthBundleOrNone)
4599 return nullptr;
4600
4601 Value *NewCallee = nullptr;
4602 switch (IIID) {
4603 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4604 // assuming the call bundle and the sign operands match.
4605 case Intrinsic::ptrauth_resign: {
4606 // Resign result key should match bundle.
4607 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4608 return nullptr;
4609 // Resign result discriminator should match bundle.
4610 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4611 return nullptr;
4612
4613 // Resign input (auth) key should also match: we can't change the key on
4614 // the new call we're generating, because we don't know what keys are valid.
4615 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4616 return nullptr;
4617
// The new call authenticates with the resign's input key and discriminator
// (operands 1 and 2) and calls the pre-resign pointer (operand 0).
4618 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4619 NewBundles.emplace_back("ptrauth", NewBundleOps);
4620 NewCallee = II->getOperand(0);
4621 break;
4622 }
4623
4624 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4625 // assuming the call bundle and the sign operands match.
4626 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4627 case Intrinsic::ptrauth_sign: {
4628 // Sign key should match bundle.
4629 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4630 return nullptr;
4631 // Sign discriminator should match bundle.
4632 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4633 return nullptr;
// No ptrauth bundle is re-added in this case: the new call is unauthenticated.
4634 NewCallee = II->getOperand(0);
4635 break;
4636 }
4637 default:
4638 llvm_unreachable("unexpected intrinsic ID");
4639 }
4640
4641 if (!NewCallee)
4642 return nullptr;
4643
// Cast the raw pointer operand back to the type of the original callee.
4644 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4645 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4646 NewCall->setCalledOperand(NewCallee);
4647 return NewCall;
4648}
4649
/// Turn a call through a ptrauth constant that wraps a Function into a direct
/// call to that function, provided the call's ptrauth bundle is known to be
/// compatible with the constant.
///
/// \returns the new call with its called operand set to the function, or
/// nullptr when the pattern does not apply.
4650Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
// NOTE(review): the declaration of CPA (listing line 4651) is missing from
// this copy; presumably a dyn_cast of the called operand to the ptrauth
// constant type — confirm against upstream.
4652 if (!CPA)
4653 return nullptr;
4654
4655 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4656 // If the ptrauth constant isn't based on a function pointer, bail out.
4657 if (!CalleeF)
4658 return nullptr;
4659
4660 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
// NOTE(review): the declaration of PAB (listing line 4661) is missing from
// this copy; presumably the call's ptrauth operand bundle — confirm.
4662 if (!PAB)
4663 return nullptr;
4664
// Bundle inputs: [0] is the key (cast to ConstantInt), [1] the discriminator.
4665 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4666 Value *Discriminator = PAB->Inputs[1];
4667
4668 // If the bundle doesn't match, this is probably going to fail to auth.
4669 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4670 return nullptr;
4671
4672 // If the bundle matches the constant, proceed in making this a direct call.
// NOTE(review): the statement creating NewCall (listing line 4673) is missing
// from this copy.
4674 NewCall->setCalledOperand(CalleeF);
4675 return NewCall;
4676}
4677
/// Annotate the return value of an allocation call with facts derived from its
/// arguments: a dereferenceable / dereferenceable_or_null size and a return
/// alignment.
///
/// \param Call the call to annotate; calls that do not return a pointer are
///        left untouched.
/// \param TLI  library info passed to getAllocSize / getAllocAlignment.
/// \returns true if an attribute was added or strengthened.
4678bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4679 const TargetLibraryInfo *TLI) {
4680 // Note: We only handle cases which can't be driven from generic attributes
4681 // here. So, for example, nonnull and noalias (which are common properties
4682 // of some allocation functions) are expected to be handled via annotation
4683 // of the respective allocator declaration with generic attributes.
4684 bool Changed = false;
4685
4686 if (!Call.getType()->isPointerTy())
4687 return Changed;
4688
4689 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4690 if (Size && *Size != 0) {
4691 // TODO: We really should just emit deref_or_null here and then
4692 // let the generic inference code combine that with nonnull.
// Changed is reported only if the attribute kind was not already present.
// NOTE(review): the statements that add the attribute (listing lines 4695 and
// 4699) are partly missing from this copy; only their trailing argument lines
// remain.
4693 if (Call.hasRetAttr(Attribute::NonNull)) {
4694 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4696 Call.getContext(), Size->getLimitedValue()));
4697 } else {
4698 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4700 Call.getContext(), Size->getLimitedValue()));
4701 }
4702 }
4703
4704 // Add alignment attribute if alignment is a power of two constant.
4705 Value *Alignment = getAllocAlignment(&Call, TLI);
4706 if (!Alignment)
4707 return Changed;
4708
4709 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
// Only constants strictly below Value::MaximumAlignment are considered.
4710 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4711 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4712 if (llvm::isPowerOf2_64(AlignmentVal)) {
4713 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4714 Align NewAlign = Align(AlignmentVal);
// Only ever raise the alignment; an equal or larger existing one is kept.
4715 if (NewAlign > ExistingAlign) {
// NOTE(review): the statements applying NewAlign (listing lines 4716-4717)
// are missing from this copy.
4718 Changed = true;
4719 }
4720 }
4721 }
4722 return Changed;
4723}
4724
4725/// Improvements for call, callbr and invoke instructions.
///
/// Runs a sequence of folds on \p Call: allocation-site annotation, nonnull
/// inference on pointer arguments, callee-cast removal, convergent and
/// calling-convention checks, null/undef callee handling, trampoline and
/// ptrauth folds, library-call simplification, `returned` argument forwarding,
/// removal of stale callee_type metadata / kcfi bundles, and gc.statepoint
/// clean-up.
///
/// \returns a replacement instruction (or the result of an erase/replace
/// helper) when a fold fires, \p Call itself when it was only modified in
/// place, and nullptr otherwise.
///
/// NOTE(review): several statements are missing from this copy (gaps in the
/// embedded listing numbers, e.g. 4769, 4781, 4790-4792, 4831-4832); consult
/// upstream before editing the logic.
4726Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4727 bool Changed = annotateAnyAllocSite(Call, &TLI);
4728
4729 // Mark any parameters that are known to be non-null with the nonnull
4730 // attribute. This is helpful for inlining calls to functions with null
4731 // checks on their arguments.
4732 SmallVector<unsigned, 4> ArgNos;
4733 unsigned ArgNo = 0;
4734
4735 for (Value *V : Call.args()) {
4736 if (V->getType()->isPointerTy()) {
4737 // Simplify the nonnull operand if the parameter is known to be nonnull.
4738 // Otherwise, try to infer nonnull for it.
4739 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4740 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4741 (HasDereferenceable &&
4743 V->getType()->getPointerAddressSpace()))) {
4744 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4745 replaceOperand(Call, ArgNo, Res);
4746 Changed = true;
4747 }
4748 } else if (isKnownNonZero(V,
4749 getSimplifyQuery().getWithInstruction(&Call))) {
// Remember the index; all nonnull attributes are added in one batch below.
4750 ArgNos.push_back(ArgNo);
4751 }
4752 }
4753 ArgNo++;
4754 }
4755
4756 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4757
4758 if (!ArgNos.empty()) {
4759 AttributeList AS = Call.getAttributes();
4760 LLVMContext &Ctx = Call.getContext();
4761 AS = AS.addParamAttribute(Ctx, ArgNos,
4762 Attribute::get(Ctx, Attribute::NonNull));
4763 Call.setAttributes(AS);
4764 Changed = true;
4765 }
4766
4767 // If the callee is a pointer to a function, attempt to move any casts to the
4768 // arguments of the call/callbr/invoke.
// NOTE(review): the declaration of Callee (listing line 4769) is missing from
// this copy.
// transformConstExprCastCall erases the original call when it succeeds, so
// nullptr is returned rather than a pointer to the dead instruction.
4770 Function *CalleeF = dyn_cast<Function>(Callee);
4771 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4772 transformConstExprCastCall(Call))
4773 return nullptr;
4774
4775 if (CalleeF) {
4776 // Remove the convergent attr on calls when the callee is not convergent.
4777 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4778 !CalleeF->isIntrinsic()) {
4779 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4780 << "\n");
4782 return &Call;
4783 }
4784
4785 // If the call and callee calling conventions don't match, and neither one
4786 // of the calling conventions is compatible with C calling convention
4787 // this call must be unreachable, as the call is undefined.
4788 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4789 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4793 // Only do this for calls to a function with a body. A prototype may
4794 // not actually end up matching the implementation's calling conv for a
4795 // variety of reasons (e.g. it may be written in assembly).
4796 !CalleeF->isDeclaration()) {
4797 Instruction *OldCall = &Call;
4799 // If OldCall does not return void then replaceInstUsesWith poison.
4800 // This allows ValueHandlers and custom metadata to adjust itself.
4801 if (!OldCall->getType()->isVoidTy())
4802 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4803 if (isa<CallInst>(OldCall))
4804 return eraseInstFromFunction(*OldCall);
4805
4806 // We cannot remove an invoke or a callbr, because it would change the
4807 // CFG, just change the callee to a null pointer.
4808 cast<CallBase>(OldCall)->setCalledFunction(
4809 CalleeF->getFunctionType(),
4810 Constant::getNullValue(CalleeF->getType()));
4811 return nullptr;
4812 }
4813 }
4814
4815 // Calling a null function pointer is undefined if a null address isn't
4816 // dereferenceable.
4817 if ((isa<ConstantPointerNull>(Callee) &&
4819 isa<UndefValue>(Callee)) {
4820 // If Call does not return void then replaceInstUsesWith poison.
4821 // This allows ValueHandlers and custom metadata to adjust itself.
4822 if (!Call.getType()->isVoidTy())
4824
4825 if (Call.isTerminator()) {
4826 // Can't remove an invoke or callbr because we cannot change the CFG.
4827 return nullptr;
4828 }
4829
4830 // This instruction is not reachable, just remove it.
4833 }
4834
4835 if (IntrinsicInst *II = findInitTrampoline(Callee))
4836 return transformCallThroughTrampoline(Call, *II);
4837
4838 // Combine calls involving pointer authentication intrinsics.
4839 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4840 return NewCall;
4841
4842 // Combine calls to ptrauth constants.
4843 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4844 return NewCall;
4845
4846 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4847 InlineAsm *IA = cast<InlineAsm>(Callee);
4848 if (!IA->canThrow()) {
4849 // Normal inline asm calls cannot throw - mark them
4850 // 'nounwind'.
4852 Changed = true;
4853 }
4854 }
4855
4856 // Try to optimize the call if possible, we require DataLayout for most of
4857 // this. None of these calls are seen as possibly dead so go ahead and
4858 // delete the instruction now.
4859 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4860 Instruction *I = tryOptimizeCall(CI);
4861 // If we changed something return the result, etc. Otherwise let
4862 // the fallthrough check.
4863 if (I) return eraseInstFromFunction(*I);
4864 }
4865
// Forward a `returned` argument to the users of the call when its type can be
// losslessly bitcast to the call's type; musttail calls are excluded.
4866 if (!Call.use_empty() && !Call.isMustTailCall())
4867 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4868 Type *CallTy = Call.getType();
4869 Type *RetArgTy = ReturnedArg->getType();
4870 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4871 return replaceInstUsesWith(
4872 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4873 }
4874
4875 // Drop unnecessary callee_type metadata from calls that were converted
4876 // into direct calls.
4877 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4878 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4879 Changed = true;
4880 }
4881
4882 // Drop unnecessary kcfi operand bundles from calls that were converted
4883 // into direct calls.
// NOTE(review): the declaration of Bundle (listing line 4884) and the
// statement after the debug block (4903) are missing from this copy.
4885 if (Bundle && !Call.isIndirectCall()) {
// Debug-only diagnostic: report when the callee's kcfi_type metadata differs
// from the type hash the bundle expects.
4886 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4887 if (CalleeF) {
4888 ConstantInt *FunctionType = nullptr;
4889 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4890
4891 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4892 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4893
4894 if (FunctionType &&
4895 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4896 dbgs() << Call.getModule()->getName()
4897 << ": warning: kcfi: " << Call.getCaller()->getName()
4898 << ": call to " << CalleeF->getName()
4899 << " using a mismatching function pointer type\n";
4900 }
4901 });
4902
4904 }
4905
4906 if (isRemovableAlloc(&Call, &TLI))
4907 return visitAllocSite(Call);
4908
4909 // Handle intrinsics which can be used in both call and invoke context.
4910 switch (Call.getIntrinsicID()) {
4911 case Intrinsic::experimental_gc_statepoint: {
4912 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Collects every base/derived pointer still referenced by a live gc.relocate.
4913 SmallPtrSet<Value *, 32> LiveGcValues;
4914 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4915 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4916
4917 // Remove the relocation if unused.
4918 if (GCR.use_empty()) {
4920 continue;
4921 }
4922
4923 Value *DerivedPtr = GCR.getDerivedPtr();
4924 Value *BasePtr = GCR.getBasePtr();
4925
4926 // Undef is undef, even after relocation.
4927 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4930 continue;
4931 }
4932
4933 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4934 // The relocation of null will be null for most any collector.
4935 // TODO: provide a hook for this in GCStrategy. There might be some
4936 // weird collector this property does not hold for.
4937 if (isa<ConstantPointerNull>(DerivedPtr)) {
4938 // Use null-pointer of gc_relocate's type to replace it.
4941 continue;
4942 }
4943
4944 // isKnownNonNull -> nonnull attribute
4945 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4946 isKnownNonZero(DerivedPtr,
4947 getSimplifyQuery().getWithInstruction(&Call))) {
4948 GCR.addRetAttr(Attribute::NonNull);
4949 // We discovered new fact, re-check users.
4950 Worklist.pushUsersToWorkList(GCR);
4951 }
4952 }
4953
4954 // If we have two copies of the same pointer in the statepoint argument
4955 // list, canonicalize to one. This may let us common gc.relocates.
4956 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4957 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4958 auto *OpIntTy = GCR.getOperand(2)->getType();
4959 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4960 }
4961
4962 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4963 // Canonicalize on the type from the uses to the defs
4964
4965 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4966 LiveGcValues.insert(BasePtr);
4967 LiveGcValues.insert(DerivedPtr);
4968 }
4969 std::optional<OperandBundleUse> Bundle =
4971 unsigned NumOfGCLives = LiveGcValues.size();
// Nothing to shrink if there is no bundle or every bundle input is still live.
4972 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4973 break;
4974 // We can reduce the size of gc live bundle.
// Val2Idx maps each bundle input to its index in the compacted list; inputs
// that are no longer live get the sentinel index NumOfGCLives.
4975 DenseMap<Value *, unsigned> Val2Idx;
4976 std::vector<Value *> NewLiveGc;
4977 for (Value *V : Bundle->Inputs) {
4978 auto [It, Inserted] = Val2Idx.try_emplace(V);
4979 if (!Inserted)
4980 continue;
4981 if (LiveGcValues.count(V)) {
4982 It->second = NewLiveGc.size();
4983 NewLiveGc.push_back(V);
4984 } else
4985 It->second = NumOfGCLives;
4986 }
4987 // Update all gc.relocates
4988 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4989 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4990 Value *BasePtr = GCR.getBasePtr();
4991 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4992 "Missed live gc for base pointer");
// Operand 1 holds the base-pointer index, operand 2 the derived-pointer index.
4993 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4994 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4995 Value *DerivedPtr = GCR.getDerivedPtr();
4996 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4997 "Missed live gc for derived pointer");
4998 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4999 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
5000 }
5001 // Create new statepoint instruction.
5002 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
5003 return CallBase::Create(&Call, NewBundle);
5004 }
5005 default: { break; }
5006 }
5007
// In-place changes are signalled by returning the call itself.
5008 return Changed ? &Call : nullptr;
5009}
5010
5011/// If the callee is a constexpr cast of a function, attempt to move the cast to
5012/// the arguments of the call/invoke.
5013/// CallBrInst is not supported.
///
/// \returns true if \p Call was replaced by a direct call to the underlying
/// function and erased; false if the transform was not applicable or not safe,
/// in which case \p Call is left unmodified.
///
/// NOTE(review): several statements are missing from this copy (gaps in the
/// embedded listing numbers, e.g. 5016, 5020, 5048, 5148, 5217, 5230, 5241);
/// in particular the declarations of `Caller`, `ArgAttrs` and `OpBundles` are
/// not visible. Consult upstream before editing the logic.
5014bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
5015 auto *Callee =
5017 if (!Callee)
5018 return false;
5019
5021 "CallBr's don't have a single point after a def to insert at");
5022
5023 // Don't perform the transform for declarations, which may not be fully
5024 // accurate. For example, void @foo() is commonly used as a placeholder for
5025 // unknown prototypes.
5026 if (Callee->isDeclaration())
5027 return false;
5028
5029 // If this is a call to a thunk function, don't remove the cast. Thunks are
5030 // used to transparently forward all incoming parameters and outgoing return
5031 // values, so it's important to leave the cast in place.
5032 if (Callee->hasFnAttribute("thunk"))
5033 return false;
5034
5035 // If this is a call to a naked function, the assembly might be
5036 // using an argument, or otherwise rely on the frame layout,
5037 // the function prototype will mismatch.
5038 if (Callee->hasFnAttribute(Attribute::Naked))
5039 return false;
5040
5041 // If this is a musttail call, the callee's prototype must match the caller's
5042 // prototype with the exception of pointee types. The code below doesn't
5043 // implement that, so we can't do this transform.
5044 // TODO: Do the transform if it only requires adding pointer casts.
5045 if (Call.isMustTailCall())
5046 return false;
5047
5049 const AttributeList &CallerPAL = Call.getAttributes();
5050
5051 // Okay, this is a cast from a function to a different type. Unless doing so
5052 // would cause a type conversion of one of our arguments, change this call to
5053 // be a direct call with arguments casted to the appropriate types.
5054 FunctionType *FT = Callee->getFunctionType();
5055 Type *OldRetTy = Caller->getType();
5056 Type *NewRetTy = FT->getReturnType();
5057
5058 // Check to see if we are changing the return type...
5059 if (OldRetTy != NewRetTy) {
5060
5061 if (NewRetTy->isStructTy())
5062 return false; // TODO: Handle multiple return values.
5063
// A non-castable return type is tolerated only when the result is unused.
5064 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5065 if (!Caller->use_empty())
5066 return false; // Cannot transform this return value.
5067 }
5068
5069 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5070 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5071 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5072 NewRetTy, CallerPAL.getRetAttrs())))
5073 return false; // Attribute not compatible with transformed value.
5074 }
5075
5076 // If the callbase is an invoke instruction, and the return value is
5077 // used by a PHI node in a successor, we cannot change the return type of
5078 // the call because there is no place to put the cast instruction (without
5079 // breaking the critical edge). Bail out in this case.
5080 if (!Caller->use_empty()) {
5081 BasicBlock *PhisNotSupportedBlock = nullptr;
5082 if (auto *II = dyn_cast<InvokeInst>(Caller))
5083 PhisNotSupportedBlock = II->getNormalDest();
5084 if (PhisNotSupportedBlock)
5085 for (User *U : Caller->users())
5086 if (PHINode *PN = dyn_cast<PHINode>(U))
5087 if (PN->getParent() == PhisNotSupportedBlock)
5088 return false;
5089 }
5090 }
5091
5092 unsigned NumActualArgs = Call.arg_size();
// Arguments present both at the call site and in the callee's prototype.
5093 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5094
5095 // Prevent us turning:
5096 // declare void @takes_i32_inalloca(i32* inalloca)
5097 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5098 //
5099 // into:
5100 // call void @takes_i32_inalloca(i32* null)
5101 //
5102 // Similarly, avoid folding away bitcasts of byval calls.
5103 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5104 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5105 return false;
5106
// First pass: only checks. Nothing is modified until every common argument
// has been proven convertible.
5107 auto AI = Call.arg_begin();
5108 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5109 Type *ParamTy = FT->getParamType(i);
5110 Type *ActTy = (*AI)->getType();
5111
5112 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5113 return false; // Cannot transform this parameter value.
5114
5115 // Check if there are any incompatible attributes we cannot drop safely.
5116 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5117 .overlaps(AttributeFuncs::typeIncompatible(
5118 ParamTy, CallerPAL.getParamAttrs(i),
5119 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5120 return false; // Attribute not compatible with transformed value.
5121
5122 if (Call.isInAllocaArgument(i) ||
5123 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5124 return false; // Cannot transform to and from inalloca/preallocated.
5125
5126 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5127 return false;
5128
5129 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5130 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5131 return false; // Cannot transform to or from byval.
5132 }
5133
5134 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5135 !CallerPAL.isEmpty()) {
5136 // In this case we have more arguments than the new function type, but we
5137 // won't be dropping them. Check that these extra arguments have attributes
5138 // that are compatible with being a vararg call argument.
5139 unsigned SRetIdx;
5140 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5141 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5142 return false;
5143 }
5144
5145 // Okay, we decided that this is a safe thing to do: go ahead and start
5146 // inserting cast instructions as necessary.
5147 SmallVector<Value *, 8> Args;
5149 Args.reserve(NumActualArgs);
5150 ArgAttrs.reserve(NumActualArgs);
5151
5152 // Get any return attributes.
5153 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5154
5155 // If the return value is not being used, the type may not be compatible
5156 // with the existing attributes. Wipe out any problematic attributes.
5157 RAttrs.remove(
5158 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5159
5160 LLVMContext &Ctx = Call.getContext();
// Second pass: build the new argument list, casting where the types differ.
5161 AI = Call.arg_begin();
5162 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5163 Type *ParamTy = FT->getParamType(i);
5164
5165 Value *NewArg = *AI;
5166 if ((*AI)->getType() != ParamTy)
5167 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5168 Args.push_back(NewArg);
5169
5170 // Add any parameter attributes except the ones incompatible with the new
5171 // type. Note that we made sure all incompatible ones are safe to drop.
5172 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5173 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5174 ArgAttrs.push_back(
5175 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5176 }
5177
5178 // If the function takes more arguments than the call was taking, add them
5179 // now.
// The missing arguments are filled with null values of the parameter type.
5180 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5181 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5182 ArgAttrs.push_back(AttributeSet());
5183 }
5184
5185 // If the call passes more arguments than the callee declares, only a vararg
// callee receives the extras (promoted below); otherwise they are dropped.
5186 if (FT->getNumParams() < NumActualArgs) {
5187 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5188 if (FT->isVarArg()) {
5189 // Add all of the arguments in their promoted form to the arg list.
5190 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5191 Type *PTy = getPromotedType((*AI)->getType());
5192 Value *NewArg = *AI;
5193 if (PTy != (*AI)->getType()) {
5194 // Must promote to pass through va_arg area!
5195 Instruction::CastOps opcode =
5196 CastInst::getCastOpcode(*AI, false, PTy, false);
5197 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5198 }
5199 Args.push_back(NewArg);
5200
5201 // Add any parameter attributes.
5202 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5203 }
5204 }
5205 }
5206
5207 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5208
5209 if (NewRetTy->isVoidTy())
5210 Caller->setName(""); // Void type should not have a name.
5211
5212 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5213 "missing argument attributes");
5214 AttributeList NewCallerPAL = AttributeList::get(
5215 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5216
5218 Call.getOperandBundlesAsDefs(OpBundles);
5219
// Only invoke and plain call are handled here; callbr is not supported by
// this transform.
5220 CallBase *NewCall;
5221 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5222 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5223 II->getUnwindDest(), Args, OpBundles);
5224 } else {
5225 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5226 cast<CallInst>(NewCall)->setTailCallKind(
5227 cast<CallInst>(Caller)->getTailCallKind());
5228 }
5229 NewCall->takeName(Caller);
5231 NewCall->setAttributes(NewCallerPAL);
5232
5233 // Preserve prof metadata if any.
5234 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5235
5236 // Insert a cast of the return type as necessary.
5237 Instruction *NC = NewCall;
5238 Value *NV = NC;
5239 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5240 assert(!NV->getType()->isVoidTy());
// NOTE(review): the statement creating the cast (listing line 5241) is missing
// from this copy; presumably it reassigns NC and NV to a cast of the new call
// to OldRetTy — confirm against upstream.
5242 NC->setDebugLoc(Caller->getDebugLoc());
5243
5244 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5245 assert(OptInsertPt && "No place to insert cast");
5246 InsertNewInstBefore(NC, *OptInsertPt);
5247 Worklist.pushUsersToWorkList(*Caller);
5248 }
5249
5250 if (!Caller->use_empty())
5251 replaceInstUsesWith(*Caller, NV);
5252 else if (Caller->hasValueHandle()) {
// NOTE(review): the statements in both branches (listing lines 5254 and 5258)
// are missing from this copy.
5253 if (OldRetTy == NV->getType())
5255 else
5256 // We cannot call ValueIsRAUWd with a different type, and the
5257 // actual tracked value will disappear.
5259 }
5260
5261 eraseInstFromFunction(*Caller);
5262 return true;
5263}
5264
5265/// Turn a call to a function created by init_trampoline / adjust_trampoline
5266/// intrinsic pair into a direct call to the underlying function.
///
/// \param Call  the call, invoke or callbr made through the trampoline.
/// \param Tramp the matching init_trampoline intrinsic; its argument operand 2
///        supplies the value passed for the callee's 'nest' parameter.
/// \returns nullptr if \p Call already carries a 'nest' attribute; a newly
/// created call/invoke/callbr with the nest argument spliced in when the
/// target function has a 'nest' parameter; otherwise \p Call itself after its
/// callee has been redirected to the target function.
///
/// NOTE(review): the return-type line (listing line 5267) and the declarations
/// of `NestF` (5278) and `OpBundles` (5372) are missing from this copy.
5268InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5269 IntrinsicInst &Tramp) {
5270 FunctionType *FTy = Call.getFunctionType();
5271 AttributeList Attrs = Call.getAttributes();
5272
5273 // If the call already has the 'nest' attribute somewhere then give up -
5274 // otherwise 'nest' would occur twice after splicing in the chain.
5275 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5276 return nullptr;
5277
5279 FunctionType *NestFTy = NestF->getFunctionType();
5280
5281 AttributeList NestAttrs = NestF->getAttributes();
5282 if (!NestAttrs.isEmpty()) {
5283 unsigned NestArgNo = 0;
5284 Type *NestTy = nullptr;
5285 AttributeSet NestAttr;
5286
5287 // Look for a parameter marked with the 'nest' attribute.
5288 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5289 E = NestFTy->param_end();
5290 I != E; ++NestArgNo, ++I) {
5291 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5292 if (AS.hasAttribute(Attribute::Nest)) {
5293 // Record the parameter type and any other attributes.
5294 NestTy = *I;
5295 NestAttr = AS;
5296 break;
5297 }
5298 }
5299
5300 if (NestTy) {
5301 std::vector<Value*> NewArgs;
5302 std::vector<AttributeSet> NewArgAttrs;
5303 NewArgs.reserve(Call.arg_size() + 1);
// NOTE(review): NewArgAttrs receives one entry per NewArgs entry, so this
// reserves one slot fewer than will be used; harmless, but arg_size() + 1
// would match NewArgs.
5304 NewArgAttrs.reserve(Call.arg_size());
5305
5306 // Insert the nest argument into the call argument list, which may
5307 // mean appending it. Likewise for attributes.
5308
5309 {
5310 unsigned ArgNo = 0;
5311 auto I = Call.arg_begin(), E = Call.arg_end();
// The loop tests for the nest position before testing for the end, so the
// nest argument is still emitted when NestArgNo equals the argument count.
5312 do {
5313 if (ArgNo == NestArgNo) {
5314 // Add the chain argument and attributes.
5315 Value *NestVal = Tramp.getArgOperand(2);
5316 if (NestVal->getType() != NestTy)
5317 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5318 NewArgs.push_back(NestVal);
5319 NewArgAttrs.push_back(NestAttr);
5320 }
5321
5322 if (I == E)
5323 break;
5324
5325 // Add the original argument and attributes.
5326 NewArgs.push_back(*I);
5327 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5328
5329 ++ArgNo;
5330 ++I;
5331 } while (true);
5332 }
5333
5334 // The trampoline may have been bitcast to a bogus type (FTy).
5335 // Handle this by synthesizing a new function type, equal to FTy
5336 // with the chain parameter inserted.
5337
5338 std::vector<Type*> NewTypes;
5339 NewTypes.reserve(FTy->getNumParams()+1);
5340
5341 // Insert the chain's type into the list of parameter types, which may
5342 // mean appending it.
5343 {
5344 unsigned ArgNo = 0;
5345 FunctionType::param_iterator I = FTy->param_begin(),
5346 E = FTy->param_end();
5347
5348 do {
5349 if (ArgNo == NestArgNo)
5350 // Add the chain's type.
5351 NewTypes.push_back(NestTy);
5352
5353 if (I == E)
5354 break;
5355
5356 // Add the original type.
5357 NewTypes.push_back(*I);
5358
5359 ++ArgNo;
5360 ++I;
5361 } while (true);
5362 }
5363
5364 // Replace the trampoline call with a direct call. Let the generic
5365 // code sort out any function type mismatches.
5366 FunctionType *NewFTy =
5367 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5368 AttributeList NewPAL =
5369 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5370 Attrs.getRetAttrs(), NewArgAttrs);
5371
5373 Call.getOperandBundlesAsDefs(OpBundles);
5374
// Recreate the same kind of instruction as the original, carrying over its
// calling convention, attributes and (for plain calls) tail-call kind.
5375 Instruction *NewCaller;
5376 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5377 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5378 II->getUnwindDest(), NewArgs, OpBundles);
5379 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5380 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5381 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5382 NewCaller =
5383 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5384 CBI->getIndirectDests(), NewArgs, OpBundles);
5385 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5386 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5387 } else {
5388 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5389 cast<CallInst>(NewCaller)->setTailCallKind(
5390 cast<CallInst>(Call).getTailCallKind());
5391 cast<CallInst>(NewCaller)->setCallingConv(
5392 cast<CallInst>(Call).getCallingConv());
5393 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5394 }
5395 NewCaller->setDebugLoc(Call.getDebugLoc());
5396
5397 return NewCaller;
5398 }
5399 }
5400
5401 // Replace the trampoline call with a direct call. Since there is no 'nest'
5402 // parameter, there is no need to adjust the argument list. Let the generic
5403 // code sort out any function type mismatches.
5404 Call.setCalledFunction(FTy, NestF);
5405 return &Call;
5406}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
#define _
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
bool isNegative() const
Definition APFloat.h:1538
void clearSign()
Definition APFloat.h:1357
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isZero() const
Definition APFloat.h:1534
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1987
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2088
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get the list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2130
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2659
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2494
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2257
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1242
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1295
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:798
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:222
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:797
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
Canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI AssumeSeparateStorageInfo getAssumeSeparateStorageInfo(OperandBundleUse)
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1740
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1695
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negative absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1726
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1676
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI AssumeNonNullInfo getAssumeNonNullInfo(OperandBundleUse)
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
LLVM_ABI AssumeAlignInfo getAssumeAlignInfo(OperandBundleUse)
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:255
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1713
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1753
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI AssumeDereferenceableInfo getAssumeDereferenceableInfo(OperandBundleUse)
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const