LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Statepoint.h"
56#include "llvm/IR/Type.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/IR/ValueHandle.h"
64#include "llvm/Support/Debug.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <optional>
79#include <utility>
80#include <vector>
81
82#define DEBUG_TYPE "instcombine"
84
85using namespace llvm;
86using namespace PatternMatch;
87
88STATISTIC(NumSimplified, "Number of library calls simplified");
89
91 "instcombine-guard-widening-window",
92 cl::init(3),
93 cl::desc("How wide an instruction window to bypass looking for "
94 "another guard"));
95
96/// Return the specified type promoted as it would be to pass though a va_arg
97/// area.
99 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
100 if (ITy->getBitWidth() < 32)
101 return Type::getInt32Ty(Ty->getContext());
102 }
103 return Ty;
104}
105
106/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107/// TODO: This should probably be integrated with visitAllocSites, but that
108/// requires a deeper change to allow either unread or unwritten objects.
110 auto *Src = MI->getRawSource();
111 while (isa<GetElementPtrInst>(Src)) {
112 if (!Src->hasOneUse())
113 return false;
114 Src = cast<Instruction>(Src)->getOperand(0);
115 }
116 return isa<AllocaInst>(Src) && Src->hasOneUse();
117}
118
120 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
121 MaybeAlign CopyDstAlign = MI->getDestAlign();
122 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
123 MI->setDestAlignment(DstAlign);
124 return MI;
125 }
126
127 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
128 MaybeAlign CopySrcAlign = MI->getSourceAlign();
129 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
130 MI->setSourceAlignment(SrcAlign);
131 return MI;
132 }
133
134 // If we have a store to a location which is known constant, we can conclude
135 // that the store must be storing the constant value (else the memory
136 // wouldn't be constant), and this must be a noop.
137 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
138 // Set the size of the copy to 0, it will be deleted on the next iteration.
139 MI->setLength((uint64_t)0);
140 return MI;
141 }
142
143 // If the source is provably undef, the memcpy/memmove doesn't do anything
144 // (unless the transfer is volatile).
145 if (hasUndefSource(MI) && !MI->isVolatile()) {
146 // Set the size of the copy to 0, it will be deleted on the next iteration.
147 MI->setLength((uint64_t)0);
148 return MI;
149 }
150
151 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152 // load/store.
153 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
154 if (!MemOpLength) return nullptr;
155
156 // Source and destination pointer types are always "i8*" for intrinsic. See
157 // if the size is something we can handle with a single primitive load/store.
158 // A single load+store correctly handles overlapping memory in the memmove
159 // case.
160 uint64_t Size = MemOpLength->getLimitedValue();
161 assert(Size && "0-sized memory transferring should be removed already.");
162
163 if (Size > 8 || (Size&(Size-1)))
164 return nullptr; // If not 1/2/4/8 bytes, exit.
165
166 // If it is an atomic and alignment is less than the size then we will
167 // introduce the unaligned memory access which will be later transformed
168 // into libcall in CodeGen. This is not evident performance gain so disable
169 // it now.
170 if (MI->isAtomic())
171 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
172 return nullptr;
173
174 // Use an integer load+store unless we can find something better.
175 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
176
177 // If the memcpy has metadata describing the members, see if we can get the
178 // TBAA, scope and noalias tags describing our copy.
179 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
180
181 Value *Src = MI->getArgOperand(1);
182 Value *Dest = MI->getArgOperand(0);
183 LoadInst *L = Builder.CreateLoad(IntType, Src);
184 // Alignment from the mem intrinsic will be better, so use it.
185 L->setAlignment(*CopySrcAlign);
186 L->setAAMetadata(AACopyMD);
187 MDNode *LoopMemParallelMD =
188 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
189 if (LoopMemParallelMD)
190 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
191 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
192 if (AccessGroupMD)
193 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
194
195 StoreInst *S = Builder.CreateStore(L, Dest);
196 // Alignment from the mem intrinsic will be better, so use it.
197 S->setAlignment(*CopyDstAlign);
198 S->setAAMetadata(AACopyMD);
199 if (LoopMemParallelMD)
200 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
201 if (AccessGroupMD)
202 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
203 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
204
205 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
206 // non-atomics can be volatile
207 L->setVolatile(MT->isVolatile());
208 S->setVolatile(MT->isVolatile());
209 }
210 if (MI->isAtomic()) {
211 // atomics have to be unordered
212 L->setOrdering(AtomicOrdering::Unordered);
214 }
215
216 // Set the size of the copy to 0, it will be deleted on the next iteration.
217 MI->setLength((uint64_t)0);
218 return MI;
219}
220
222 const Align KnownAlignment =
223 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
224 MaybeAlign MemSetAlign = MI->getDestAlign();
225 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
226 MI->setDestAlignment(KnownAlignment);
227 return MI;
228 }
229
230 // If we have a store to a location which is known constant, we can conclude
231 // that the store must be storing the constant value (else the memory
232 // wouldn't be constant), and this must be a noop.
233 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
234 // Set the size of the copy to 0, it will be deleted on the next iteration.
235 MI->setLength((uint64_t)0);
236 return MI;
237 }
238
239 // Remove memset with an undef value.
240 // FIXME: This is technically incorrect because it might overwrite a poison
241 // value. Change to PoisonValue once #52930 is resolved.
242 if (isa<UndefValue>(MI->getValue())) {
243 // Set the size of the copy to 0, it will be deleted on the next iteration.
244 MI->setLength((uint64_t)0);
245 return MI;
246 }
247
248 // Extract the length and alignment and fill if they are constant.
249 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
250 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
251 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
252 return nullptr;
253 const uint64_t Len = LenC->getLimitedValue();
254 assert(Len && "0-sized memory setting should be removed already.");
255 const Align Alignment = MI->getDestAlign().valueOrOne();
256
257 // If it is an atomic and alignment is less than the size then we will
258 // introduce the unaligned memory access which will be later transformed
259 // into libcall in CodeGen. This is not evident performance gain so disable
260 // it now.
261 if (MI->isAtomic() && Alignment < Len)
262 return nullptr;
263
264 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
265 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
266 Value *Dest = MI->getDest();
267
268 // Extract the fill value and store.
269 Constant *FillVal = ConstantInt::get(
270 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
271 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
272 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
273 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
274 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
275 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
276 }
277
278 S->setAlignment(Alignment);
279 if (MI->isAtomic())
281
282 // Set the size of the copy to 0, it will be deleted on the next iteration.
283 MI->setLength((uint64_t)0);
284 return MI;
285 }
286
287 return nullptr;
288}
289
290// TODO, Obvious Missing Transforms:
291// * Narrow width by halfs excluding zero/undef lanes
292Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293 Value *LoadPtr = II.getArgOperand(0);
294 const Align Alignment = II.getParamAlign(0).valueOrOne();
295
296 // If the mask is all ones or undefs, this is a plain vector load of the 1st
297 // argument.
298 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
299 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
300 "unmaskedload");
301 L->copyMetadata(II);
302 return L;
303 }
304
305 // If we can unconditionally load from this address, replace with a
306 // load/select idiom. TODO: use DT for context sensitive query
307 if (isDereferenceablePointer(LoadPtr, II.getType(),
308 II.getDataLayout(), &II, &AC)) {
309 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
310 "unmaskedload");
311 LI->copyMetadata(II);
312 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
313 }
314
315 return nullptr;
316}
317
318// TODO, Obvious Missing Transforms:
319// * Single constant active lane -> store
320// * Narrow width by halfs excluding zero/undef lanes
321Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
322 Value *StorePtr = II.getArgOperand(1);
323 Align Alignment = II.getParamAlign(1).valueOrOne();
324 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
325 if (!ConstMask)
326 return nullptr;
327
328 // If the mask is all zeros, this instruction does nothing.
329 if (maskIsAllZeroOrUndef(ConstMask))
331
332 // If the mask is all ones, this is a plain vector store of the 1st argument.
333 if (maskIsAllOneOrUndef(ConstMask)) {
334 StoreInst *S =
335 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
336 S->copyMetadata(II);
337 return S;
338 }
339
340 if (isa<ScalableVectorType>(ConstMask->getType()))
341 return nullptr;
342
343 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
344 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
345 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
346 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
347 PoisonElts))
348 return replaceOperand(II, 0, V);
349
350 return nullptr;
351}
352
353// TODO, Obvious Missing Transforms:
354// * Single constant active lane load -> load
355// * Dereferenceable address & few lanes -> scalarize speculative load/selects
356// * Adjacent vector addresses -> masked.load
357// * Narrow width by halfs excluding zero/undef lanes
358// * Vector incrementing address -> vector masked load
359Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
360 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
361 if (!ConstMask)
362 return nullptr;
363
364 // Vector splat address w/known mask -> scalar load
365 // Fold the gather to load the source vector first lane
366 // because it is reloading the same value each time
367 if (ConstMask->isAllOnesValue())
368 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
369 auto *VecTy = cast<VectorType>(II.getType());
370 const Align Alignment = II.getParamAlign(0).valueOrOne();
371 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
372 Alignment, "load.scalar");
373 Value *Shuf =
374 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
376 }
377
378 return nullptr;
379}
380
381// TODO, Obvious Missing Transforms:
382// * Single constant active lane -> store
383// * Adjacent vector addresses -> masked.store
384// * Narrow store width by halfs excluding zero/undef lanes
385// * Vector incrementing address -> vector masked store
386Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
387 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
388 if (!ConstMask)
389 return nullptr;
390
391 // If the mask is all zeros, a scatter does nothing.
392 if (maskIsAllZeroOrUndef(ConstMask))
394
395 // Vector splat address -> scalar store
396 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
397 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
398 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
399 if (maskContainsAllOneOrUndef(ConstMask)) {
400 Align Alignment = II.getParamAlign(1).valueOrOne();
401 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
402 Alignment);
403 S->copyMetadata(II);
404 return S;
405 }
406 }
407 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408 // lastlane), ptr
409 if (ConstMask->isAllOnesValue()) {
410 Align Alignment = II.getParamAlign(1).valueOrOne();
411 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
412 ElementCount VF = WideLoadTy->getElementCount();
413 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
414 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
415 Value *Extract =
416 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
417 StoreInst *S =
418 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
419 S->copyMetadata(II);
420 return S;
421 }
422 }
423 if (isa<ScalableVectorType>(ConstMask->getType()))
424 return nullptr;
425
426 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
428 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
429 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
430 PoisonElts))
431 return replaceOperand(II, 0, V);
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 1, V);
435
436 return nullptr;
437}
438
439/// This function transforms launder.invariant.group and strip.invariant.group
440/// like:
441/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442/// launder(strip(%x)) -> launder(%x)
443/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444/// strip(launder(%x)) -> strip(%x)
445/// This is legal because it preserves the most recent information about
446/// the presence or absence of invariant.group.
448 InstCombinerImpl &IC) {
449 auto *Arg = II.getArgOperand(0);
450 auto *StrippedArg = Arg->stripPointerCasts();
451 auto *StrippedInvariantGroupsArg = StrippedArg;
452 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
453 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455 break;
456 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
457 }
458 if (StrippedArg == StrippedInvariantGroupsArg)
459 return nullptr; // No launders/strips to remove.
460
461 Value *Result = nullptr;
462
463 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
465 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
467 else
469 "simplifyInvariantGroupIntrinsic only handles launder and strip");
470 if (Result->getType()->getPointerAddressSpace() !=
471 II.getType()->getPointerAddressSpace())
472 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
473
474 return cast<Instruction>(Result);
475}
476
478 assert((II.getIntrinsicID() == Intrinsic::cttz ||
479 II.getIntrinsicID() == Intrinsic::ctlz) &&
480 "Expected cttz or ctlz intrinsic");
481 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482 Value *Op0 = II.getArgOperand(0);
483 Value *Op1 = II.getArgOperand(1);
484 Value *X;
485 // ctlz(bitreverse(x)) -> cttz(x)
486 // cttz(bitreverse(x)) -> ctlz(x)
487 if (match(Op0, m_BitReverse(m_Value(X)))) {
488 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489 Function *F =
490 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
491 return CallInst::Create(F, {X, II.getArgOperand(1)});
492 }
493
494 if (II.getType()->isIntOrIntVectorTy(1)) {
495 // ctlz/cttz i1 Op0 --> not Op0
496 if (match(Op1, m_Zero()))
497 return BinaryOperator::CreateNot(Op0);
498 // If zero is poison, then the input can be assumed to be "true", so the
499 // instruction simplifies to "false".
500 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
502 }
503
504 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505 if (II.hasOneUse() && match(Op1, m_Zero()) &&
506 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
507 II.dropUBImplyingAttrsAndMetadata();
508 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
509 }
510
511 Constant *C;
512
513 if (IsTZ) {
514 // cttz(-x) -> cttz(x)
515 if (match(Op0, m_Neg(m_Value(X))))
516 return IC.replaceOperand(II, 0, X);
517
518 // cttz(-x & x) -> cttz(x)
519 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(sext(x)) -> cttz(zext(x))
523 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
524 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
525 auto *CttzZext =
526 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
527 return IC.replaceInstUsesWith(II, CttzZext);
528 }
529
530 // Zext doesn't change the number of trailing zeros, so narrow:
531 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
533 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
534 IC.Builder.getTrue());
535 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
536 return IC.replaceInstUsesWith(II, ZextCttz);
537 }
538
539 // cttz(abs(x)) -> cttz(x)
540 // cttz(nabs(x)) -> cttz(x)
541 Value *Y;
543 if (SPF == SPF_ABS || SPF == SPF_NABS)
544 return IC.replaceOperand(II, 0, X);
545
547 return IC.replaceOperand(II, 0, X);
548
549 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
551 match(Op1, m_One())) {
552 Value *ConstCttz =
553 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
554 return BinaryOperator::CreateAdd(ConstCttz, X);
555 }
556
557 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
559 match(Op1, m_One())) {
560 Value *ConstCttz =
561 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
562 return BinaryOperator::CreateSub(ConstCttz, X);
563 }
564
565 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
567 Value *Width =
568 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
569 return BinaryOperator::CreateSub(Width, X);
570 }
571 } else {
572 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
574 match(Op1, m_One())) {
575 Value *ConstCtlz =
576 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
577 return BinaryOperator::CreateAdd(ConstCtlz, X);
578 }
579
580 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
582 match(Op1, m_One())) {
583 Value *ConstCtlz =
584 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
585 return BinaryOperator::CreateSub(ConstCtlz, X);
586 }
587
588 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
589 if (Op0->hasOneUse() &&
590 match(Op0,
592 Type *Ty = II.getType();
593 unsigned BitWidth = Ty->getScalarSizeInBits();
594 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
595 {X, IC.Builder.getFalse()});
596 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
597 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
598 }
599 }
600
601 // cttz(Pow2) -> Log2(Pow2)
602 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
603 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
604 if (IsTZ)
605 return IC.replaceInstUsesWith(II, R);
606 BinaryOperator *BO = BinaryOperator::CreateSub(
607 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
608 R);
609 BO->setHasNoSignedWrap();
611 return BO;
612 }
613
614 KnownBits Known = IC.computeKnownBits(Op0, &II);
615
616 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
617 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
618 : Known.countMaxLeadingZeros();
619 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
620 : Known.countMinLeadingZeros();
621
622 // If all bits above (ctlz) or below (cttz) the first known one are known
623 // zero, this value is constant.
624 // FIXME: This should be in InstSimplify because we're replacing an
625 // instruction with a constant.
626 if (PossibleZeros == DefiniteZeros) {
627 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
628 return IC.replaceInstUsesWith(II, C);
629 }
630
631 // If the input to cttz/ctlz is known to be non-zero,
632 // then change the 'ZeroIsPoison' parameter to 'true'
633 // because we know the zero behavior can't affect the result.
634 if (!Known.One.isZero() ||
636 if (!match(II.getArgOperand(1), m_One()))
637 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
638 }
639
640 // Add range attribute since known bits can't completely reflect what we know.
641 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
642 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
643 !II.getMetadata(LLVMContext::MD_range)) {
644 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
645 APInt(BitWidth, PossibleZeros + 1));
646 II.addRangeRetAttr(Range);
647 return &II;
648 }
649
650 return nullptr;
651}
652
654 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
655 "Expected ctpop intrinsic");
656 Type *Ty = II.getType();
657 unsigned BitWidth = Ty->getScalarSizeInBits();
658 Value *Op0 = II.getArgOperand(0);
659 Value *X, *Y;
660
661 // ctpop(bitreverse(x)) -> ctpop(x)
662 // ctpop(bswap(x)) -> ctpop(x)
663 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
664 return IC.replaceOperand(II, 0, X);
665
666 // ctpop(rot(x)) -> ctpop(x)
667 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
668 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
669 X == Y)
670 return IC.replaceOperand(II, 0, X);
671
672 // ctpop(x | -x) -> bitwidth - cttz(x, false)
673 if (Op0->hasOneUse() &&
674 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
675 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
676 {X, IC.Builder.getFalse()});
677 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
678 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
679 }
680
681 // ctpop(~x & (x - 1)) -> cttz(x, false)
682 if (match(Op0,
684 Function *F =
685 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
686 return CallInst::Create(F, {X, IC.Builder.getFalse()});
687 }
688
689 // Zext doesn't change the number of set bits, so narrow:
690 // ctpop (zext X) --> zext (ctpop X)
691 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
692 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
693 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
694 }
695
696 KnownBits Known(BitWidth);
697 IC.computeKnownBits(Op0, Known, &II);
698
699 // If all bits are zero except for exactly one fixed bit, then the result
700 // must be 0 or 1, and we can get that answer by shifting to LSB:
701 // ctpop (X & 32) --> (X & 32) >> 5
702 // TODO: Investigate removing this as its likely unnecessary given the below
703 // `isKnownToBeAPowerOfTwo` check.
704 if ((~Known.Zero).isPowerOf2())
705 return BinaryOperator::CreateLShr(
706 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
707
708 // More generally we can also handle non-constant power of 2 patterns such as
709 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
710 // ctpop(Pow2OrZero) --> icmp ne X, 0
711 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
712 return CastInst::Create(Instruction::ZExt,
715 Ty);
716
717 // Add range attribute since known bits can't completely reflect what we know.
718 if (BitWidth != 1) {
719 ConstantRange OldRange =
720 II.getRange().value_or(ConstantRange::getFull(BitWidth));
721
722 unsigned Lower = Known.countMinPopulation();
723 unsigned Upper = Known.countMaxPopulation() + 1;
724
725 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
727 Lower = 1;
728
730 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
731
732 if (Range != OldRange) {
733 II.addRangeRetAttr(Range);
734 return &II;
735 }
736 }
737
738 return nullptr;
739}
740
741/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
742/// at most two source operands are actually referenced.
744 bool IsExtension) {
745 // Bail out if the mask is not a constant.
746 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
747 if (!C)
748 return nullptr;
749
750 auto *RetTy = cast<FixedVectorType>(II.getType());
751 unsigned NumIndexes = RetTy->getNumElements();
752
753 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
754 if (!RetTy->getElementType()->isIntegerTy(8) ||
755 (NumIndexes != 8 && NumIndexes != 16))
756 return nullptr;
757
758 // For tbx instructions, the first argument is the "fallback" vector, which
759 // has the same length as the mask and return type.
760 unsigned int StartIndex = (unsigned)IsExtension;
761 auto *SourceTy =
762 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
763 // Note that the element count of each source vector does *not* need to be the
764 // same as the element count of the return type and mask! All source vectors
765 // must have the same element count as each other, though.
766 unsigned NumElementsPerSource = SourceTy->getNumElements();
767
768 // There are no tbl/tbx intrinsics for which the destination size exceeds the
769 // source size. However, our definitions of the intrinsics, at least in
770 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
771 // *could* technically happen.
772 if (NumIndexes > NumElementsPerSource)
773 return nullptr;
774
775 // The tbl/tbx intrinsics take several source operands followed by a mask
776 // operand.
777 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
778
779 // Map input operands to shuffle indices. This also helpfully deduplicates the
780 // input arguments, in case the same value is passed as an argument multiple
781 // times.
782 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
783 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
784 PoisonValue::get(SourceTy)};
785
786 int Indexes[16];
787 for (unsigned I = 0; I < NumIndexes; ++I) {
788 Constant *COp = C->getAggregateElement(I);
789
790 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
791 return nullptr;
792
793 if (isa<UndefValue>(COp)) {
794 Indexes[I] = -1;
795 continue;
796 }
797
798 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
799 // The index of the input argument that this index references (0 = first
800 // source argument, etc).
801 unsigned SourceOperandIndex = Index / NumElementsPerSource;
802 // The index of the element at that source operand.
803 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
804
805 Value *SourceOperand;
806 if (SourceOperandIndex >= NumSourceOperands) {
807 // This index is out of bounds. Map it to index into either the fallback
808 // vector (tbx) or vector of zeroes (tbl).
809 SourceOperandIndex = NumSourceOperands;
810 if (IsExtension) {
811 // For out-of-bounds indices in tbx, choose the `I`th element of the
812 // fallback.
813 SourceOperand = II.getArgOperand(0);
814 SourceOperandElementIndex = I;
815 } else {
816 // Otherwise, choose some element from the dummy vector of zeroes (we'll
817 // always choose the first).
818 SourceOperand = Constant::getNullValue(SourceTy);
819 SourceOperandElementIndex = 0;
820 }
821 } else {
822 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
823 }
824
825 // The source operand may be the fallback vector, which may not have the
826 // same number of elements as the source vector. In that case, we *could*
827 // choose to extend its length with another shufflevector, but it's simpler
828 // to just bail instead.
829 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
830 NumElementsPerSource)
831 return nullptr;
832
833 // We now know the source operand referenced by this index. Make it a
834 // shufflevector operand, if it isn't already.
835 unsigned NumSlots = ValueToShuffleSlot.size();
836 // This shuffle references more than two sources, and hence cannot be
837 // represented as a shufflevector.
838 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
839 return nullptr;
840
841 auto [It, Inserted] =
842 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
843 if (Inserted)
844 ShuffleOperands[It->getSecond()] = SourceOperand;
845
846 unsigned RemappedIndex =
847 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
848 Indexes[I] = RemappedIndex;
849 }
850
852 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
853 return IC.replaceInstUsesWith(II, Shuf);
854}
855
856// Returns true iff the 2 intrinsics have the same operands, limiting the
857// comparison to the first NumOperands.
858static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
859 unsigned NumOperands) {
860 assert(I.arg_size() >= NumOperands && "Not enough operands");
861 assert(E.arg_size() >= NumOperands && "Not enough operands");
862 for (unsigned i = 0; i < NumOperands; i++)
863 if (I.getArgOperand(i) != E.getArgOperand(i))
864 return false;
865 return true;
866}
867
868 // Remove trivially empty start/end intrinsic ranges, i.e. a start
869 // immediately followed by an end (ignoring debuginfo or other
870 // start/end intrinsics in between). As this handles only the most trivial
871 // cases, tracking the nesting level is not needed:
872 //
873 // call @llvm.foo.start(i1 0)
874 // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
875 // call @llvm.foo.end(i1 0)
876 // call @llvm.foo.end(i1 0) ; &I
// NOTE(review): this rendered listing dropped hyperlinked source lines,
// including original line 878 (the function name and leading parameters,
// presumably `removeTriviallyEmptyRange(IntrinsicInst &EndI,
// InstCombinerImpl &IC,` — TODO confirm against upstream) and line 891
// (erasing the matched start intrinsic). Restore before modifying.
877 static bool
879 std::function<bool(const IntrinsicInst &)> IsStart) {
880 // We start from the end intrinsic and scan backwards, so that InstCombine
881 // has already processed (and potentially removed) all the instructions
882 // before the end intrinsic.
883 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
884 for (; BI != BE; ++BI) {
// Only intrinsic calls are inspected; any other instruction ends the scan.
885 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
886 if (I->isDebugOrPseudoInst() ||
887 I->getIntrinsicID() == EndI.getIntrinsicID())
888 continue;
889 if (IsStart(*I)) {
// A start with identical operands pairs with EndI: the range is empty,
// so both intrinsics can be removed (the start removal is on the
// dropped line 891).
890 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
892 IC.eraseInstFromFunction(EndI);
893 return true;
894 }
895 // Skip start intrinsics that don't pair with this end intrinsic.
896 continue;
897 }
898 }
899 break;
900 }
901
902 return false;
903}
904
906 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
907 // Bail out on the case where the source va_list of a va_copy is destroyed
908 // immediately by a follow-up va_end.
909 return II.getIntrinsicID() == Intrinsic::vastart ||
910 (II.getIntrinsicID() == Intrinsic::vacopy &&
911 I.getArgOperand(0) != II.getArgOperand(1));
912 });
913 return nullptr;
914}
915
// NOTE(review): the signature (original line 916) was dropped by the
// rendering — restore from upstream before editing. The visible body swaps
// the first two call arguments when only the first is a constant, i.e. it
// canonicalizes a constant operand into the second position.
917 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
918 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
919 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
920 Call.setArgOperand(0, Arg1);
921 Call.setArgOperand(1, Arg0);
// Returning the call signals to InstCombine that it was modified in place.
922 return &Call;
923 }
924 return nullptr;
925}
926
927/// Creates a result tuple for an overflow intrinsic \p II with a given
928/// \p Result and a constant \p Overflow value.
// NOTE(review): the opening of the signature (original line 929, the
// function name and \p Result parameter) was dropped by the rendering.
930 Constant *Overflow) {
// Build {poison, Overflow} as a constant struct, then insert the
// (possibly non-constant) Result into field 0.
931 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
932 StructType *ST = cast<StructType>(II->getType());
933 Constant *Struct = ConstantStruct::get(ST, V);
934 return InsertValueInst::Create(Struct, Result, 0);
935}
936
// Common folding for *.with.overflow intrinsics: first try to prove the
// overflow check statically, then try to discharge it using an assume of
// the negated overflow bit. (The return type on original line 937,
// `Instruction *`, was dropped by the rendering.)
938 InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
939 WithOverflowInst *WO = cast<WithOverflowInst>(II);
940 Value *OperationResult = nullptr;
941 Constant *OverflowResult = nullptr;
942 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
943 WO->getRHS(), *WO, OperationResult, OverflowResult))
944 return createOverflowTuple(WO, OperationResult, OverflowResult);
945
946 // See whether we can optimize the overflow check with assumption information.
947 for (User *U : WO->users()) {
// Only the extractvalue of the overflow bit (index 1) is interesting.
948 if (!match(U, m_ExtractValue<1>(m_Value())))
949 continue;
950
951 for (auto &AssumeVH : AC.assumptionsFor(U)) {
952 if (!AssumeVH)
953 continue;
954 CallInst *I = cast<CallInst>(AssumeVH);
// The assume must assert the *negation* of the overflow bit.
955 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
956 continue;
957 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
958 /*AllowEphemerals=*/true))
959 continue;
// Overflow is assumed impossible: emit the plain binop and tag it
// with the matching no-wrap flag.
960 Value *Result =
961 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
962 Result->takeName(WO);
963 if (auto *Inst = dyn_cast<Instruction>(Result)) {
964 if (WO->isSigned())
965 Inst->setHasNoSignedWrap();
966 else
967 Inst->setHasNoUnsignedWrap();
968 }
969 return createOverflowTuple(WO, Result,
970 ConstantInt::getFalse(U->getType()));
971 }
972 }
973
974 return nullptr;
975}
976
977static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
978 Ty = Ty->getScalarType();
979 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
980}
981
982static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
983 Ty = Ty->getScalarType();
984 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
985}
986
987/// \returns the compare predicate type if the test performed by
988/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
989/// floating-point environment assumed for \p F for type \p Ty
// NOTE(review): the rendering dropped several hyperlinked lines: the
// signature opening (original line 990), the case labels on lines 1005,
// 1009, 1017 and 1021 (the positive/negative subnormal|normal|inf
// combinations — TODO confirm against upstream), and the final
// `return FCmpInst::BAD_FCMP_PREDICATE;` on line 1045. Restore before
// modifying; the bodies below are orphaned without their labels.
991 const Function &F, Type *Ty) {
992 switch (static_cast<unsigned>(Mask)) {
993 case fcZero:
994 if (inputDenormalIsIEEE(F, Ty))
995 return FCmpInst::FCMP_OEQ;
996 break;
997 case fcZero | fcSubnormal:
998 if (inputDenormalIsDAZ(F, Ty))
999 return FCmpInst::FCMP_OEQ;
1000 break;
1001 case fcPositive | fcNegZero:
1002 if (inputDenormalIsIEEE(F, Ty))
1003 return FCmpInst::FCMP_OGE;
1004 break;
1006 if (inputDenormalIsDAZ(F, Ty))
1007 return FCmpInst::FCMP_OGE;
1008 break;
1010 if (inputDenormalIsIEEE(F, Ty))
1011 return FCmpInst::FCMP_OGT;
1012 break;
1013 case fcNegative | fcPosZero:
1014 if (inputDenormalIsIEEE(F, Ty))
1015 return FCmpInst::FCMP_OLE;
1016 break;
1018 if (inputDenormalIsDAZ(F, Ty))
1019 return FCmpInst::FCMP_OLE;
1020 break;
1022 if (inputDenormalIsIEEE(F, Ty))
1023 return FCmpInst::FCMP_OLT;
1024 break;
1025 case fcPosNormal | fcPosInf:
1026 if (inputDenormalIsDAZ(F, Ty))
1027 return FCmpInst::FCMP_OGT;
1028 break;
1029 case fcNegNormal | fcNegInf:
1030 if (inputDenormalIsDAZ(F, Ty))
1031 return FCmpInst::FCMP_OLT;
1032 break;
1033 case ~fcZero & ~fcNan:
1034 if (inputDenormalIsIEEE(F, Ty))
1035 return FCmpInst::FCMP_ONE;
1036 break;
1037 case ~(fcZero | fcSubnormal) & ~fcNan:
1038 if (inputDenormalIsDAZ(F, Ty))
1039 return FCmpInst::FCMP_ONE;
1040 break;
1041 default:
1042 break;
1043 }
1044
1046}
1047
// Fold llvm.is.fpclass(x, mask): push the test through fneg/fabs, replace
// special mask values with equivalent fcmp instructions, and shrink the mask
// using known-FP-class analysis.
// NOTE(review): the rendering dropped the hyperlinked lines 1081, 1114,
// 1139 and 1157-1158 (construction of the +/-inf constants, the PredType
// declaration, and the BAD_FCMP_PREDICATE comparison plus the zero
// constant). Restore them from upstream before editing.
1048Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1049 Value *Src0 = II.getArgOperand(0);
1050 Value *Src1 = II.getArgOperand(1);
1051 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1052 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
// IsUnordered: the mask accepts all NaNs; IsOrdered: it accepts none.
1053 const bool IsUnordered = (Mask & fcNan) == fcNan;
1054 const bool IsOrdered = (Mask & fcNan) == fcNone;
1055 const FPClassTest OrderedMask = Mask & ~fcNan;
1056 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1057
// Under strict FP, fcmp may raise exceptions, so the fcmp rewrites below
// are disabled.
1058 const bool IsStrict =
1059 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1060
1061 Value *FNegSrc;
1062 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1063 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1064
1065 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1066 return replaceOperand(II, 0, FNegSrc);
1067 }
1068
// Similarly commute the class test through fabs by adjusting the mask.
1069 Value *FAbsSrc;
1070 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1071 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1072 return replaceOperand(II, 0, FAbsSrc);
1073 }
1074
1075 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1076 (IsOrdered || IsUnordered) && !IsStrict) {
1077 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1078 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1079 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1080 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1082 FCmpInst::Predicate Pred =
1083 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1084 if (OrderedInvertedMask == fcInf)
1085 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1086
1087 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
1088 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1089 CmpInf->takeName(&II);
1090 return replaceInstUsesWith(II, CmpInf);
1091 }
1092
1093 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1094 (IsOrdered || IsUnordered) && !IsStrict) {
1095 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1096 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1097 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1098 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1099 Constant *Inf =
1100 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1101 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1102 : Builder.CreateFCmpOEQ(Src0, Inf);
1103
1104 EqInf->takeName(&II);
1105 return replaceInstUsesWith(II, EqInf);
1106 }
1107
1108 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1109 (IsOrdered || IsUnordered) && !IsStrict) {
1110 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1111 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1112 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1113 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1115 OrderedInvertedMask == fcNegInf);
1116 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1117 : Builder.CreateFCmpONE(Src0, Inf);
1118 NeInf->takeName(&II);
1119 return replaceInstUsesWith(II, NeInf);
1120 }
1121
1122 if (Mask == fcNan && !IsStrict) {
1123 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1124 // exceptions.
1125 Value *IsNan =
1126 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1127 IsNan->takeName(&II);
1128 return replaceInstUsesWith(II, IsNan);
1129 }
1130
1131 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1132 // Equivalent of !isnan. Replace with standard fcmp.
1133 Value *FCmp =
1134 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1135 FCmp->takeName(&II);
1136 return replaceInstUsesWith(II, FCmp);
1137 }
1138
1140
1141 // Try to replace with an fcmp with 0
1142 //
1143 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1144 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1145 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1146 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1147 //
1148 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1149 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1150 //
1151 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1152 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1153 //
1154 if (!IsStrict && (IsOrdered || IsUnordered) &&
1155 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1156 Src0->getType())) !=
1159 // Equivalent of == 0.
1160 Value *FCmp = Builder.CreateFCmp(
1161 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1162 Src0, Zero);
1163
1164 FCmp->takeName(&II);
1165 return replaceInstUsesWith(II, FCmp);
1166 }
1167
1168 KnownFPClass Known =
1169 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1170
1171 // Clear test bits we know must be false from the source value.
1172 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1173 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1174 if ((Mask & Known.KnownFPClasses) != Mask) {
1175 II.setArgOperand(
1176 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1177 return &II;
1178 }
1179
1180 // If none of the tests which can return false are possible, fold to true.
1181 // fp_class (nnan x), ~(qnan|snan) -> true
1182 // fp_class (ninf x), ~(ninf|pinf) -> true
1183 if (Mask == Known.KnownFPClasses)
1184 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1185
1186 return nullptr;
1187}
1188
// Determine the sign of \p Op: returns false for known non-negative, true
// for known negative, and std::nullopt when the sign cannot be determined.
// NOTE(review): the rendering dropped the hyperlinked line 1198 (the return
// expression for the nsw-sub match, presumably a dominating-condition
// query on X < Y — TODO confirm against upstream).
1189static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1190 KnownBits Known = computeKnownBits(Op, SQ);
1191 if (Known.isNonNegative())
1192 return false;
1193 if (Known.isNegative())
1194 return true;
1195
1196 Value *X, *Y;
1197 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1199
1200 return std::nullopt;
1201}
1202
// Like getKnownSign, but treats zero as an acceptable "negative" answer
// (sign-or-zero). Falls back to a dominating-condition query for nsw subs.
// NOTE(review): the rendering dropped line 1210 (the return expression for
// the nsw-sub match — TODO restore from upstream).
1203static std::optional<bool> getKnownSignOrZero(Value *Op,
1204 const SimplifyQuery &SQ) {
1205 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1206 return Sign;
1207
1208 Value *X, *Y;
1209 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1211
1212 return std::nullopt;
1213}
1214
1215/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1216static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1217 const SimplifyQuery &SQ) {
1218 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1219 if (!Known1)
1220 return false;
1221 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1222 if (!Known0)
1223 return false;
1224 return *Known0 == *Known1;
1225}
1226
1227/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1228/// can trigger other combines.
// NOTE(review): the signature opening (original line 1229, the function
// name and IntrinsicInst parameter) was dropped by the rendering.
1230 InstCombiner::BuilderTy &Builder) {
1231 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1232 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1233 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1234 "Expected a min or max intrinsic");
1235
1236 // TODO: Match vectors with undef elements, but undef may not propagate.
1237 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1238 Value *X;
1239 const APInt *C0, *C1;
1240 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1241 !match(Op1, m_APInt(C1)))
1242 return nullptr;
1243
1244 // Check for necessary no-wrap and overflow constraints.
// The add must carry the no-wrap flag matching the signedness of the
// min/max, or hoisting the add past it would change results.
1245 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1246 auto *Add = cast<BinaryOperator>(Op0);
1247 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1248 (!IsSigned && !Add->hasNoUnsignedWrap()))
1249 return nullptr;
1250
1251 // If the constant difference overflows, then instsimplify should reduce the
1252 // min/max to the add or C1.
1253 bool Overflow;
1254 APInt CDiff =
1255 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1256 assert(!Overflow && "Expected simplify of min/max");
1257
1258 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1259 // Note: the "mismatched" no-overflow setting does not propagate.
1260 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1261 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1262 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1263 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1264}
1265/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1266Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1267 Type *Ty = MinMax1.getType();
1268
1269 // We are looking for a tree of:
1270 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1271 // Where the min and max could be reversed
1272 Instruction *MinMax2;
1273 BinaryOperator *AddSub;
1274 const APInt *MinValue, *MaxValue;
1275 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1276 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1277 return nullptr;
1278 } else if (match(&MinMax1,
1279 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1280 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1281 return nullptr;
1282 } else
1283 return nullptr;
1284
1285 // Check that the constants clamp a saturate, and that the new type would be
1286 // sensible to convert to.
1287 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1288 return nullptr;
1289 // In what bitwidth can this be treated as saturating arithmetics?
1290 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1291 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1292 // good first approximation for what should be done there.
1293 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1294 return nullptr;
1295
1296 // Also make sure that the inner min/max and the add/sub have one use.
1297 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1298 return nullptr;
1299
1300 // Create the new type (which can be a vector type)
1301 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1302
1303 Intrinsic::ID IntrinsicID;
1304 if (AddSub->getOpcode() == Instruction::Add)
1305 IntrinsicID = Intrinsic::sadd_sat;
1306 else if (AddSub->getOpcode() == Instruction::Sub)
1307 IntrinsicID = Intrinsic::ssub_sat;
1308 else
1309 return nullptr;
1310
1311 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1312 // is usually achieved via a sext from a smaller type.
1313 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1314 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1315 return nullptr;
1316
1317 // Finally create and return the sat intrinsic, truncated to the new type
1318 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1319 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1320 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1321 return CastInst::Create(Instruction::SExt, Sat, Ty);
1322}
1323
1324
1325/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1326/// can only be one of two possible constant values -- turn that into a select
1327/// of constants.
// NOTE(review): the rendering dropped line 1328 (signature opening) and
// line 1336 (presumably `ICmpInst::Predicate Pred =
// CmpInst::BAD_ICMP_PREDICATE;` — TODO confirm; Pred is read below).
1329 InstCombiner::BuilderTy &Builder) {
1330 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1331 Value *X;
1332 const APInt *C0, *C1;
1333 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1334 return nullptr;
1335
// Each case requires the two clamp constants to be adjacent (differ by 1)
// so exactly two output values are possible.
1337 switch (II->getIntrinsicID()) {
1338 case Intrinsic::smax:
1339 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1340 Pred = ICmpInst::ICMP_SGT;
1341 break;
1342 case Intrinsic::smin:
1343 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1344 Pred = ICmpInst::ICMP_SLT;
1345 break;
1346 case Intrinsic::umax:
1347 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1348 Pred = ICmpInst::ICMP_UGT;
1349 break;
1350 case Intrinsic::umin:
1351 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1352 Pred = ICmpInst::ICMP_ULT;
1353 break;
1354 default:
1355 llvm_unreachable("Expected min/max intrinsic");
1356 }
1357 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1358 return nullptr;
1359
1360 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1361 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1362 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1363 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1364}
1365
1366/// If this min/max has a constant operand and an operand that is a matching
1367/// min/max with a constant operand, constant-fold the 2 constant operands.
// NOTE(review): the rendering dropped line 1368 (signature opening) and
// line 1392 (presumably the derivation of `Pred` from the min/max kind,
// e.g. `ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(...)` —
// TODO confirm; Pred is read below).
1369 IRBuilderBase &Builder,
1370 const SimplifyQuery &SQ) {
1371 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1372 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1373 if (!LHS)
1374 return nullptr;
1375
1376 Constant *C0, *C1;
1377 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1378 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1379 return nullptr;
1380
1381 // max (max X, C0), C1 --> max X, (max C0, C1)
1382 // min (min X, C0), C1 --> min X, (min C0, C1)
1383 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1384 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1385 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1386 if (InnerMinMaxID != MinMaxID &&
1387 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1388 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1389 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1390 return nullptr;
1391
// Select the min/max of the two constants with an icmp+select, which
// constant-folds, and rebuild the inner min/max with the folded constant.
1393 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1394 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1395 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1396 {LHS->getArgOperand(0), NewC});
1397}
1398
1399/// If this min/max has a matching min/max operand with a constant, try to push
1400/// the constant operand into this instruction. This can enable more folds.
// NOTE(review): the rendering dropped lines 1402 (function name and first
// parameter), 1408/1410 (the opening of the m_c_MaxOrMin match and the
// constant capture), 1418 (part of the inner-op guard), and 1422 (the
// Intrinsic::getOrInsertDeclaration call producing `MinMax`). Restore from
// upstream before editing; several statements below are truncated.
1401static Instruction *
1403 InstCombiner::BuilderTy &Builder) {
1404 // Match and capture a min/max operand candidate.
1405 Value *X, *Y;
1406 Constant *C;
1407 Instruction *Inner;
1409 m_Instruction(Inner),
1411 m_Value(Y))))
1412 return nullptr;
1413
1414 // The inner op must match. Check for constants to avoid infinite loops.
1415 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1416 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1417 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1419 return nullptr;
1420
1421 // max (max X, C), Y --> max (max X, Y), C
1423 MinMaxID, II->getType());
1424 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1425 NewInner->takeName(Inner);
1426 return CallInst::Create(MinMax, {NewInner, C});
1427}
1428
1429/// Reduce a sequence of min/max intrinsics with a common operand.
// NOTE(review): the signature (original line 1430, presumably
// `static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {` — TODO
// confirm) was dropped by the rendering.
1431 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1432 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1433 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1434 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1435 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1436 RHS->getIntrinsicID() != MinMaxID ||
1437 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1438 return nullptr;
1439
1440 Value *A = LHS->getArgOperand(0);
1441 Value *B = LHS->getArgOperand(1);
1442 Value *C = RHS->getArgOperand(0);
1443 Value *D = RHS->getArgOperand(1);
1444
1445 // Look for a common operand.
1446 Value *MinMaxOp = nullptr;
1447 Value *ThirdOp = nullptr;
1448 if (LHS->hasOneUse()) {
1449 // If the LHS is only used in this chain and the RHS is used outside of it,
1450 // reuse the RHS min/max because that will eliminate the LHS.
1451 if (D == A || C == A) {
1452 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1453 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1454 MinMaxOp = RHS;
1455 ThirdOp = B;
1456 } else if (D == B || C == B) {
1457 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1458 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1459 MinMaxOp = RHS;
1460 ThirdOp = A;
1461 }
1462 } else {
1463 assert(RHS->hasOneUse() && "Expected one-use operand");
1464 // Reuse the LHS. This will eliminate the RHS.
1465 if (D == A || D == B) {
1466 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1467 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1468 MinMaxOp = LHS;
1469 ThirdOp = C;
1470 } else if (C == A || C == B) {
1471 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1472 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1473 MinMaxOp = LHS;
1474 ThirdOp = D;
1475 }
1476 }
1477
1478 if (!MinMaxOp || !ThirdOp)
1479 return nullptr;
1480
// Rebuild as a two-level tree reusing the kept inner min/max.
1481 Module *Mod = II->getModule();
1482 Function *MinMax =
1483 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1484 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1485}
1486
1487/// If all arguments of the intrinsic are unary shuffles with the same mask,
1488/// try to shuffle after the intrinsic.
// NOTE(review): the rendering dropped the signature lines (original
// 1489-1490) and line 1514 (presumably `SmallVector<Value *> NewArgs;` —
// TODO confirm; NewArgs is used below).
1491 if (!II->getType()->isVectorTy() ||
1492 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1493 !II->getCalledFunction()->isSpeculatable())
1494 return nullptr;
1495
1496 Value *X;
1497 Constant *C;
1498 ArrayRef<int> Mask;
// Find the first argument that is neither a constant nor a scalar operand;
// it supplies the canonical shuffle mask.
1499 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1500 return isa<Constant>(Arg.get()) ||
1501 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1502 Arg.getOperandNo(), nullptr);
1503 });
1504 if (!NonConstArg ||
1505 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1506 return nullptr;
1507
1508 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1509 // instructions.
1510 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1511 return nullptr;
1512
1513 // See if all arguments are shuffled with the same mask.
1515 Type *SrcTy = X->getType();
1516 for (Use &Arg : II->args()) {
1517 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1518 Arg.getOperandNo(), nullptr))
1519 NewArgs.push_back(Arg);
1520 else if (match(&Arg,
1521 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1522 X->getType() == SrcTy)
1523 NewArgs.push_back(X);
1524 else if (match(&Arg, m_ImmConstant(C))) {
1525 // If it's a constant, try find the constant that would be shuffled to C.
1526 if (Constant *ShuffledC =
1527 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1528 NewArgs.push_back(ShuffledC);
1529 else
1530 return nullptr;
1531 } else
1532 return nullptr;
1533 }
1534
1535 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1536 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1537 // Result type might be a different vector width.
1538 // TODO: Check that the result type isn't widened?
1539 VectorType *ResTy =
1540 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1541 Value *NewIntrinsic =
1542 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1543 return new ShuffleVectorInst(NewIntrinsic, Mask);
1545
1546/// If all arguments of the intrinsic are reverses, try to pull the reverse
1547/// after the intrinsic.
// NOTE(review): the signature (original line 1548) was dropped by the
// rendering — restore from upstream before editing.
1549 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1550 return nullptr;
1551
1552 // At least 1 operand must be a reverse with 1 use because we are creating 2
1553 // instructions.
1554 if (none_of(II->args(), [](Value *V) {
1555 return match(V, m_OneUse(m_VecReverse(m_Value())));
1556 }))
1557 return nullptr;
1558
1559 Value *X;
1560 Constant *C;
1561 SmallVector<Value *> NewArgs;
1562 for (Use &Arg : II->args()) {
// Scalar operands and splats are lane-order independent, so they pass
// through unchanged; reverses are stripped; constants get reversed.
1563 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1564 Arg.getOperandNo(), nullptr))
1565 NewArgs.push_back(Arg);
1566 else if (match(&Arg, m_VecReverse(m_Value(X))))
1567 NewArgs.push_back(X);
1568 else if (isSplatValue(Arg))
1569 NewArgs.push_back(Arg);
1570 else if (match(&Arg, m_ImmConstant(C)))
1571 NewArgs.push_back(Builder.CreateVectorReverse(C));
1572 else
1573 return nullptr;
1574 }
1575
1576 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1577 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1578 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1579 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1580 return Builder.CreateVectorReverse(NewIntrinsic);
1581}
1582
1583/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1584///   bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1585///   bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
// NOTE(review): the rendering dropped lines 1587 (function name and first
// parameter), 1595-1596 and 1598 (the one-use bitwise-binop match that
// binds the opcode `Op`, X and Y, and opens the `if` this body lives in).
// Restore from upstream before editing.
1586template <Intrinsic::ID IntrID>
1588 InstCombiner::BuilderTy &Builder) {
1589 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1590 "This helper only supports BSWAP and BITREVERSE intrinsics");
1591
1592 Value *X, *Y;
1593 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1594 // don't match ConstantExpr that aren't meaningful for this transform.
1597 Value *OldReorderX, *OldReorderY;
1599
1600 // If both X and Y are bswap/bitreverse, the transform reduces the number
1601 // of instructions even if there's multiuse.
1602 // If only one operand is bswap/bitreverse, we need to ensure the operand
1603 // have only one use.
1604 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1605 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1606 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1607 }
1608
1609 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1610 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1611 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1612 }
1613
1614 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1615 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1616 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1617 }
1618 }
1619 return nullptr;
1620}
1621
1622/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1623/// `f(f(x, y), y) == f(x, y)` holds.
// NOTE(review): the signature (original line 1624, presumably
// `static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {` — TODO
// confirm) was dropped by the rendering.
1625 switch (IID) {
// All integer and FP min/max variants are idempotent.
1626 case Intrinsic::smax:
1627 case Intrinsic::smin:
1628 case Intrinsic::umax:
1629 case Intrinsic::umin:
1630 case Intrinsic::maximum:
1631 case Intrinsic::minimum:
1632 case Intrinsic::maximumnum:
1633 case Intrinsic::minimumnum:
1634 case Intrinsic::maxnum:
1635 case Intrinsic::minnum:
1636 return true;
1637 default:
1638 return false;
1639 }
1640}
1641
1642/// Attempt to simplify value-accumulating recurrences of kind:
1643///   %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1644///   %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1645/// And let the idempotent binary intrinsic be hoisted, when the operands are
1646/// known to be loop-invariant.
// NOTE(review): the rendering dropped line 1647 (function name/first
// parameter) and line 1656 (the recurrence matcher binding PN, Init and
// OtherOp — TODO confirm against upstream; all three are read below).
1648 IntrinsicInst *II) {
1649 PHINode *PN;
1650 Value *Init, *OtherOp;
1651
1652 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1653 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1654 auto IID = II->getIntrinsicID();
1655 if (!isIdempotentBinaryIntrinsic(IID) ||
1657 !IC.getDominatorTree().dominates(OtherOp, PN))
1658 return nullptr;
1659
1660 auto *InvariantBinaryInst =
1661 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
// Preserve FMF for the floating-point min/max variants.
1662 if (isa<FPMathOperator>(InvariantBinaryInst))
1663 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1664 return InvariantBinaryInst;
1665}
1666
1667static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1668 if (!CanReorderLanes)
1669 return nullptr;
1670
1671 Value *V;
1672 if (match(Arg, m_VecReverse(m_Value(V))))
1673 return V;
1674
1675 ArrayRef<int> Mask;
1676 if (!isa<FixedVectorType>(Arg->getType()) ||
1677 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1678 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1679 return nullptr;
1680
1681 int Sz = Mask.size();
1682 SmallBitVector UsedIndices(Sz);
1683 for (int Idx : Mask) {
1684 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1685 return nullptr;
1686 UsedIndices.set(Idx);
1687 }
1688
1689 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1690 // other changes.
1691 return UsedIndices.all() ? V : nullptr;
1692}
1693
1694/// Fold an unsigned minimum of trailing or leading zero bits counts:
1695///   umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1696///   umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1697///                                              >> ConstOp))
1698///   umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1699///   umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
// NOTE(review): the rendering dropped line 1702 (the function name and
// leading I0/I1 parameters) and line 1728 (presumably the
// `Constant *NewConst = ConstantFoldBinaryOpOperands(` opening — TODO
// confirm; NewConst is read below).
1700template <Intrinsic::ID IntrID>
1701static Value *
1703 const DataLayout &DL,
1704 InstCombiner::BuilderTy &Builder) {
1705 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1706 "This helper only supports cttz and ctlz intrinsics");
1707
1708 Value *CtOp1, *CtOp2;
1709 Value *ZeroUndef1, *ZeroUndef2;
1710 if (!match(I0, m_OneUse(
1711 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1712 return nullptr;
1713
// Two counts: merge the counted values with `or`.
1714 if (match(I1,
1715 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1716 return Builder.CreateBinaryIntrinsic(
1717 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1718 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1719
1720 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1721 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1722 if (!match(I1, m_CheckedInt(LessBitWidth)))
1723 // We have a constant >= BitWidth (which can be handled by CVP)
1724 // or a non-splat vector with elements < and >= BitWidth
1725 return nullptr;
1726
// Count against a constant: plant a sentinel bit at position ConstOp so
// the count can never exceed it, making the umin redundant.
1727 Type *Ty = I1->getType();
1729 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1730 IntrID == Intrinsic::cttz
1731 ? ConstantInt::get(Ty, 1)
1732 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1733 cast<Constant>(I1), DL);
1734 return Builder.CreateBinaryIntrinsic(
1735 IntrID, Builder.CreateOr(CtOp1, NewConst),
1736 ConstantInt::getTrue(ZeroUndef1->getType()));
1737}
1738
1739/// Return whether "X LOp (Y ROp Z)" is always equal to
1740/// "(X LOp Y) ROp (X LOp Z)".
// NOTE(review): the signature (original line 1741, presumably
// `static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
// bool HasNUW,` — TODO confirm) was dropped by the rendering.
1743 switch (ROp) {
// nuw add/shl distribute over unsigned min/max; nsw add over signed.
1744 case Intrinsic::umax:
1745 case Intrinsic::umin:
1746 if (HasNUW && LOp == Instruction::Add)
1747 return true;
1748 if (HasNUW && LOp == Instruction::Shl)
1749 return true;
1750 return false;
1751 case Intrinsic::smax:
1752 case Intrinsic::smin:
1753 return HasNSW && LOp == Instruction::Add;
1754 default:
1755 return false;
1756 }
1757}
1758
1759/// Return whether "(X ROp Y) LOp Z" is always equal to
1760/// "(X LOp Z) ROp (Y LOp Z)".
// NOTE(review): the signature (original line 1761) was dropped by the
// rendering — restore from upstream before editing.
1762 bool HasNSW, Intrinsic::ID ROp) {
// Commutative LOp (and shl) reduce to the left-distributivity check;
// otherwise only no-wrap sub distributes over the matching min/max kind.
1763 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1764 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1765 switch (ROp) {
1766 case Intrinsic::umax:
1767 case Intrinsic::umin:
1768 return HasNUW && LOp == Instruction::Sub;
1769 case Intrinsic::smax:
1770 case Intrinsic::smin:
1771 return HasNSW && LOp == Instruction::Sub;
1772 default:
1773 return false;
1774 }
1775}
1776
1777// Attempts to factorise a common term
1778// in an instruction that has the form "(A op' B) op (C op' D)
1779// where op is an intrinsic and op' is a binop
// NOTE(review): the rendering dropped line 1781 (function name and first
// parameter) and lines 1786-1787 (presumably the dyn_cast<BinaryOperator>
// of LHS/RHS into Op0/Op1 — TODO confirm; Op0/Op1 are read below).
1780static Value *
1782 InstCombiner::BuilderTy &Builder) {
1783 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1784 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1785
1788
1789 if (!Op0 || !Op1)
1790 return nullptr;
1791
// Both inner binops must share the same opcode and be single-use so the
// factorised form does not duplicate work.
1792 if (Op0->getOpcode() != Op1->getOpcode())
1793 return nullptr;
1794
1795 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1796 return nullptr;
1797
1798 Instruction::BinaryOps InnerOpcode =
1799 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
// No-wrap flags only survive if both inner ops carry them.
1800 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1801 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1802
1803 Value *A = Op0->getOperand(0);
1804 Value *B = Op0->getOperand(1);
1805 Value *C = Op1->getOperand(0);
1806 Value *D = Op1->getOperand(1);
1807
1808 // Attempts to swap variables such that A equals C or B equals D,
1809 // if the inner operation is commutative.
1810 if (Op0->isCommutative() && A != C && B != D) {
1811 if (A == D || B == C)
1812 std::swap(C, D);
1813 else
1814 return nullptr;
1815 }
1816
1817 BinaryOperator *NewBinop;
1818 if (A == C &&
1819 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
// (A op' B) op (A op' D) --> A op' (B op D)
1820 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1821 NewBinop =
1822 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1823 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1824 TopLevelOpcode)) {
// (A op' B) op (C op' B) --> (A op C) op' B
1825 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1826 NewBinop =
1827 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1828 } else {
1829 return nullptr;
1830 }
1831
1832 NewBinop->setHasNoUnsignedWrap(HasNUW);
1833 NewBinop->setHasNoSignedWrap(HasNSW);
1834
1835 return NewBinop;
1836}
1837
  // Fold an ARM/AArch64 NEON shift-by-register intrinsic whose shift amount
  // is a constant into a plain IR shift: per the NEON VSHL/SSHL/USHL
  // semantics a non-negative element shifts left and a negative element
  // shifts right by the negated amount.
  // NOTE(review): the signature line of this helper was lost in this
  // rendering of the file; it takes the InstCombiner (IC) and the intrinsic
  // call (II) — confirm the exact name/signature against upstream.
  Value *Arg0 = II->getArgOperand(0);
  auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
  if (!ShiftConst)
    return nullptr;

  int ElemBits = Arg0->getType()->getScalarSizeInBits();
  bool AllPositive = true;
  bool AllNegative = true;

  // Returns true iff C is a ConstantInt (null/undef elements are rejected)
  // whose sign agrees with every element seen so far and whose magnitude is a
  // valid shift amount: [0, ElemBits) when non-negative, (-ElemBits, 0) when
  // negative. Mixed-sign vectors therefore fail, since a single IR shift
  // cannot express both directions.
  auto Check = [&](Constant *C) -> bool {
    if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
      const APInt &V = CI->getValue();
      if (V.isNonNegative()) {
        AllNegative = false;
        return AllPositive && V.ult(ElemBits);
      }
      AllPositive = false;
      return AllNegative && V.sgt(-ElemBits);
    }
    return false;
  };

  // Validate every lane of a fixed vector amount, or the lone scalar amount.
  if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
    for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
      if (!Check(ShiftConst->getAggregateElement(I)))
        return nullptr;
    }

  } else if (!Check(ShiftConst))
    return nullptr;

  IRBuilderBase &B = IC.Builder;
  // All amounts non-negative: a plain left shift.
  if (AllPositive)
    return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));

  // All amounts negative: shift right by the negated amount; the signed
  // intrinsic variants use an arithmetic shift, the others a logical one.
  Value *NegAmt = B.CreateNeg(ShiftConst);
  Intrinsic::ID IID = II->getIntrinsicID();
  const bool IsSigned =
      IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
  Value *Result =
      IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
  return IC.replaceInstUsesWith(*II, Result);
}
1882
1883/// CallInst simplification. This mostly only handles folding of intrinsic
1884/// instructions. For normal calls, it allows visitCallBase to do the heavy
1885/// lifting.
1887 // Don't try to simplify calls without uses. It will not do anything useful,
1888 // but will result in the following folds being skipped.
1889 if (!CI.use_empty()) {
1890 SmallVector<Value *, 8> Args(CI.args());
1891 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1892 SQ.getWithInstruction(&CI)))
1893 return replaceInstUsesWith(CI, V);
1894 }
1895
1896 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1897 return visitFree(CI, FreedOp);
1898
1899 // If the caller function (i.e. us, the function that contains this CallInst)
1900 // is nounwind, mark the call as nounwind, even if the callee isn't.
1901 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1902 CI.setDoesNotThrow();
1903 return &CI;
1904 }
1905
1907 if (!II)
1908 return visitCallBase(CI);
1909
1910 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1911 // instead of in visitCallBase.
1912 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1913 if (auto NumBytes = MI->getLengthInBytes()) {
1914 // memmove/cpy/set of zero bytes is a noop.
1915 if (NumBytes->isZero())
1916 return eraseInstFromFunction(CI);
1917
1918 // For atomic unordered mem intrinsics if len is not a positive or
1919 // not a multiple of element size then behavior is undefined.
1920 if (MI->isAtomic() &&
1921 (NumBytes->isNegative() ||
1922 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1924 assert(MI->getType()->isVoidTy() &&
1925 "non void atomic unordered mem intrinsic");
1926 return eraseInstFromFunction(*MI);
1927 }
1928 }
1929
1930 // No other transformations apply to volatile transfers.
1931 if (MI->isVolatile())
1932 return nullptr;
1933
1935 // memmove(x,x,size) -> noop.
1936 if (MTI->getSource() == MTI->getDest())
1937 return eraseInstFromFunction(CI);
1938 }
1939
1940 auto IsPointerUndefined = [MI](Value *Ptr) {
1941 return isa<ConstantPointerNull>(Ptr) &&
1943 MI->getFunction(),
1944 cast<PointerType>(Ptr->getType())->getAddressSpace());
1945 };
1946 bool SrcIsUndefined = false;
1947 // If we can determine a pointer alignment that is bigger than currently
1948 // set, update the alignment.
1949 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1951 return I;
1952 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1953 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1954 if (Instruction *I = SimplifyAnyMemSet(MSI))
1955 return I;
1956 }
1957
1958 // If src/dest is null, this memory intrinsic must be a noop.
1959 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1960 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1961 return eraseInstFromFunction(CI);
1962 }
1963
1964 // If we have a memmove and the source operation is a constant global,
1965 // then the source and dest pointers can't alias, so we can change this
1966 // into a call to memcpy.
1967 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1968 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1969 if (GVSrc->isConstant()) {
1970 Module *M = CI.getModule();
1971 Intrinsic::ID MemCpyID =
1972 MMI->isAtomic()
1973 ? Intrinsic::memcpy_element_unordered_atomic
1974 : Intrinsic::memcpy;
1975 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1976 CI.getArgOperand(1)->getType(),
1977 CI.getArgOperand(2)->getType() };
1979 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1980 return II;
1981 }
1982 }
1983 }
1984
1985 // For fixed width vector result intrinsics, use the generic demanded vector
1986 // support.
1987 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1988 auto VWidth = IIFVTy->getNumElements();
1989 APInt PoisonElts(VWidth, 0);
1990 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1991 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1992 if (V != II)
1993 return replaceInstUsesWith(*II, V);
1994 return II;
1995 }
1996 }
1997
1998 if (II->isCommutative()) {
1999 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2000 replaceOperand(*II, 0, Pair->first);
2001 replaceOperand(*II, 1, Pair->second);
2002 return II;
2003 }
2004
2005 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2006 return NewCall;
2007 }
2008
2009 // Unused constrained FP intrinsic calls may have declared side effect, which
2010 // prevents it from being removed. In some cases however the side effect is
2011 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2012 // returns a replacement, the call may be removed.
2013 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2014 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2015 return eraseInstFromFunction(CI);
2016 }
2017
2018 Intrinsic::ID IID = II->getIntrinsicID();
2019 switch (IID) {
2020 case Intrinsic::objectsize: {
2021 SmallVector<Instruction *> InsertedInstructions;
2022 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2023 &InsertedInstructions)) {
2024 for (Instruction *Inserted : InsertedInstructions)
2025 Worklist.add(Inserted);
2026 return replaceInstUsesWith(CI, V);
2027 }
2028 return nullptr;
2029 }
2030 case Intrinsic::abs: {
2031 Value *IIOperand = II->getArgOperand(0);
2032 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2033
2034 // abs(-x) -> abs(x)
2035 Value *X;
2036 if (match(IIOperand, m_Neg(m_Value(X)))) {
2037 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2038 replaceOperand(*II, 1, Builder.getTrue());
2039 return replaceOperand(*II, 0, X);
2040 }
2041 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2042 return replaceOperand(*II, 0, X);
2043
2044 Value *Y;
2045 // abs(a * abs(b)) -> abs(a * b)
2046 if (match(IIOperand,
2049 bool NSW =
2050 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2051 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2052 return replaceOperand(*II, 0, XY);
2053 }
2054
2055 if (std::optional<bool> Known =
2056 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2057 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2058 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2059 if (!*Known)
2060 return replaceInstUsesWith(*II, IIOperand);
2061
2062 // abs(x) -> -x if x < 0
2063 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2064 if (IntMinIsPoison)
2065 return BinaryOperator::CreateNSWNeg(IIOperand);
2066 return BinaryOperator::CreateNeg(IIOperand);
2067 }
2068
2069 // abs (sext X) --> zext (abs X*)
2070 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2071 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2072 Value *NarrowAbs =
2073 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2074 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2075 }
2076
2077 // Match a complicated way to check if a number is odd/even:
2078 // abs (srem X, 2) --> and X, 1
2079 const APInt *C;
2080 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2081 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2082
2083 break;
2084 }
2085 case Intrinsic::umin: {
2086 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2087 // umin(x, 1) == zext(x != 0)
2088 if (match(I1, m_One())) {
2089 assert(II->getType()->getScalarSizeInBits() != 1 &&
2090 "Expected simplify of umin with max constant");
2091 Value *Zero = Constant::getNullValue(I0->getType());
2092 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2093 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2094 }
2095 // umin(cttz(x), const) --> cttz(x | (1 << const))
2096 if (Value *FoldedCttz =
2098 I0, I1, DL, Builder))
2099 return replaceInstUsesWith(*II, FoldedCttz);
2100 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2101 if (Value *FoldedCtlz =
2103 I0, I1, DL, Builder))
2104 return replaceInstUsesWith(*II, FoldedCtlz);
2105 [[fallthrough]];
2106 }
2107 case Intrinsic::umax: {
2108 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2109 Value *X, *Y;
2110 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2111 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2112 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2113 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2114 }
2115 Constant *C;
2116 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2117 I0->hasOneUse()) {
2118 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2119 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2120 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2121 }
2122 }
2123 // If C is not 0:
2124 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2125 // If C is not 0 or 1:
2126 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2127 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2128 const APInt *C;
2129 Value *X;
2130 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2131 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2132 return nullptr;
2133 if (C->isZero())
2134 return nullptr;
2135 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2136 return nullptr;
2137
2138 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2139 Value *NewSelect = nullptr;
2140 NewSelect = Builder.CreateSelectWithUnknownProfile(
2141 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2142 return replaceInstUsesWith(*II, NewSelect);
2143 };
2144
2145 if (IID == Intrinsic::umax) {
2146 if (Instruction *I = foldMaxMulShift(I0, I1))
2147 return I;
2148 if (Instruction *I = foldMaxMulShift(I1, I0))
2149 return I;
2150 }
2151
2152 // If both operands of unsigned min/max are sign-extended, it is still ok
2153 // to narrow the operation.
2154 [[fallthrough]];
2155 }
2156 case Intrinsic::smax:
2157 case Intrinsic::smin: {
2158 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2159 Value *X, *Y;
2160 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2161 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2162 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2163 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2164 }
2165
2166 Constant *C;
2167 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2168 I0->hasOneUse()) {
2169 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2170 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2171 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2172 }
2173 }
2174
2175 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2176 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2177 const APInt *MinC, *MaxC;
2178 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2179 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2180 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2181 Value *NewMax = Builder.CreateBinaryIntrinsic(
2182 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2183 return replaceInstUsesWith(
2184 *II, Builder.CreateBinaryIntrinsic(
2185 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2186 };
2187 if (IID == Intrinsic::smax &&
2189 m_APInt(MinC)))) &&
2190 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2191 return CreateCanonicalClampForm(true);
2192 if (IID == Intrinsic::umax &&
2194 m_APInt(MinC)))) &&
2195 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2196 return CreateCanonicalClampForm(false);
2197
2198 // umin(i1 X, i1 Y) -> and i1 X, Y
2199 // smax(i1 X, i1 Y) -> and i1 X, Y
2200 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2201 II->getType()->isIntOrIntVectorTy(1)) {
2202 return BinaryOperator::CreateAnd(I0, I1);
2203 }
2204
2205 // umax(i1 X, i1 Y) -> or i1 X, Y
2206 // smin(i1 X, i1 Y) -> or i1 X, Y
2207 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2208 II->getType()->isIntOrIntVectorTy(1)) {
2209 return BinaryOperator::CreateOr(I0, I1);
2210 }
2211
2212 // smin(smax(X, -1), 1) -> scmp(X, 0)
2213 // smax(smin(X, 1), -1) -> scmp(X, 0)
2214 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2215 // And i1's have been changed to and/ors
2216 // So we only need to check for smin
2217 if (IID == Intrinsic::smin) {
2218 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2219 match(I1, m_One())) {
2220 Value *Zero = ConstantInt::get(X->getType(), 0);
2221 return replaceInstUsesWith(
2222 CI,
2223 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2224 }
2225 }
2226
2227 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2228 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2229 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2230 // TODO: Canonicalize neg after min/max if I1 is constant.
2231 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2232 (I0->hasOneUse() || I1->hasOneUse())) {
2234 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2235 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2236 }
2237 }
2238
2239 // (umax X, (xor X, Pow2))
2240 // -> (or X, Pow2)
2241 // (umin X, (xor X, Pow2))
2242 // -> (and X, ~Pow2)
2243 // (smax X, (xor X, Pos_Pow2))
2244 // -> (or X, Pos_Pow2)
2245 // (smin X, (xor X, Pos_Pow2))
2246 // -> (and X, ~Pos_Pow2)
2247 // (smax X, (xor X, Neg_Pow2))
2248 // -> (and X, ~Neg_Pow2)
2249 // (smin X, (xor X, Neg_Pow2))
2250 // -> (or X, Neg_Pow2)
2251 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2252 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2253 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2254 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2255 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2256
2257 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2258 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2259 if (KnownSign == std::nullopt) {
2260 UseOr = false;
2261 UseAndN = false;
2262 } else if (*KnownSign /* true is Signed. */) {
2263 UseOr ^= true;
2264 UseAndN ^= true;
2265 Type *Ty = I0->getType();
2266 // Negative power of 2 must be IntMin. It's possible to be able to
2267 // prove negative / power of 2 without actually having known bits, so
2268 // just get the value by hand.
2270 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2271 }
2272 }
2273 if (UseOr)
2274 return BinaryOperator::CreateOr(I0, X);
2275 else if (UseAndN)
2276 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2277 }
2278
2279 // If we can eliminate ~A and Y is free to invert:
2280 // max ~A, Y --> ~(min A, ~Y)
2281 //
2282 // Examples:
2283 // max ~A, ~Y --> ~(min A, Y)
2284 // max ~A, C --> ~(min A, ~C)
2285 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2286 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2287 Value *A;
2288 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2289 !isFreeToInvert(A, A->hasOneUse())) {
2290 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2292 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2293 return BinaryOperator::CreateNot(InvMaxMin);
2294 }
2295 }
2296 return nullptr;
2297 };
2298
2299 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2300 return I;
2301 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2302 return I;
2303
2305 return I;
2306
2307 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2308 const APInt *RHSC;
2309 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2310 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2311 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2312 ConstantInt::get(II->getType(), *RHSC));
2313
2314 // smax(X, -X) --> abs(X)
2315 // smin(X, -X) --> -abs(X)
2316 // umax(X, -X) --> -abs(X)
2317 // umin(X, -X) --> abs(X)
2318 if (isKnownNegation(I0, I1)) {
2319 // We can choose either operand as the input to abs(), but if we can
2320 // eliminate the only use of a value, that's better for subsequent
2321 // transforms/analysis.
2322 if (I0->hasOneUse() && !I1->hasOneUse())
2323 std::swap(I0, I1);
2324
2325 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2326 // operation and potentially its negation.
2327 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2328 Value *Abs = Builder.CreateBinaryIntrinsic(
2329 Intrinsic::abs, I0,
2330 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2331
2332 // We don't have a "nabs" intrinsic, so negate if needed based on the
2333 // max/min operation.
2334 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2335 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2336 return replaceInstUsesWith(CI, Abs);
2337 }
2338
2340 return Sel;
2341
2342 if (Instruction *SAdd = matchSAddSubSat(*II))
2343 return SAdd;
2344
2345 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2346 return replaceInstUsesWith(*II, NewMinMax);
2347
2349 return R;
2350
2351 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2352 return NewMinMax;
2353
2354 // Try to fold minmax with constant RHS based on range information
2355 if (match(I1, m_APIntAllowPoison(RHSC))) {
2356 ICmpInst::Predicate Pred =
2358 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2360 I0, IsSigned, SQ.getWithInstruction(II));
2361 if (!LHS_CR.isFullSet()) {
2362 if (LHS_CR.icmp(Pred, *RHSC))
2363 return replaceInstUsesWith(*II, I0);
2364 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2365 return replaceInstUsesWith(*II,
2366 ConstantInt::get(II->getType(), *RHSC));
2367 }
2368 }
2369
2371 return replaceInstUsesWith(*II, V);
2372
2373 break;
2374 }
2375 case Intrinsic::scmp: {
2376 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2377 Value *LHS, *RHS;
2378 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2379 return replaceInstUsesWith(
2380 CI,
2381 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2382 break;
2383 }
2384 case Intrinsic::bitreverse: {
2385 Value *IIOperand = II->getArgOperand(0);
2386 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2387 Value *X;
2388 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2389 X->getType()->isIntOrIntVectorTy(1)) {
2390 Type *Ty = II->getType();
2391 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2392 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2394 }
2395
2396 if (Instruction *crossLogicOpFold =
2398 return crossLogicOpFold;
2399
2400 break;
2401 }
2402 case Intrinsic::bswap: {
2403 Value *IIOperand = II->getArgOperand(0);
2404
2405 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2406 // inverse-shift-of-bswap:
2407 // bswap (shl X, Y) --> lshr (bswap X), Y
2408 // bswap (lshr X, Y) --> shl (bswap X), Y
2409 Value *X, *Y;
2410 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2411 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2413 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2414 BinaryOperator::BinaryOps InverseShift =
2415 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2416 ? Instruction::LShr
2417 : Instruction::Shl;
2418 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2419 }
2420 }
2421
2422 KnownBits Known = computeKnownBits(IIOperand, II);
2423 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2424 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2425 unsigned BW = Known.getBitWidth();
2426
2427 // bswap(x) -> shift(x) if x has exactly one "active byte"
2428 if (BW - LZ - TZ == 8) {
2429 assert(LZ != TZ && "active byte cannot be in the middle");
2430 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2431 return BinaryOperator::CreateNUWShl(
2432 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2433 // -> lshr(x) if the "active byte" is in the high part of x
2434 return BinaryOperator::CreateExactLShr(
2435 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2436 }
2437
2438 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2439 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2440 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2441 Value *CV = ConstantInt::get(X->getType(), C);
2442 Value *V = Builder.CreateLShr(X, CV);
2443 return new TruncInst(V, IIOperand->getType());
2444 }
2445
2446 if (Instruction *crossLogicOpFold =
2448 return crossLogicOpFold;
2449 }
2450
2451 // Try to fold into bitreverse if bswap is the root of the expression tree.
2452 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2453 /*MatchBitReversals*/ true))
2454 return BitOp;
2455 break;
2456 }
2457 case Intrinsic::masked_load:
2458 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2459 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2460 break;
2461 case Intrinsic::masked_store:
2462 return simplifyMaskedStore(*II);
2463 case Intrinsic::masked_gather:
2464 return simplifyMaskedGather(*II);
2465 case Intrinsic::masked_scatter:
2466 return simplifyMaskedScatter(*II);
2467 case Intrinsic::launder_invariant_group:
2468 case Intrinsic::strip_invariant_group:
2469 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2470 return replaceInstUsesWith(*II, SkippedBarrier);
2471 break;
2472 case Intrinsic::powi:
2473 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2474 // 0 and 1 are handled in instsimplify
2475 // powi(x, -1) -> 1/x
2476 if (Power->isMinusOne())
2477 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2478 II->getArgOperand(0), II);
2479 // powi(x, 2) -> x*x
2480 if (Power->equalsInt(2))
2481 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2482 II->getArgOperand(0), II);
2483
2484 if (!Power->getValue()[0]) {
2485 Value *X;
2486 // If power is even:
2487 // powi(-x, p) -> powi(x, p)
2488 // powi(fabs(x), p) -> powi(x, p)
2489 // powi(copysign(x, y), p) -> powi(x, p)
2490 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2491 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2492 match(II->getArgOperand(0),
2494 return replaceOperand(*II, 0, X);
2495 }
2496 }
2497 break;
2498
2499 case Intrinsic::cttz:
2500 case Intrinsic::ctlz:
2501 if (auto *I = foldCttzCtlz(*II, *this))
2502 return I;
2503 break;
2504
2505 case Intrinsic::ctpop:
2506 if (auto *I = foldCtpop(*II, *this))
2507 return I;
2508 break;
2509
2510 case Intrinsic::fshl:
2511 case Intrinsic::fshr: {
2512 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2513 Type *Ty = II->getType();
2514 unsigned BitWidth = Ty->getScalarSizeInBits();
2515 Constant *ShAmtC;
2516 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2517 // Canonicalize a shift amount constant operand to modulo the bit-width.
2518 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2519 Constant *ModuloC =
2520 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2521 if (!ModuloC)
2522 return nullptr;
2523 if (ModuloC != ShAmtC)
2524 return replaceOperand(*II, 2, ModuloC);
2525
2527 ShAmtC, DL),
2528 m_One()) &&
2529 "Shift amount expected to be modulo bitwidth");
2530
2531 // Canonicalize funnel shift right by constant to funnel shift left. This
2532 // is not entirely arbitrary. For historical reasons, the backend may
2533 // recognize rotate left patterns but miss rotate right patterns.
2534 if (IID == Intrinsic::fshr) {
2535 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2536 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2537 return nullptr;
2538
2539 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2540 Module *Mod = II->getModule();
2541 Function *Fshl =
2542 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2543 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2544 }
2545 assert(IID == Intrinsic::fshl &&
2546 "All funnel shifts by simple constants should go left");
2547
2548 // fshl(X, 0, C) --> shl X, C
2549 // fshl(X, undef, C) --> shl X, C
2550 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2551 return BinaryOperator::CreateShl(Op0, ShAmtC);
2552
2553 // fshl(0, X, C) --> lshr X, (BW-C)
2554 // fshl(undef, X, C) --> lshr X, (BW-C)
2555 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2556 return BinaryOperator::CreateLShr(Op1,
2557 ConstantExpr::getSub(WidthC, ShAmtC));
2558
2559 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2560 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2561 Module *Mod = II->getModule();
2562 Function *Bswap =
2563 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2564 return CallInst::Create(Bswap, { Op0 });
2565 }
2566 if (Instruction *BitOp =
2567 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2568 /*MatchBitReversals*/ true))
2569 return BitOp;
2570
2571 // R = fshl(X, X, C2)
2572 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2573 Value *InnerOp;
2574 const APInt *ShAmtInnerC, *ShAmtOuterC;
2575 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2576 m_APInt(ShAmtInnerC))) &&
2577 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2578 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2579 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2580 if (Modulo.isZero())
2581 return replaceInstUsesWith(*II, InnerOp);
2582 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2584 {InnerOp, InnerOp, ModuloC});
2585 }
2586 }
2587
2588 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2589 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2590 // if BitWidth is a power-of-2
2591 Value *Y;
2592 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2593 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2594 Module *Mod = II->getModule();
2596 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2597 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2598 }
2599
2600 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2601 // power-of-2
2602 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2603 match(Op1, m_ZeroInt())) {
2604 Value *Op2 = II->getArgOperand(2);
2605 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2606 return BinaryOperator::CreateShl(Op0, And);
2607 }
2608
2609 // Left or right might be masked.
2611 return &CI;
2612
2613 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2614 // so only the low bits of the shift amount are demanded if the bitwidth is
2615 // a power-of-2.
2616 if (!isPowerOf2_32(BitWidth))
2617 break;
2619 KnownBits Op2Known(BitWidth);
2620 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2621 return &CI;
2622 break;
2623 }
2624 case Intrinsic::ptrmask: {
2625 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2626 KnownBits Known(BitWidth);
2628 return II;
2629
2630 Value *InnerPtr, *InnerMask;
2631 bool Changed = false;
2632 // Combine:
2633 // (ptrmask (ptrmask p, A), B)
2634 // -> (ptrmask p, (and A, B))
2635 if (match(II->getArgOperand(0),
2637 m_Value(InnerMask))))) {
2638 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2639 "Mask types must match");
2640 // TODO: If InnerMask == Op1, we could copy attributes from inner
2641 // callsite -> outer callsite.
2642 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2643 replaceOperand(CI, 0, InnerPtr);
2644 replaceOperand(CI, 1, NewMask);
2645 Changed = true;
2646 }
2647
2648 // See if we can deduce non-null.
2649 if (!CI.hasRetAttr(Attribute::NonNull) &&
2650 (Known.isNonZero() ||
2651 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2652 CI.addRetAttr(Attribute::NonNull);
2653 Changed = true;
2654 }
2655
2656 unsigned NewAlignmentLog =
2658 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2659 // Known bits will capture if we had alignment information associated with
2660 // the pointer argument.
2661 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2663 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2664 Changed = true;
2665 }
2666 if (Changed)
2667 return &CI;
2668 break;
2669 }
2670 case Intrinsic::uadd_with_overflow:
2671 case Intrinsic::sadd_with_overflow: {
2672 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2673 return I;
2674
2675 // Given 2 constant operands whose sum does not overflow:
2676 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2677 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2678 Value *X;
2679 const APInt *C0, *C1;
2680 Value *Arg0 = II->getArgOperand(0);
2681 Value *Arg1 = II->getArgOperand(1);
2682 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2683 bool HasNWAdd = IsSigned
2684 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2685 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2686 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2687 bool Overflow;
2688 APInt NewC =
2689 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2690 if (!Overflow)
2691 return replaceInstUsesWith(
2692 *II, Builder.CreateBinaryIntrinsic(
2693 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2694 }
2695 break;
2696 }
2697
2698 case Intrinsic::umul_with_overflow:
2699 case Intrinsic::smul_with_overflow:
2700 case Intrinsic::usub_with_overflow:
2701 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2702 return I;
2703 break;
2704
2705 case Intrinsic::ssub_with_overflow: {
2706 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2707 return I;
2708
2709 Constant *C;
2710 Value *Arg0 = II->getArgOperand(0);
2711 Value *Arg1 = II->getArgOperand(1);
2712 // Given a constant C that is not the minimum signed value
2713 // for an integer of a given bit width:
2714 //
2715 // ssubo X, C -> saddo X, -C
2716 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2717 Value *NegVal = ConstantExpr::getNeg(C);
2718 // Build a saddo call that is equivalent to the discovered
2719 // ssubo call.
2720 return replaceInstUsesWith(
2721 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2722 Arg0, NegVal));
2723 }
2724
2725 break;
2726 }
2727
2728 case Intrinsic::uadd_sat:
2729 case Intrinsic::sadd_sat:
2730 case Intrinsic::usub_sat:
2731 case Intrinsic::ssub_sat: {
2733 Type *Ty = SI->getType();
2734 Value *Arg0 = SI->getLHS();
2735 Value *Arg1 = SI->getRHS();
2736
2737 // Make use of known overflow information.
2738 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2739 Arg0, Arg1, SI);
2740 switch (OR) {
2742 break;
2744 if (SI->isSigned())
2745 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2746 else
2747 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2749 unsigned BitWidth = Ty->getScalarSizeInBits();
2750 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2751 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2752 }
2754 unsigned BitWidth = Ty->getScalarSizeInBits();
2755 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2756 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2757 }
2758 }
2759
2760 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2761 // which after that:
2762 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2763 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2764 Constant *C, *C1;
2765 Value *A;
2766 if (IID == Intrinsic::usub_sat &&
2767 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2768 match(Arg1, m_ImmConstant(C1))) {
2769 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2770 auto *NewSub =
2771 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2772 return replaceInstUsesWith(*SI, NewSub);
2773 }
2774
2775 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2776 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2777 C->isNotMinSignedValue()) {
2778 Value *NegVal = ConstantExpr::getNeg(C);
2779 return replaceInstUsesWith(
2780 *II, Builder.CreateBinaryIntrinsic(
2781 Intrinsic::sadd_sat, Arg0, NegVal));
2782 }
2783
2784 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2785 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2786 // if Val and Val2 have the same sign
2787 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2788 Value *X;
2789 const APInt *Val, *Val2;
2790 APInt NewVal;
2791 bool IsUnsigned =
2792 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2793 if (Other->getIntrinsicID() == IID &&
2794 match(Arg1, m_APInt(Val)) &&
2795 match(Other->getArgOperand(0), m_Value(X)) &&
2796 match(Other->getArgOperand(1), m_APInt(Val2))) {
2797 if (IsUnsigned)
2798 NewVal = Val->uadd_sat(*Val2);
2799 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2800 bool Overflow;
2801 NewVal = Val->sadd_ov(*Val2, Overflow);
2802 if (Overflow) {
2803 // Both adds together may add more than SignedMaxValue
2804 // without saturating the final result.
2805 break;
2806 }
2807 } else {
2808 // Cannot fold saturated addition with different signs.
2809 break;
2810 }
2811
2812 return replaceInstUsesWith(
2813 *II, Builder.CreateBinaryIntrinsic(
2814 IID, X, ConstantInt::get(II->getType(), NewVal)));
2815 }
2816 }
2817 break;
2818 }
2819
2820 case Intrinsic::minnum:
2821 case Intrinsic::maxnum:
2822 case Intrinsic::minimumnum:
2823 case Intrinsic::maximumnum:
2824 case Intrinsic::minimum:
2825 case Intrinsic::maximum: {
2826 Value *Arg0 = II->getArgOperand(0);
2827 Value *Arg1 = II->getArgOperand(1);
2828 Value *X, *Y;
2829 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2830 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2831 // If both operands are negated, invert the call and negate the result:
2832 // min(-X, -Y) --> -(max(X, Y))
2833 // max(-X, -Y) --> -(min(X, Y))
2834 Intrinsic::ID NewIID;
2835 switch (IID) {
2836 case Intrinsic::maxnum:
2837 NewIID = Intrinsic::minnum;
2838 break;
2839 case Intrinsic::minnum:
2840 NewIID = Intrinsic::maxnum;
2841 break;
2842 case Intrinsic::maximumnum:
2843 NewIID = Intrinsic::minimumnum;
2844 break;
2845 case Intrinsic::minimumnum:
2846 NewIID = Intrinsic::maximumnum;
2847 break;
2848 case Intrinsic::maximum:
2849 NewIID = Intrinsic::minimum;
2850 break;
2851 case Intrinsic::minimum:
2852 NewIID = Intrinsic::maximum;
2853 break;
2854 default:
2855 llvm_unreachable("unexpected intrinsic ID");
2856 }
2857 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2858 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2859 FNeg->copyIRFlags(II);
2860 return FNeg;
2861 }
2862
2863 // m(m(X, C2), C1) -> m(X, C)
2864 const APFloat *C1, *C2;
2865 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2866 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2867 ((match(M->getArgOperand(0), m_Value(X)) &&
2868 match(M->getArgOperand(1), m_APFloat(C2))) ||
2869 (match(M->getArgOperand(1), m_Value(X)) &&
2870 match(M->getArgOperand(0), m_APFloat(C2))))) {
2871 APFloat Res(0.0);
2872 switch (IID) {
2873 case Intrinsic::maxnum:
2874 Res = maxnum(*C1, *C2);
2875 break;
2876 case Intrinsic::minnum:
2877 Res = minnum(*C1, *C2);
2878 break;
2879 case Intrinsic::maximumnum:
2880 Res = maximumnum(*C1, *C2);
2881 break;
2882 case Intrinsic::minimumnum:
2883 Res = minimumnum(*C1, *C2);
2884 break;
2885 case Intrinsic::maximum:
2886 Res = maximum(*C1, *C2);
2887 break;
2888 case Intrinsic::minimum:
2889 Res = minimum(*C1, *C2);
2890 break;
2891 default:
2892 llvm_unreachable("unexpected intrinsic ID");
2893 }
2894 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2895 // was a simplification (so Arg0 and its original flags could
2896 // propagate?)
2897 Value *V = Builder.CreateBinaryIntrinsic(
2898 IID, X, ConstantFP::get(Arg0->getType(), Res),
2900 return replaceInstUsesWith(*II, V);
2901 }
2902 }
2903
2904 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2905 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2906 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2907 X->getType() == Y->getType()) {
2908 Value *NewCall =
2909 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2910 return new FPExtInst(NewCall, II->getType());
2911 }
2912
2913 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2914 Constant *C;
2915 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2916 match(Arg1, m_ImmConstant(C))) {
2917 if (Constant *TruncC =
2918 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2919 Value *NewCall =
2920 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2921 return new FPExtInst(NewCall, II->getType());
2922 }
2923 }
2924
2925 // max X, -X --> fabs X
2926 // min X, -X --> -(fabs X)
2927 // TODO: Remove one-use limitation? That is obviously better for max,
2928 // hence why we don't check for one-use for that. However,
2929 // it would be an extra instruction for min (fnabs), but
2930 // that is still likely better for analysis and codegen.
2931 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2932 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2933 return Op0->hasOneUse() ||
2934 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2935 IID != Intrinsic::minimumnum);
2936 return false;
2937 };
2938
2939 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2940 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2941 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2942 IID == Intrinsic::minimumnum)
2943 R = Builder.CreateFNegFMF(R, II);
2944 return replaceInstUsesWith(*II, R);
2945 }
2946
2947 break;
2948 }
2949 case Intrinsic::matrix_multiply: {
2950 // Optimize negation in matrix multiplication.
2951
2952 // -A * -B -> A * B
2953 Value *A, *B;
2954 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2955 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2956 replaceOperand(*II, 0, A);
2957 replaceOperand(*II, 1, B);
2958 return II;
2959 }
2960
2961 Value *Op0 = II->getOperand(0);
2962 Value *Op1 = II->getOperand(1);
2963 Value *OpNotNeg, *NegatedOp;
2964 unsigned NegatedOpArg, OtherOpArg;
2965 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2966 NegatedOp = Op0;
2967 NegatedOpArg = 0;
2968 OtherOpArg = 1;
2969 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2970 NegatedOp = Op1;
2971 NegatedOpArg = 1;
2972 OtherOpArg = 0;
2973 } else
2974 // Multiplication doesn't have a negated operand.
2975 break;
2976
2977 // Only optimize if the negated operand has only one use.
2978 if (!NegatedOp->hasOneUse())
2979 break;
2980
2981 Value *OtherOp = II->getOperand(OtherOpArg);
2982 VectorType *RetTy = cast<VectorType>(II->getType());
2983 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2984 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2985 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2986 ElementCount OtherCount = OtherOpTy->getElementCount();
2987 ElementCount RetCount = RetTy->getElementCount();
2988 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2989 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2990 ElementCount::isKnownLT(OtherCount, RetCount)) {
2991 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2992 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2993 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2994 return II;
2995 }
2996 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2997 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2998 SmallVector<Value *, 5> NewArgs(II->args());
2999 NewArgs[NegatedOpArg] = OpNotNeg;
3000 Instruction *NewMul =
3001 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3002 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3003 }
3004 break;
3005 }
3006 case Intrinsic::fmuladd: {
3007 // Try to simplify the underlying FMul.
3008 if (Value *V =
3009 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3010 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3011 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3012 II->getFastMathFlags());
3013
3014 [[fallthrough]];
3015 }
3016 case Intrinsic::fma: {
3017 // fma fneg(x), fneg(y), z -> fma x, y, z
3018 Value *Src0 = II->getArgOperand(0);
3019 Value *Src1 = II->getArgOperand(1);
3020 Value *Src2 = II->getArgOperand(2);
3021 Value *X, *Y;
3022 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3023 replaceOperand(*II, 0, X);
3024 replaceOperand(*II, 1, Y);
3025 return II;
3026 }
3027
3028 // fma fabs(x), fabs(x), z -> fma x, x, z
3029 if (match(Src0, m_FAbs(m_Value(X))) &&
3030 match(Src1, m_FAbs(m_Specific(X)))) {
3031 replaceOperand(*II, 0, X);
3032 replaceOperand(*II, 1, X);
3033 return II;
3034 }
3035
3036 // Try to simplify the underlying FMul. We can only apply simplifications
3037 // that do not require rounding.
3038 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3039 SQ.getWithInstruction(II)))
3040 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3041
3042 // fma x, y, 0 -> fmul x, y
3043 // This is always valid for -0.0, but requires nsz for +0.0 as
3044 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3045 if (match(Src2, m_NegZeroFP()) ||
3046 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3047 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3048
3049 // fma x, -1.0, y -> fsub y, x
3050 if (match(Src1, m_SpecificFP(-1.0)))
3051 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3052
3053 break;
3054 }
3055 case Intrinsic::copysign: {
3056 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3057 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3058 Sign, getSimplifyQuery().getWithInstruction(II))) {
3059 if (*KnownSignBit) {
3060 // If we know that the sign argument is negative, reduce to FNABS:
3061 // copysign Mag, -Sign --> fneg (fabs Mag)
3062 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3063 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3064 }
3065
3066 // If we know that the sign argument is positive, reduce to FABS:
3067 // copysign Mag, +Sign --> fabs Mag
3068 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3069 return replaceInstUsesWith(*II, Fabs);
3070 }
3071
3072 // Propagate sign argument through nested calls:
3073 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3074 Value *X;
3076 Value *CopySign =
3077 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3078 return replaceInstUsesWith(*II, CopySign);
3079 }
3080
3081 // Clear sign-bit of constant magnitude:
3082 // copysign -MagC, X --> copysign MagC, X
3083 // TODO: Support constant folding for fabs
3084 const APFloat *MagC;
3085 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3086 APFloat PosMagC = *MagC;
3087 PosMagC.clearSign();
3088 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3089 }
3090
3091 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3092 // copysign (fabs X), Sign --> copysign X, Sign
3093 // copysign (fneg X), Sign --> copysign X, Sign
3094 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3095 return replaceOperand(*II, 0, X);
3096
3097 Type *SignEltTy = Sign->getType()->getScalarType();
3098
3099 Value *CastSrc;
3100 if (match(Sign,
3102 CastSrc->getType()->isIntOrIntVectorTy() &&
3104 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3106 APInt::getSignMask(Known.getBitWidth()), Known,
3107 SQ))
3108 return II;
3109 }
3110
3111 break;
3112 }
3113 case Intrinsic::fabs: {
3114 Value *Cond, *TVal, *FVal;
3115 Value *Arg = II->getArgOperand(0);
3116 Value *X;
3117 // fabs (-X) --> fabs (X)
3118 if (match(Arg, m_FNeg(m_Value(X)))) {
3119 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
3120 return replaceInstUsesWith(CI, Fabs);
3121 }
3122
3123 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3124 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3125 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3126 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3127 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3128 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3129 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3130 SI->setFastMathFlags(II->getFastMathFlags() |
3131 cast<SelectInst>(Arg)->getFastMathFlags());
3132 // Can't copy nsz to select, as even with the nsz flag the fabs result
3133 // always has the sign bit unset.
3134 SI->setHasNoSignedZeros(false);
3135 return SI;
3136 }
3137 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3138 if (match(TVal, m_FNeg(m_Specific(FVal))))
3139 return replaceOperand(*II, 0, FVal);
3140 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3141 if (match(FVal, m_FNeg(m_Specific(TVal))))
3142 return replaceOperand(*II, 0, TVal);
3143 }
3144
3145 Value *Magnitude, *Sign;
3146 if (match(II->getArgOperand(0),
3147 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3148 // fabs (copysign x, y) -> (fabs x)
3149 CallInst *AbsSign =
3150 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
3151 return replaceInstUsesWith(*II, AbsSign);
3152 }
3153
3154 [[fallthrough]];
3155 }
3156 case Intrinsic::ceil:
3157 case Intrinsic::floor:
3158 case Intrinsic::round:
3159 case Intrinsic::roundeven:
3160 case Intrinsic::nearbyint:
3161 case Intrinsic::rint:
3162 case Intrinsic::trunc: {
3163 Value *ExtSrc;
3164 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3165 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3166 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3167 return new FPExtInst(NarrowII, II->getType());
3168 }
3169 break;
3170 }
3171 case Intrinsic::cos:
3172 case Intrinsic::amdgcn_cos:
3173 case Intrinsic::cosh: {
3174 Value *X, *Sign;
3175 Value *Src = II->getArgOperand(0);
3176 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3177 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3178 // f(-x) --> f(x)
3179 // f(fabs(x)) --> f(x)
3180 // f(copysign(x, y)) --> f(x)
3181 // for f in {cos, cosh}
3182 return replaceOperand(*II, 0, X);
3183 }
3184 break;
3185 }
3186 case Intrinsic::sin:
3187 case Intrinsic::amdgcn_sin:
3188 case Intrinsic::sinh:
3189 case Intrinsic::tan:
3190 case Intrinsic::tanh: {
3191 Value *X;
3192 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3193 // f(-x) --> -f(x)
3194 // for f in {sin, sinh, tan, tanh}
3195 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3196 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3197 }
3198 break;
3199 }
3200 case Intrinsic::ldexp: {
3201 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3202 //
3203 // The danger is if the first ldexp would overflow to infinity or underflow
3204 // to zero, but the combined exponent avoids it. We ignore this with
3205 // reassoc.
3206 //
3207 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3208 // it would just double down on the overflow/underflow which would occur
3209 // anyway.
3210 //
3211 // TODO: Could do better if we had range tracking for the input value
3212 // exponent. Also could broaden sign check to cover == 0 case.
3213 Value *Src = II->getArgOperand(0);
3214 Value *Exp = II->getArgOperand(1);
3215
3216 uint64_t ConstExp;
3217 if (match(Exp, m_ConstantInt(ConstExp))) {
3218 // ldexp(x, K) -> fmul x, 2^K
3219 const fltSemantics &FPTy =
3220 Src->getType()->getScalarType()->getFltSemantics();
3221
3222 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3224 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3225 // Skip overflow and underflow cases.
3226 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3227 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3228 }
3229 }
3230
3231 Value *InnerSrc;
3232 Value *InnerExp;
3234 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3235 Exp->getType() == InnerExp->getType()) {
3236 FastMathFlags FMF = II->getFastMathFlags();
3237 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3238
3239 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3240 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
3241 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3242 // width.
3243 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3244 II->setArgOperand(1, NewExp);
3245 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3246 return replaceOperand(*II, 0, InnerSrc);
3247 }
3248 }
3249
3250 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3251 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3252 Value *ExtSrc;
3253 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3254 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3255 Value *Select =
3256 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3257 ConstantFP::get(II->getType(), 1.0));
3259 }
3260 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3261 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3262 Value *Select =
3263 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3264 ConstantFP::get(II->getType(), 1.0));
3266 }
3267
3268 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3269 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3270 ///
3271 // TODO: If we cared, should insert a canonicalize for x
3272 Value *SelectCond, *SelectLHS, *SelectRHS;
3273 if (match(II->getArgOperand(1),
3274 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3275 m_Value(SelectRHS))))) {
3276 Value *NewLdexp = nullptr;
3277 Value *Select = nullptr;
3278 if (match(SelectRHS, m_ZeroInt())) {
3279 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3280 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3281 } else if (match(SelectLHS, m_ZeroInt())) {
3282 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3283 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3284 }
3285
3286 if (NewLdexp) {
3287 Select->takeName(II);
3288 return replaceInstUsesWith(*II, Select);
3289 }
3290 }
3291
3292 break;
3293 }
3294 case Intrinsic::ptrauth_auth:
3295 case Intrinsic::ptrauth_resign: {
3296 // We don't support this optimization on intrinsic calls with deactivation
3297 // symbols, which are represented using operand bundles.
3298 if (II->hasOperandBundles())
3299 break;
3300
3301 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3302 // sign+auth component if the key and discriminator match.
3303 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3304 Value *Ptr = II->getArgOperand(0);
3305 Value *Key = II->getArgOperand(1);
3306 Value *Disc = II->getArgOperand(2);
3307
3308 // AuthKey will be the key we need to end up authenticating against in
3309 // whatever we replace this sequence with.
3310 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3311 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3312 // We don't support this optimization on intrinsic calls with deactivation
3313 // symbols, which are represented using operand bundles.
3314 if (CI->hasOperandBundles())
3315 break;
3316
3317 BasePtr = CI->getArgOperand(0);
3318 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3319 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3320 break;
3321 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3322 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3323 break;
3324 AuthKey = CI->getArgOperand(1);
3325 AuthDisc = CI->getArgOperand(2);
3326 } else
3327 break;
3328 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3329 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3330 // our purposes, so check for that too.
3331 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3332 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3333 break;
3334
3335 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3336 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3337 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3338 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3339 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3340 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3341 SignDisc, /*AddrDisc=*/Null,
3342 /*DeactivationSymbol=*/Null);
3344 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3345 return eraseInstFromFunction(*II);
3346 }
3347
3348 // auth(ptrauth(p,k,d),k,d) -> p
3349 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3350 } else
3351 break;
3352
3353 unsigned NewIntrin;
3354 if (AuthKey && NeedSign) {
3355 // resign(0,1) + resign(1,2) = resign(0, 2)
3356 NewIntrin = Intrinsic::ptrauth_resign;
3357 } else if (AuthKey) {
3358 // resign(0,1) + auth(1) = auth(0)
3359 NewIntrin = Intrinsic::ptrauth_auth;
3360 } else if (NeedSign) {
3361 // sign(0) + resign(0, 1) = sign(1)
3362 NewIntrin = Intrinsic::ptrauth_sign;
3363 } else {
3364 // sign(0) + auth(0) = nop
3365 replaceInstUsesWith(*II, BasePtr);
3366 return eraseInstFromFunction(*II);
3367 }
3368
3369 SmallVector<Value *, 4> CallArgs;
3370 CallArgs.push_back(BasePtr);
3371 if (AuthKey) {
3372 CallArgs.push_back(AuthKey);
3373 CallArgs.push_back(AuthDisc);
3374 }
3375
3376 if (NeedSign) {
3377 CallArgs.push_back(II->getArgOperand(3));
3378 CallArgs.push_back(II->getArgOperand(4));
3379 }
3380
3381 Function *NewFn =
3382 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3383 return CallInst::Create(NewFn, CallArgs);
3384 }
3385 case Intrinsic::arm_neon_vtbl1:
3386 case Intrinsic::arm_neon_vtbl2:
3387 case Intrinsic::arm_neon_vtbl3:
3388 case Intrinsic::arm_neon_vtbl4:
3389 case Intrinsic::aarch64_neon_tbl1:
3390 case Intrinsic::aarch64_neon_tbl2:
3391 case Intrinsic::aarch64_neon_tbl3:
3392 case Intrinsic::aarch64_neon_tbl4:
3393 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3394 case Intrinsic::arm_neon_vtbx1:
3395 case Intrinsic::arm_neon_vtbx2:
3396 case Intrinsic::arm_neon_vtbx3:
3397 case Intrinsic::arm_neon_vtbx4:
3398 case Intrinsic::aarch64_neon_tbx1:
3399 case Intrinsic::aarch64_neon_tbx2:
3400 case Intrinsic::aarch64_neon_tbx3:
3401 case Intrinsic::aarch64_neon_tbx4:
3402 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3403
3404 case Intrinsic::arm_neon_vmulls:
3405 case Intrinsic::arm_neon_vmullu:
3406 case Intrinsic::aarch64_neon_smull:
3407 case Intrinsic::aarch64_neon_umull: {
3408 Value *Arg0 = II->getArgOperand(0);
3409 Value *Arg1 = II->getArgOperand(1);
3410
3411 // Handle mul by zero first:
3413 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3414 }
3415
3416 // Check for constant LHS & RHS - in this case we just simplify.
3417 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3418 IID == Intrinsic::aarch64_neon_umull);
3419 VectorType *NewVT = cast<VectorType>(II->getType());
3420 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3421 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3422 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3423 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3424 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3425 }
3426
3427 // Couldn't simplify - canonicalize constant to the RHS.
3428 std::swap(Arg0, Arg1);
3429 }
3430
3431 // Handle mul by one:
3432 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3433 if (ConstantInt *Splat =
3434 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3435 if (Splat->isOne())
3436 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3437 /*isSigned=*/!Zext);
3438
3439 break;
3440 }
3441 case Intrinsic::arm_neon_aesd:
3442 case Intrinsic::arm_neon_aese:
3443 case Intrinsic::aarch64_crypto_aesd:
3444 case Intrinsic::aarch64_crypto_aese:
3445 case Intrinsic::aarch64_sve_aesd:
3446 case Intrinsic::aarch64_sve_aese: {
3447 Value *DataArg = II->getArgOperand(0);
3448 Value *KeyArg = II->getArgOperand(1);
3449
3450 // Accept zero on either operand.
3451 if (!match(KeyArg, m_ZeroInt()))
3452 std::swap(KeyArg, DataArg);
3453
3454 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3455 Value *Data, *Key;
3456 if (match(KeyArg, m_ZeroInt()) &&
3457 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3458 replaceOperand(*II, 0, Data);
3459 replaceOperand(*II, 1, Key);
3460 return II;
3461 }
3462 break;
3463 }
3464 case Intrinsic::arm_neon_vshifts:
3465 case Intrinsic::arm_neon_vshiftu:
3466 case Intrinsic::aarch64_neon_sshl:
3467 case Intrinsic::aarch64_neon_ushl:
3468 return foldNeonShift(II, *this);
3469 case Intrinsic::hexagon_V6_vandvrt:
3470 case Intrinsic::hexagon_V6_vandvrt_128B: {
3471 // Simplify Q -> V -> Q conversion.
3472 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3473 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3474 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3475 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3476 break;
3477 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3478 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3479 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3480 // Check if every byte has common bits in Bytes and Mask.
3481 uint64_t C = Bytes1 & Mask1;
3482 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3483 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3484 }
3485 break;
3486 }
3487 case Intrinsic::stackrestore: {
3488 enum class ClassifyResult {
3489 None,
3490 Alloca,
3491 StackRestore,
3492 CallWithSideEffects,
3493 };
3494 auto Classify = [](const Instruction *I) {
3495 if (isa<AllocaInst>(I))
3496 return ClassifyResult::Alloca;
3497
3498 if (auto *CI = dyn_cast<CallInst>(I)) {
3499 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3500 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3501 return ClassifyResult::StackRestore;
3502
3503 if (II->mayHaveSideEffects())
3504 return ClassifyResult::CallWithSideEffects;
3505 } else {
3506 // Consider all non-intrinsic calls to be side effects
3507 return ClassifyResult::CallWithSideEffects;
3508 }
3509 }
3510
3511 return ClassifyResult::None;
3512 };
3513
3514 // If the stacksave and the stackrestore are in the same BB, and there is
3515 // no intervening call, alloca, or stackrestore of a different stacksave,
3516 // remove the restore. This can happen when variable allocas are DCE'd.
3517 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3518 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3519 SS->getParent() == II->getParent()) {
3520 BasicBlock::iterator BI(SS);
3521 bool CannotRemove = false;
3522 for (++BI; &*BI != II; ++BI) {
3523 switch (Classify(&*BI)) {
3524 case ClassifyResult::None:
3525 // So far so good, look at next instructions.
3526 break;
3527
3528 case ClassifyResult::StackRestore:
3529 // If we found an intervening stackrestore for a different
3530 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3531 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3532 CannotRemove = true;
3533 break;
3534
3535 case ClassifyResult::Alloca:
3536 case ClassifyResult::CallWithSideEffects:
3537 // If we found an alloca, a non-intrinsic call, or an intrinsic
3538 // call with side effects, we can't remove the stackrestore.
3539 CannotRemove = true;
3540 break;
3541 }
3542 if (CannotRemove)
3543 break;
3544 }
3545
3546 if (!CannotRemove)
3547 return eraseInstFromFunction(CI);
3548 }
3549 }
3550
3551 // Scan down this block to see if there is another stack restore in the
3552 // same block without an intervening call/alloca.
3554 Instruction *TI = II->getParent()->getTerminator();
3555 bool CannotRemove = false;
3556 for (++BI; &*BI != TI; ++BI) {
3557 switch (Classify(&*BI)) {
3558 case ClassifyResult::None:
3559 // So far so good, look at next instructions.
3560 break;
3561
3562 case ClassifyResult::StackRestore:
3563 // If there is a stackrestore below this one, remove this one.
3564 return eraseInstFromFunction(CI);
3565
3566 case ClassifyResult::Alloca:
3567 case ClassifyResult::CallWithSideEffects:
3568 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3569 // with side effects (such as llvm.stacksave and llvm.read_register),
3570 // we can't remove the stack restore.
3571 CannotRemove = true;
3572 break;
3573 }
3574 if (CannotRemove)
3575 break;
3576 }
3577
3578 // If the stack restore is in a return, resume, or unwind block and if there
3579 // are no allocas or calls between the restore and the return, nuke the
3580 // restore.
3581 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3582 return eraseInstFromFunction(CI);
3583 break;
3584 }
3585 case Intrinsic::lifetime_end:
3586 // Asan needs to poison memory to detect invalid access which is possible
3587 // even for empty lifetime range.
3588 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3589 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3590 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3591 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3592 break;
3593
3594 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3595 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3596 }))
3597 return nullptr;
3598 break;
3599 case Intrinsic::assume: {
3600 Value *IIOperand = II->getArgOperand(0);
3602 II->getOperandBundlesAsDefs(OpBundles);
3603
3604 /// This will remove the boolean Condition from the assume given as
3605 /// argument and remove the assume if it becomes useless.
3606 /// always returns nullptr for use as a return values.
3607 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3608 assert(isa<AssumeInst>(Assume));
3610 return eraseInstFromFunction(CI);
3611 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3612 return nullptr;
3613 };
3614 // Remove an assume if it is followed by an identical assume.
3615 // TODO: Do we need this? Unless there are conflicting assumptions, the
3616 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3617 Instruction *Next = II->getNextNode();
3619 return RemoveConditionFromAssume(Next);
3620
3621 // Canonicalize assume(a && b) -> assume(a); assume(b);
3622 // Note: New assumption intrinsics created here are registered by
3623 // the InstCombineIRInserter object.
3624 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3625 Value *AssumeIntrinsic = II->getCalledOperand();
3626 Value *A, *B;
3627 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3628 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3629 II->getName());
3630 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3631 return eraseInstFromFunction(*II);
3632 }
3633 // assume(!(a || b)) -> assume(!a); assume(!b);
3634 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3635 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3636 Builder.CreateNot(A), OpBundles, II->getName());
3637 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3638 Builder.CreateNot(B), II->getName());
3639 return eraseInstFromFunction(*II);
3640 }
3641
3642 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3643 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3644
3645 // Separate storage assumptions apply to the underlying allocations, not
3646 // any particular pointer within them. When evaluating the hints for AA
3647 // purposes we getUnderlyingObject them; by precomputing the answers here
3648 // we can avoid having to do so repeatedly there.
3649 if (OBU.getTagName() == "separate_storage") {
3650 assert(OBU.Inputs.size() == 2);
3651 auto MaybeSimplifyHint = [&](const Use &U) {
3652 Value *Hint = U.get();
3653 // Not having a limit is safe because InstCombine removes unreachable
3654 // code.
3655 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3656 if (Hint != UnderlyingObject)
3657 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3658 };
3659 MaybeSimplifyHint(OBU.Inputs[0]);
3660 MaybeSimplifyHint(OBU.Inputs[1]);
3661 }
3662
3663 // Try to remove redundant alignment assumptions.
3664 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3666 *cast<AssumeInst>(II), II->arg_size() + Idx);
3667 if (!RK || RK.AttrKind != Attribute::Alignment ||
3669 continue;
3670
3671 // Remove align 1 bundles; they don't add any useful information.
3672 if (RK.ArgValue == 1)
3674
3675 // Don't try to remove align assumptions for pointers derived from
3676 // arguments. We might lose information if the function gets inline and
3677 // the align argument attribute disappears.
3679 if (!UO || isa<Argument>(UO))
3680 continue;
3681
3682 // Compute known bits for the pointer, passing nullptr as context to
3683 // avoid computeKnownBits using the assumption we are about to remove
3684 // for reasoning.
3685 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3686 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3688 if ((1ULL << TZ) < RK.ArgValue)
3689 continue;
3691 }
3692
3693 if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
3695 *cast<AssumeInst>(II), II->arg_size() + Idx);
3696 if (!RK || RK.AttrKind != Attribute::NonNull)
3697 continue;
3698
3699 // Drop assume if we can prove nonnull without it
3700 if (isKnownNonZero(RK.WasOn, getSimplifyQuery().getWithInstruction(II)))
3702
3703 // Fold the assume into metadata if it's valid at the load
3704 if (auto *LI = dyn_cast<LoadInst>(RK.WasOn);
3705 LI &&
3706 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3707 MDNode *MD = MDNode::get(II->getContext(), {});
3708 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3709 LI->setMetadata(LLVMContext::MD_noundef, MD);
3711 }
3712
3713 // TODO: apply nonnull return attributes to calls and invokes
3714 }
3715 }
3716
3717 // Convert nonnull assume like:
3718 // %A = icmp ne i32* %PTR, null
3719 // call void @llvm.assume(i1 %A)
3720 // into
3721 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3722 if (match(IIOperand,
3724 A->getType()->isPointerTy()) {
3725 if (auto *Replacement = buildAssumeFromKnowledge(
3726 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3727
3728 InsertNewInstBefore(Replacement, Next->getIterator());
3729 AC.registerAssumption(Replacement);
3730 return RemoveConditionFromAssume(II);
3731 }
3732 }
3733
3734 // Convert alignment assume like:
3735 // %B = ptrtoint i32* %A to i64
3736 // %C = and i64 %B, Constant
3737 // %D = icmp eq i64 %C, 0
3738 // call void @llvm.assume(i1 %D)
3739 // into
3740 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3741 uint64_t AlignMask = 1;
3742 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3743 match(IIOperand,
3745 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3746 m_Zero())))) {
3747 if (isPowerOf2_64(AlignMask + 1)) {
3748 uint64_t Offset = 0;
3750 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3751 /// Note: this doesn't preserve the offset information but merges
3752 /// offset and alignment.
3753 /// TODO: we can generate a GEP instead of merging the alignment with
3754 /// the offset.
3755 RetainedKnowledge RK{Attribute::Alignment,
3756 MinAlign(Offset, AlignMask + 1), A};
3757 if (auto *Replacement =
3759
3760 Replacement->insertAfter(II->getIterator());
3761 AC.registerAssumption(Replacement);
3762 }
3763 return RemoveConditionFromAssume(II);
3764 }
3765 }
3766 }
3767
3768 /// Canonicalize Knowledge in operand bundles.
3769 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3770 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3771 auto &BOI = II->bundle_op_info_begin()[Idx];
3774 if (BOI.End - BOI.Begin > 2)
3775 continue; // Prevent reducing knowledge in an align with offset since
3776 // extracting a RetainedKnowledge from them looses offset
3777 // information
3778 RetainedKnowledge CanonRK =
3781 &getDominatorTree());
3782 if (CanonRK == RK)
3783 continue;
3784 if (!CanonRK) {
3785 if (BOI.End - BOI.Begin > 0) {
3786 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3787 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3788 }
3789 continue;
3790 }
3791 assert(RK.AttrKind == CanonRK.AttrKind);
3792 if (BOI.End - BOI.Begin > 0)
3793 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3794 if (BOI.End - BOI.Begin > 1)
3795 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3796 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3797 if (RK.WasOn)
3798 Worklist.pushValue(RK.WasOn);
3799 return II;
3800 }
3801 }
3802
3803 // If there is a dominating assume with the same condition as this one,
3804 // then this one is redundant, and should be removed.
3805 KnownBits Known(1);
3806 computeKnownBits(IIOperand, Known, II);
3808 return eraseInstFromFunction(*II);
3809
3810 // assume(false) is unreachable.
3811 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3813 return eraseInstFromFunction(*II);
3814 }
3815
3816 // Update the cache of affected values for this assumption (we might be
3817 // here because we just simplified the condition).
3818 AC.updateAffectedValues(cast<AssumeInst>(II));
3819 break;
3820 }
3821 case Intrinsic::experimental_guard: {
3822 // Is this guard followed by another guard? We scan forward over a small
3823 // fixed window of instructions to handle common cases with conditions
3824 // computed between guards.
3825 Instruction *NextInst = II->getNextNode();
3826 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3827 // Note: Using context-free form to avoid compile time blow up
3828 if (!isSafeToSpeculativelyExecute(NextInst))
3829 break;
3830 NextInst = NextInst->getNextNode();
3831 }
3832 Value *NextCond = nullptr;
3833 if (match(NextInst,
3835 Value *CurrCond = II->getArgOperand(0);
3836
3837 // Remove a guard that it is immediately preceded by an identical guard.
3838 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3839 if (CurrCond != NextCond) {
3840 Instruction *MoveI = II->getNextNode();
3841 while (MoveI != NextInst) {
3842 auto *Temp = MoveI;
3843 MoveI = MoveI->getNextNode();
3844 Temp->moveBefore(II->getIterator());
3845 }
3846 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3847 }
3848 eraseInstFromFunction(*NextInst);
3849 return II;
3850 }
3851 break;
3852 }
3853 case Intrinsic::vector_insert: {
3854 Value *Vec = II->getArgOperand(0);
3855 Value *SubVec = II->getArgOperand(1);
3856 Value *Idx = II->getArgOperand(2);
3857 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3858 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3859 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3860
3861 // Only canonicalize if the destination vector, Vec, and SubVec are all
3862 // fixed vectors.
3863 if (DstTy && VecTy && SubVecTy) {
3864 unsigned DstNumElts = DstTy->getNumElements();
3865 unsigned VecNumElts = VecTy->getNumElements();
3866 unsigned SubVecNumElts = SubVecTy->getNumElements();
3867 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3868
3869 // An insert that entirely overwrites Vec with SubVec is a nop.
3870 if (VecNumElts == SubVecNumElts)
3871 return replaceInstUsesWith(CI, SubVec);
3872
3873 // Widen SubVec into a vector of the same width as Vec, since
3874 // shufflevector requires the two input vectors to be the same width.
3875 // Elements beyond the bounds of SubVec within the widened vector are
3876 // undefined.
3877 SmallVector<int, 8> WidenMask;
3878 unsigned i;
3879 for (i = 0; i != SubVecNumElts; ++i)
3880 WidenMask.push_back(i);
3881 for (; i != VecNumElts; ++i)
3882 WidenMask.push_back(PoisonMaskElem);
3883
3884 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3885
3887 for (unsigned i = 0; i != IdxN; ++i)
3888 Mask.push_back(i);
3889 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3890 Mask.push_back(i);
3891 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3892 Mask.push_back(i);
3893
3894 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3895 return replaceInstUsesWith(CI, Shuffle);
3896 }
3897 break;
3898 }
3899 case Intrinsic::vector_extract: {
3900 Value *Vec = II->getArgOperand(0);
3901 Value *Idx = II->getArgOperand(1);
3902
3903 Type *ReturnType = II->getType();
3904 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3905 // ExtractIdx)
3906 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3907 Value *InsertTuple, *InsertIdx, *InsertValue;
3909 m_Value(InsertValue),
3910 m_Value(InsertIdx))) &&
3911 InsertValue->getType() == ReturnType) {
3912 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3913 // Case where we get the same index right after setting it.
3914 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3915 // InsertValue
3916 if (ExtractIdx == Index)
3917 return replaceInstUsesWith(CI, InsertValue);
3918 // If we are getting a different index than what was set in the
3919 // insert.vector intrinsic. We can just set the input tuple to the one up
3920 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3921 // InsertIndex), ExtractIndex)
3922 // --> extract.vector(InsertTuple, ExtractIndex)
3923 else
3924 return replaceOperand(CI, 0, InsertTuple);
3925 }
3926
3927 ConstantInt *ALMUpperBound;
3929 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3930 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3931 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3932 if (ExtractIdx * VScaleMin >= ALMUpperBound->getZExtValue())
3933 return replaceInstUsesWith(CI,
3934 ConstantVector::getNullValue(ReturnType));
3935 }
3936
3937 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3938 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3939
3940 if (DstTy && VecTy) {
3941 auto DstEltCnt = DstTy->getElementCount();
3942 auto VecEltCnt = VecTy->getElementCount();
3943 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3944
3945 // Extracting the entirety of Vec is a nop.
3946 if (DstEltCnt == VecTy->getElementCount()) {
3947 replaceInstUsesWith(CI, Vec);
3948 return eraseInstFromFunction(CI);
3949 }
3950
3951 // Only canonicalize to shufflevector if the destination vector and
3952 // Vec are fixed vectors.
3953 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3954 break;
3955
3957 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3958 Mask.push_back(IdxN + i);
3959
3960 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3961 return replaceInstUsesWith(CI, Shuffle);
3962 }
3963 break;
3964 }
3965 case Intrinsic::experimental_vp_reverse: {
3966 Value *X;
3967 Value *Vec = II->getArgOperand(0);
3968 Value *Mask = II->getArgOperand(1);
3969 if (!match(Mask, m_AllOnes()))
3970 break;
3971 Value *EVL = II->getArgOperand(2);
3972 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3973 // rev(unop rev(X)) --> unop X
3974 if (match(Vec,
3976 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3977 auto *OldUnOp = cast<UnaryOperator>(Vec);
3979 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3980 II->getIterator());
3981 return replaceInstUsesWith(CI, NewUnOp);
3982 }
3983 break;
3984 }
3985 case Intrinsic::vector_reduce_or:
3986 case Intrinsic::vector_reduce_and: {
3987 // Canonicalize logical or/and reductions:
3988 // Or reduction for i1 is represented as:
3989 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3990 // %res = cmp ne iReduxWidth %val, 0
3991 // And reduction for i1 is represented as:
3992 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3993 // %res = cmp eq iReduxWidth %val, 11111
3994 Value *Arg = II->getArgOperand(0);
3995 Value *Vect;
3996
3997 if (Value *NewOp =
3998 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3999 replaceUse(II->getOperandUse(0), NewOp);
4000 return II;
4001 }
4002
4003 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4004 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4005 if (FTy->getElementType() == Builder.getInt1Ty()) {
4006 Value *Res = Builder.CreateBitCast(
4007 Vect, Builder.getIntNTy(FTy->getNumElements()));
4008 if (IID == Intrinsic::vector_reduce_and) {
4009 Res = Builder.CreateICmpEQ(
4011 } else {
4012 assert(IID == Intrinsic::vector_reduce_or &&
4013 "Expected or reduction.");
4014 Res = Builder.CreateIsNotNull(Res);
4015 }
4016 if (Arg != Vect)
4017 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4018 II->getType());
4019 return replaceInstUsesWith(CI, Res);
4020 }
4021 }
4022 [[fallthrough]];
4023 }
4024 case Intrinsic::vector_reduce_add: {
4025 if (IID == Intrinsic::vector_reduce_add) {
4026 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4027 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4028 // Convert vector_reduce_add(SExt(<n x i1>)) to
4029 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4030 // Convert vector_reduce_add(<n x i1>) to
4031 // Trunc(ctpop(bitcast <n x i1> to in)).
4032 Value *Arg = II->getArgOperand(0);
4033 Value *Vect;
4034
4035 if (Value *NewOp =
4036 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4037 replaceUse(II->getOperandUse(0), NewOp);
4038 return II;
4039 }
4040
4041 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4042 if (Value *Splat = getSplatValue(Arg)) {
4043 ElementCount VecToReduceCount =
4044 cast<VectorType>(Arg->getType())->getElementCount();
4045 if (VecToReduceCount.isFixed()) {
4046 unsigned VectorSize = VecToReduceCount.getFixedValue();
4047 return BinaryOperator::CreateMul(
4048 Splat,
4049 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4050 /*ImplicitTrunc=*/true));
4051 }
4052 }
4053
4054 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4055 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4056 if (FTy->getElementType() == Builder.getInt1Ty()) {
4057 Value *V = Builder.CreateBitCast(
4058 Vect, Builder.getIntNTy(FTy->getNumElements()));
4059 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4060 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4061 if (Arg != Vect &&
4062 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4063 Res = Builder.CreateNeg(Res);
4064 return replaceInstUsesWith(CI, Res);
4065 }
4066 }
4067 }
4068 [[fallthrough]];
4069 }
4070 case Intrinsic::vector_reduce_xor: {
4071 if (IID == Intrinsic::vector_reduce_xor) {
4072 // Exclusive disjunction reduction over the vector with
4073 // (potentially-extended) i1 element type is actually a
4074 // (potentially-extended) arithmetic `add` reduction over the original
4075 // non-extended value:
4076 // vector_reduce_xor(?ext(<n x i1>))
4077 // -->
4078 // ?ext(vector_reduce_add(<n x i1>))
4079 Value *Arg = II->getArgOperand(0);
4080 Value *Vect;
4081
4082 if (Value *NewOp =
4083 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4084 replaceUse(II->getOperandUse(0), NewOp);
4085 return II;
4086 }
4087
4088 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4089 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4090 if (VTy->getElementType() == Builder.getInt1Ty()) {
4091 Value *Res = Builder.CreateAddReduce(Vect);
4092 if (Arg != Vect)
4093 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4094 II->getType());
4095 return replaceInstUsesWith(CI, Res);
4096 }
4097 }
4098 }
4099 [[fallthrough]];
4100 }
4101 case Intrinsic::vector_reduce_mul: {
4102 if (IID == Intrinsic::vector_reduce_mul) {
4103 // Multiplicative reduction over the vector with (potentially-extended)
4104 // i1 element type is actually a (potentially zero-extended)
4105 // logical `and` reduction over the original non-extended value:
4106 // vector_reduce_mul(?ext(<n x i1>))
4107 // -->
4108 // zext(vector_reduce_and(<n x i1>))
4109 Value *Arg = II->getArgOperand(0);
4110 Value *Vect;
4111
4112 if (Value *NewOp =
4113 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4114 replaceUse(II->getOperandUse(0), NewOp);
4115 return II;
4116 }
4117
4118 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4119 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4120 if (VTy->getElementType() == Builder.getInt1Ty()) {
4121 Value *Res = Builder.CreateAndReduce(Vect);
4122 Res = Builder.CreateZExt(Res, II->getType());
4123 return replaceInstUsesWith(CI, Res);
4124 }
4125 }
4126 }
4127 [[fallthrough]];
4128 }
4129 case Intrinsic::vector_reduce_umin:
4130 case Intrinsic::vector_reduce_umax: {
4131 if (IID == Intrinsic::vector_reduce_umin ||
4132 IID == Intrinsic::vector_reduce_umax) {
4133 // UMin/UMax reduction over the vector with (potentially-extended)
4134 // i1 element type is actually a (potentially-extended)
4135 // logical `and`/`or` reduction over the original non-extended value:
4136 // vector_reduce_u{min,max}(?ext(<n x i1>))
4137 // -->
4138 // ?ext(vector_reduce_{and,or}(<n x i1>))
4139 Value *Arg = II->getArgOperand(0);
4140 Value *Vect;
4141
4142 if (Value *NewOp =
4143 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4144 replaceUse(II->getOperandUse(0), NewOp);
4145 return II;
4146 }
4147
4148 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4149 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4150 if (VTy->getElementType() == Builder.getInt1Ty()) {
4151 Value *Res = IID == Intrinsic::vector_reduce_umin
4152 ? Builder.CreateAndReduce(Vect)
4153 : Builder.CreateOrReduce(Vect);
4154 if (Arg != Vect)
4155 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4156 II->getType());
4157 return replaceInstUsesWith(CI, Res);
4158 }
4159 }
4160 }
4161 [[fallthrough]];
4162 }
4163 case Intrinsic::vector_reduce_smin:
4164 case Intrinsic::vector_reduce_smax: {
4165 if (IID == Intrinsic::vector_reduce_smin ||
4166 IID == Intrinsic::vector_reduce_smax) {
4167 // SMin/SMax reduction over the vector with (potentially-extended)
4168 // i1 element type is actually a (potentially-extended)
4169 // logical `and`/`or` reduction over the original non-extended value:
4170 // vector_reduce_s{min,max}(<n x i1>)
4171 // -->
4172 // vector_reduce_{or,and}(<n x i1>)
4173 // and
4174 // vector_reduce_s{min,max}(sext(<n x i1>))
4175 // -->
4176 // sext(vector_reduce_{or,and}(<n x i1>))
4177 // and
4178 // vector_reduce_s{min,max}(zext(<n x i1>))
4179 // -->
4180 // zext(vector_reduce_{and,or}(<n x i1>))
4181 Value *Arg = II->getArgOperand(0);
4182 Value *Vect;
4183
4184 if (Value *NewOp =
4185 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4186 replaceUse(II->getOperandUse(0), NewOp);
4187 return II;
4188 }
4189
4190 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4191 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4192 if (VTy->getElementType() == Builder.getInt1Ty()) {
4193 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4194 if (Arg != Vect)
4195 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4196 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4197 (ExtOpc == Instruction::CastOps::ZExt))
4198 ? Builder.CreateAndReduce(Vect)
4199 : Builder.CreateOrReduce(Vect);
4200 if (Arg != Vect)
4201 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4202 return replaceInstUsesWith(CI, Res);
4203 }
4204 }
4205 }
4206 [[fallthrough]];
4207 }
4208 case Intrinsic::vector_reduce_fmax:
4209 case Intrinsic::vector_reduce_fmin:
4210 case Intrinsic::vector_reduce_fadd:
4211 case Intrinsic::vector_reduce_fmul: {
4212 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4213 IID != Intrinsic::vector_reduce_fmul) ||
4214 II->hasAllowReassoc();
4215 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4216 IID == Intrinsic::vector_reduce_fmul)
4217 ? 1
4218 : 0;
4219 Value *Arg = II->getArgOperand(ArgIdx);
4220 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4221 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4222 return nullptr;
4223 }
4224 break;
4225 }
4226 case Intrinsic::is_fpclass: {
4227 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4228 return I;
4229 break;
4230 }
4231 case Intrinsic::threadlocal_address: {
4232 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4233 MaybeAlign Align = II->getRetAlign();
4234 if (MinAlign > Align.valueOrOne()) {
4235 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4236 return II;
4237 }
4238 break;
4239 }
4240 case Intrinsic::frexp: {
4241 Value *X;
4242 // The first result is idempotent with the added complication of the struct
4243 // return, and the second result is zero because the value is already
4244 // normalized.
4245 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4247 X = Builder.CreateInsertValue(
4248 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4249 1);
4250 return replaceInstUsesWith(*II, X);
4251 }
4252 }
4253 break;
4254 }
4255 case Intrinsic::get_active_lane_mask: {
4256 const APInt *Op0, *Op1;
4257 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4258 match(II->getOperand(1), m_APInt(Op1))) {
4259 Type *OpTy = II->getOperand(0)->getType();
4260 return replaceInstUsesWith(
4261 *II, Builder.CreateIntrinsic(
4262 II->getType(), Intrinsic::get_active_lane_mask,
4263 {Constant::getNullValue(OpTy),
4264 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4265 }
4266 break;
4267 }
4268 case Intrinsic::experimental_get_vector_length: {
4269 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4270 unsigned BitWidth =
4271 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4272 II->getType()->getScalarSizeInBits());
4273 ConstantRange Cnt =
4274 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4275 SQ.getWithInstruction(II))
4277 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4278 ->getValue()
4279 .zextOrTrunc(Cnt.getBitWidth());
4280 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4281 MaxLanes = MaxLanes.multiply(
4282 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4283
4284 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4285 return replaceInstUsesWith(
4286 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4287 return nullptr;
4288 }
4289 default: {
4290 // Handle target specific intrinsics
4291 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4292 if (V)
4293 return *V;
4294 break;
4295 }
4296 }
4297
4298 // Try to fold intrinsic into select/phi operands. This is legal if:
4299 // * The intrinsic is speculatable.
4300 // * The operand is one of the following:
4301 // - a phi.
4302 // - a select with a scalar condition.
4303 // - a select with a vector condition and II is not a cross lane operation.
4305 for (Value *Op : II->args()) {
4306 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4307 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4308 if (IsVectorCond &&
4309 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4310 continue;
4311 // Don't replace a scalar select with a more expensive vector select if
4312 // we can't simplify both arms of the select.
4313 bool SimplifyBothArms =
4314 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4316 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4317 return R;
4318 }
4319 if (auto *Phi = dyn_cast<PHINode>(Op))
4320 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4321 return R;
4322 }
4323 }
4324
4326 return Shuf;
4327
4329 return replaceInstUsesWith(*II, Reverse);
4330
4332 return replaceInstUsesWith(*II, Res);
4333
4334 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4335 // context, so it is handled in visitCallBase and we should trigger it.
4336 return visitCallBase(*II);
4337}
4338
4339// Fence instruction simplification
4341 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4342 // This check is solely here to handle arbitrary target-dependent syncscopes.
4343 // TODO: Can remove if does not matter in practice.
4344 if (NFI && FI.isIdenticalTo(NFI))
4345 return eraseInstFromFunction(FI);
4346
4347 // Returns true if FI1 is identical or stronger fence than FI2.
4348 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4349 auto FI1SyncScope = FI1->getSyncScopeID();
4350 // Consider same scope, where scope is global or single-thread.
4351 if (FI1SyncScope != FI2->getSyncScopeID() ||
4352 (FI1SyncScope != SyncScope::System &&
4353 FI1SyncScope != SyncScope::SingleThread))
4354 return false;
4355
4356 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4357 };
4358 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4359 return eraseInstFromFunction(FI);
4360
4361 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4362 if (isIdenticalOrStrongerFence(PFI, &FI))
4363 return eraseInstFromFunction(FI);
4364 return nullptr;
4365}
4366
4367// InvokeInst simplification
4369 return visitCallBase(II);
4370}
4371
4372// CallBrInst simplification
4374 return visitCallBase(CBI);
4375}
4376
4378 if (!CI->hasFnAttr("modular-format"))
4379 return nullptr;
4380
4382 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4383 // TODO: Make use of the first two arguments
4384 unsigned FirstArgIdx;
4385 [[maybe_unused]] bool Error;
4386 Error = Args[2].getAsInteger(10, FirstArgIdx);
4387 assert(!Error && "invalid first arg index");
4388 --FirstArgIdx;
4389 StringRef FnName = Args[3];
4390 StringRef ImplName = Args[4];
4392
4393 if (AllAspects.empty())
4394 return nullptr;
4395
4396 SmallVector<StringRef> NeededAspects;
4397 for (StringRef Aspect : AllAspects) {
4398 if (Aspect == "float") {
4399 if (llvm::any_of(
4400 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4401 CI->arg_end()),
4402 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4403 NeededAspects.push_back("float");
4404 } else {
4405 // Unknown aspects are always considered to be needed.
4406 NeededAspects.push_back(Aspect);
4407 }
4408 }
4409
4410 if (NeededAspects.size() == AllAspects.size())
4411 return nullptr;
4412
4413 Module *M = CI->getModule();
4414 LLVMContext &Ctx = M->getContext();
4415 Function *Callee = CI->getCalledFunction();
4416 FunctionCallee ModularFn = M->getOrInsertFunction(
4417 FnName, Callee->getFunctionType(),
4418 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4419 CallInst *New = cast<CallInst>(CI->clone());
4420 New->setCalledFunction(ModularFn);
4421 New->removeFnAttr("modular-format");
4422 B.Insert(New);
4423
4424 const auto ReferenceAspect = [&](StringRef Aspect) {
4425 SmallString<20> Name = ImplName;
4426 Name += '_';
4427 Name += Aspect;
4428 Function *RelocNoneFn =
4429 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4430 B.CreateCall(RelocNoneFn,
4431 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4432 };
4433
4434 llvm::sort(NeededAspects);
4435 for (StringRef Request : NeededAspects)
4436 ReferenceAspect(Request);
4437
4438 return New;
4439}
4440
4441Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4442 if (!CI->getCalledFunction()) return nullptr;
4443
4444 // Skip optimizing notail and musttail calls so
4445 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4446 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4447 if (CI->isMustTailCall() || CI->isNoTailCall())
4448 return nullptr;
4449
4450 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4451 replaceInstUsesWith(*From, With);
4452 };
4453 auto InstCombineErase = [this](Instruction *I) {
4455 };
4456 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4457 InstCombineRAUW, InstCombineErase);
4458 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4459 ++NumSimplified;
4460 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4461 }
4462 if (Value *With = optimizeModularFormat(CI, Builder)) {
4463 ++NumSimplified;
4464 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4465 }
4466
4467 return nullptr;
4468}
4469
// Body of findInitTrampolineFromAlloca(Value *TrampMem): walk the users of a
// trampoline memory slot (expected to be an alloca) and return the unique
// init.trampoline intrinsic that initializes it, or nullptr on any ambiguity.
// NOTE(review): the function signature (original line 4470) and the
// dyn_cast<IntrinsicInst>(U) at line 4482 are missing from this extract.
4471 // Strip off at most one level of pointer casts, looking for an alloca. This
4472 // is good enough in practice and simpler than handling any number of casts.
4473 Value *Underlying = TrampMem->stripPointerCasts();
4474 if (Underlying != TrampMem &&
4475 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4476 return nullptr;
4477 if (!isa<AllocaInst>(Underlying))
4478 return nullptr;
4479
4480 IntrinsicInst *InitTrampoline = nullptr;
4481 for (User *U : TrampMem->users()) {
  // NOTE(review): II is presumably dyn_cast<IntrinsicInst>(U) on the missing
  // line 4482 — every user must be an intrinsic call or we bail out.
4483 if (!II)
4484 return nullptr;
4485 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4486 if (InitTrampoline)
4487 // More than one init_trampoline writes to this value. Give up.
4488 return nullptr;
4489 InitTrampoline = II;
4490 continue;
4491 }
4492 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4493 // Allow any number of calls to adjust.trampoline.
4494 continue;
4495 return nullptr;
4496 }
4497
4498 // No call to init.trampoline found.
4499 if (!InitTrampoline)
4500 return nullptr;
4501
4502 // Check that the alloca is being used in the expected way.
4503 if (InitTrampoline->getOperand(0) != TrampMem)
4504 return nullptr;
4505
4506 return InitTrampoline;
4507}
4508
// Body of findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value
// *TrampMem): scan backwards from the adjust.trampoline within its block for
// a matching init.trampoline, stopping at any instruction that may write
// memory (which could clobber the trampoline). NOTE(review): the signature
// (original line 4509) and the dyn_cast<IntrinsicInst>(Inst) at line 4517 are
// missing from this extract.
4510 Value *TrampMem) {
4511 // Visit all the previous instructions in the basic block, and try to find a
4512 // init.trampoline which has a direct path to the adjust.trampoline.
4513 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4514 E = AdjustTramp->getParent()->begin();
4515 I != E;) {
  // Pre-decrement: walk strictly earlier instructions, nearest first.
4516 Instruction *Inst = &*--I;
  // NOTE(review): II presumably comes from dyn_cast<IntrinsicInst>(Inst) on
  // the missing line 4517.
4518 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4519 II->getOperand(0) == TrampMem)
4520 return II;
  // Any intervening store could invalidate the trampoline — give up.
4521 if (Inst->mayWriteToMemory())
4522 return nullptr;
4523 }
4524 return nullptr;
4525}
4526
4527// Given a call to llvm.adjust.trampoline, find and return the corresponding
4528// call to llvm.init.trampoline if the call to the trampoline can be optimized
4529// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the signature (original line 4530, presumably
// `static IntrinsicInst *findInitTrampoline(Value *Callee) {`) and the
// findInitTrampolineFromAlloca check at line 4539 are missing from this
// extract.
4531 Callee = Callee->stripPointerCasts();
4532 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4533 if (!AdjustTramp ||
4534 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4535 return nullptr;
4536
4537 Value *TrampMem = AdjustTramp->getOperand(0);
4538
  // NOTE(review): missing line 4539 presumably tried
  // findInitTrampolineFromAlloca(TrampMem) first; its result is returned here.
4540 return IT;
4541 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4542 return IT;
4543 return nullptr;
4544}
4545
// Fold a call whose callee is ptrauth.sign/ptrauth.resign (reached through a
// no-op inttoptr) into a call on the underlying pointer, when the call's
// "ptrauth" operand bundle matches the intrinsic's key/discriminator.
// Returns the replacement call or nullptr if the pattern doesn't apply.
// NOTE(review): line 4562 (presumably the declaration of NewBundles, e.g.
// SmallVector<OperandBundleDef, 2>) is missing from this extract.
4546Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4547 const Value *Callee = Call.getCalledOperand();
  // The callee must be a no-op inttoptr of an intrinsic result.
4548 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4549 if (!IPC || !IPC->isNoopCast(DL))
4550 return nullptr;
4551
4552 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4553 if (!II)
4554 return nullptr;
4555
4556 Intrinsic::ID IIID = II->getIntrinsicID();
4557 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4558 return nullptr;
4559
4560 // Isolate the ptrauth bundle from the others.
4561 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4563 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4564 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4565 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4566 PtrAuthBundleOrNone = Bundle;
4567 else
4568 NewBundles.emplace_back(Bundle);
4569 }
4570
  // Without a ptrauth bundle there is nothing to match against.
4571 if (!PtrAuthBundleOrNone)
4572 return nullptr;
4573
4574 Value *NewCallee = nullptr;
4575 switch (IIID) {
4576 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4577 // assuming the call bundle and the sign operands match.
4578 case Intrinsic::ptrauth_resign: {
4579 // Resign result key should match bundle.
4580 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4581 return nullptr;
4582 // Resign result discriminator should match bundle.
4583 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4584 return nullptr;
4585
4586 // Resign input (auth) key should also match: we can't change the key on
4587 // the new call we're generating, because we don't know what keys are valid.
4588 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4589 return nullptr;
4590
  // Re-emit a ptrauth bundle carrying the resign's input key/discriminator.
4591 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4592 NewBundles.emplace_back("ptrauth", NewBundleOps);
4593 NewCallee = II->getOperand(0);
4594 break;
4595 }
4596
4597 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4598 // assuming the call bundle and the sign operands match.
4599 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4600 case Intrinsic::ptrauth_sign: {
4601 // Sign key should match bundle.
4602 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4603 return nullptr;
4604 // Sign discriminator should match bundle.
4605 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4606 return nullptr;
4607 NewCallee = II->getOperand(0);
4608 break;
4609 }
4610 default:
4611 llvm_unreachable("unexpected intrinsic ID");
4612 }
4613
4614 if (!NewCallee)
4615 return nullptr;
4616
  // Rebuild the call with the stripped callee and the filtered bundle list.
4617 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4618 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4619 NewCall->setCalledOperand(NewCallee);
4620 return NewCall;
4621}
4622
// Turn a ptrauth-constant-callee call into a direct call when the call's
// "ptrauth" bundle is known compatible with the constant's key/discriminator.
// NOTE(review): lines 4624 (CPA initializer — presumably
// dyn_cast<ConstantPtrAuth>(Call.getCalledOperand())), 4634 (PAB — presumably
// Call.getOperandBundle(LLVMContext::OB_ptrauth)), and 4646 (NewCall —
// presumably CallBase::removeOperandBundle(..., OB_ptrauth)) are missing from
// this extract; confirm against upstream before editing.
4623Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4625 if (!CPA)
4626 return nullptr;
4627
4628 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4629 // If the ptrauth constant isn't based on a function pointer, bail out.
4630 if (!CalleeF)
4631 return nullptr;
4632
4633 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4635 if (!PAB)
4636 return nullptr;
4637
4638 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4639 Value *Discriminator = PAB->Inputs[1];
4640
4641 // If the bundle doesn't match, this is probably going to fail to auth.
4642 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4643 return nullptr;
4644
4645 // If the bundle matches the constant, proceed in making this a direct call.
4647 NewCall->setCalledOperand(CalleeF);
4648 return NewCall;
4649}
4650
// Annotate an allocation call with dereferenceable(_or_null) and align return
// attributes derived from its known allocation size/alignment. Returns true
// if any attribute was added. NOTE(review): lines 4668/4672 (the addRetAttr
// calls for the dereferenceable attributes) and 4689-4690 (the addRetAttr for
// alignment) are missing from this extract.
4651bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4652 const TargetLibraryInfo *TLI) {
4653 // Note: We only handle cases which can't be driven from generic attributes
4654 // here. So, for example, nonnull and noalias (which are common properties
4655 // of some allocation functions) are expected to be handled via annotation
4656 // of the respective allocator declaration with generic attributes.
4657 bool Changed = false;
4658
  // Only pointer-returning calls can carry these return attributes.
4659 if (!Call.getType()->isPointerTy())
4660 return Changed;
4661
4662 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4663 if (Size && *Size != 0) {
4664 // TODO: We really should just emit deref_or_null here and then
4665 // let the generic inference code combine that with nonnull.
4666 if (Call.hasRetAttr(Attribute::NonNull)) {
  // Already nonnull: a plain dereferenceable attribute is valid.
4667 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4669 Call.getContext(), Size->getLimitedValue()));
4670 } else {
  // May be null: only dereferenceable_or_null is sound.
4671 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4673 Call.getContext(), Size->getLimitedValue()));
4674 }
4675 }
4676
4677 // Add alignment attribute if alignment is a power of two constant.
4678 Value *Alignment = getAllocAlignment(&Call, TLI);
4679 if (!Alignment)
4680 return Changed;
4681
4682 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4683 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4684 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4685 if (llvm::isPowerOf2_64(AlignmentVal)) {
  // Only strengthen an existing (or implicit align-1) attribute.
4686 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4687 Align NewAlign = Align(AlignmentVal);
4688 if (NewAlign > ExistingAlign) {
4691 Changed = true;
4692 }
4693 }
4694 }
4695 return Changed;
4696}
4697
4698/// Improvements for call, callbr and invoke instructions.
/// Central dispatch for call-like instructions: annotates alloc sites, infers
/// nonnull args, strips casts off callees, removes dead calls, folds
/// trampolines and ptrauth patterns, simplifies library calls, and shrinks
/// gc-live bundles on statepoints. Returns the replacement instruction (or
/// &Call when only attributes/metadata changed), or nullptr.
/// NOTE(review): this doxygen extract is missing numerous lines (4715, 4742,
/// 4754, 4763-65, 4771, 4791, 4796, 4804-05, 4857, 4876, 4892, 4901-02,
/// 4912-13, 4943); comments at those gaps below are inferences to confirm
/// against upstream.
4699Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4700 bool Changed = annotateAnyAllocSite(Call, &TLI);
4701
4702 // Mark any parameters that are known to be non-null with the nonnull
4703 // attribute. This is helpful for inlining calls to functions with null
4704 // checks on their arguments.
4705 SmallVector<unsigned, 4> ArgNos;
4706 unsigned ArgNo = 0;
4707
4708 for (Value *V : Call.args()) {
4709 if (V->getType()->isPointerTy()) {
4710 // Simplify the nonnull operand if the parameter is known to be nonnull.
4711 // Otherwise, try to infer nonnull for it.
4712 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4713 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4714 (HasDereferenceable &&
4716 V->getType()->getPointerAddressSpace()))) {
4717 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4718 replaceOperand(Call, ArgNo, Res);
4719 Changed = true;
4720 }
4721 } else if (isKnownNonZero(V,
4722 getSimplifyQuery().getWithInstruction(&Call))) {
4723 ArgNos.push_back(ArgNo);
4724 }
4725 }
4726 ArgNo++;
4727 }
4728
4729 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4730
  // Batch-apply the nonnull attribute to all arguments proved nonzero above.
4731 if (!ArgNos.empty()) {
4732 AttributeList AS = Call.getAttributes();
4733 LLVMContext &Ctx = Call.getContext();
4734 AS = AS.addParamAttribute(Ctx, ArgNos,
4735 Attribute::get(Ctx, Attribute::NonNull));
4736 Call.setAttributes(AS);
4737 Changed = true;
4738 }
4739
4740 // If the callee is a pointer to a function, attempt to move any casts to the
4741 // arguments of the call/callbr/invoke.
  // NOTE(review): missing line 4742 presumably declares
  // Value *Callee = Call.getCalledOperand();
4743 Function *CalleeF = dyn_cast<Function>(Callee);
4744 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4745 transformConstExprCastCall(Call))
4746 return nullptr;
4747
4748 if (CalleeF) {
4749 // Remove the convergent attr on calls when the callee is not convergent.
4750 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4751 !CalleeF->isIntrinsic()) {
4752 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4753 << "\n");
  // NOTE(review): missing line 4754 presumably Call.setNotConvergent();
4755 return &Call;
4756 }
4757
4758 // If the call and callee calling conventions don't match, and neither one
4759 // of the calling conventions is compatible with C calling convention
4760 // this call must be unreachable, as the call is undefined.
4761 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4762 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4766 // Only do this for calls to a function with a body. A prototype may
4767 // not actually end up matching the implementation's calling conv for a
4768 // variety of reasons (e.g. it may be written in assembly).
4769 !CalleeF->isDeclaration()) {
4770 Instruction *OldCall = &Call;
4772 // If OldCall does not return void then replaceInstUsesWith poison.
4773 // This allows ValueHandlers and custom metadata to adjust itself.
4774 if (!OldCall->getType()->isVoidTy())
4775 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4776 if (isa<CallInst>(OldCall))
4777 return eraseInstFromFunction(*OldCall);
4778
4779 // We cannot remove an invoke or a callbr, because it would change thexi
4780 // CFG, just change the callee to a null pointer.
4781 cast<CallBase>(OldCall)->setCalledFunction(
4782 CalleeF->getFunctionType(),
4783 Constant::getNullValue(CalleeF->getType()));
4784 return nullptr;
4785 }
4786 }
4787
4788 // Calling a null function pointer is undefined if a null address isn't
4789 // dereferenceable.
4790 if ((isa<ConstantPointerNull>(Callee) &&
4792 isa<UndefValue>(Callee)) {
4793 // If Call does not return void then replaceInstUsesWith poison.
4794 // This allows ValueHandlers and custom metadata to adjust itself.
4795 if (!Call.getType()->isVoidTy())
4797
4798 if (Call.isTerminator()) {
4799 // Can't remove an invoke or callbr because we cannot change the CFG.
4800 return nullptr;
4801 }
4802
4803 // This instruction is not reachable, just remove it.
  // NOTE(review): missing lines 4804-4805 presumably insert an unreachable
  // marker and erase the call.
4806 }
4807
  // Devirtualize calls made through init/adjust.trampoline pairs.
4808 if (IntrinsicInst *II = findInitTrampoline(Callee))
4809 return transformCallThroughTrampoline(Call, *II);
4810
4811 // Combine calls involving pointer authentication intrinsics.
4812 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4813 return NewCall;
4814
4815 // Combine calls to ptrauth constants.
4816 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4817 return NewCall;
4818
4819 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4820 InlineAsm *IA = cast<InlineAsm>(Callee);
4821 if (!IA->canThrow()) {
4822 // Normal inline asm calls cannot throw - mark them
4823 // 'nounwind'.
4825 Changed = true;
4826 }
4827 }
4828
4829 // Try to optimize the call if possible, we require DataLayout for most of
4830 // this. None of these calls are seen as possibly dead so go ahead and
4831 // delete the instruction now.
4832 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4833 Instruction *I = tryOptimizeCall(CI);
4834 // If we changed something return the result, etc. Otherwise let
4835 // the fallthrough check.
4836 if (I) return eraseInstFromFunction(*I);
4837 }
4838
  // Replace uses of a call returning one of its arguments ("returned" attr)
  // with that argument, when the types are losslessly convertible.
4839 if (!Call.use_empty() && !Call.isMustTailCall())
4840 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4841 Type *CallTy = Call.getType();
4842 Type *RetArgTy = ReturnedArg->getType();
4843 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4844 return replaceInstUsesWith(
4845 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4846 }
4847
4848 // Drop unnecessary callee_type metadata from calls that were converted
4849 // into direct calls.
4850 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4851 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4852 Changed = true;
4853 }
4854
4855 // Drop unnecessary kcfi operand bundles from calls that were converted
4856 // into direct calls.
  // NOTE(review): missing line 4857 presumably fetches the kcfi bundle, e.g.
  // auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
4858 if (Bundle && !Call.isIndirectCall()) {
4859 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4860 if (CalleeF) {
4861 ConstantInt *FunctionType = nullptr;
4862 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4863
4864 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4865 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4866
4867 if (FunctionType &&
4868 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4869 dbgs() << Call.getModule()->getName()
4870 << ": warning: kcfi: " << Call.getCaller()->getName()
4871 << ": call to " << CalleeF->getName()
4872 << " using a mismatching function pointer type\n";
4873 }
4874 });
4875
4877 }
4878
4879 if (isRemovableAlloc(&Call, &TLI))
4880 return visitAllocSite(Call);
4881
4882 // Handle intrinsics which can be used in both call and invoke context.
4883 switch (Call.getIntrinsicID()) {
4884 case Intrinsic::experimental_gc_statepoint: {
4885 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4886 SmallPtrSet<Value *, 32> LiveGcValues;
4887 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4888 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4889
4890 // Remove the relocation if unused.
4891 if (GCR.use_empty()) {
4893 continue;
4894 }
4895
4896 Value *DerivedPtr = GCR.getDerivedPtr();
4897 Value *BasePtr = GCR.getBasePtr();
4898
4899 // Undef is undef, even after relocation.
4900 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4903 continue;
4904 }
4905
4906 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4907 // The relocation of null will be null for most any collector.
4908 // TODO: provide a hook for this in GCStrategy. There might be some
4909 // weird collector this property does not hold for.
4910 if (isa<ConstantPointerNull>(DerivedPtr)) {
4911 // Use null-pointer of gc_relocate's type to replace it.
4914 continue;
4915 }
4916
4917 // isKnownNonNull -> nonnull attribute
4918 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4919 isKnownNonZero(DerivedPtr,
4920 getSimplifyQuery().getWithInstruction(&Call))) {
4921 GCR.addRetAttr(Attribute::NonNull);
4922 // We discovered new fact, re-check users.
4923 Worklist.pushUsersToWorkList(GCR);
4924 }
4925 }
4926
4927 // If we have two copies of the same pointer in the statepoint argument
4928 // list, canonicalize to one. This may let us common gc.relocates.
4929 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4930 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4931 auto *OpIntTy = GCR.getOperand(2)->getType();
4932 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4933 }
4934
4935 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4936 // Canonicalize on the type from the uses to the defs
4937
4938 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4939 LiveGcValues.insert(BasePtr);
4940 LiveGcValues.insert(DerivedPtr);
4941 }
4942 std::optional<OperandBundleUse> Bundle =
4944 unsigned NumOfGCLives = LiveGcValues.size();
4945 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4946 break;
4947 // We can reduce the size of gc live bundle.
  // Assign each surviving live value a dense new index; dead entries map to
  // the sentinel NumOfGCLives.
4948 DenseMap<Value *, unsigned> Val2Idx;
4949 std::vector<Value *> NewLiveGc;
4950 for (Value *V : Bundle->Inputs) {
4951 auto [It, Inserted] = Val2Idx.try_emplace(V);
4952 if (!Inserted)
4953 continue;
4954 if (LiveGcValues.count(V)) {
4955 It->second = NewLiveGc.size();
4956 NewLiveGc.push_back(V);
4957 } else
4958 It->second = NumOfGCLives;
4959 }
4960 // Update all gc.relocates
4961 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4962 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4963 Value *BasePtr = GCR.getBasePtr();
4964 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4965 "Missed live gc for base pointer");
4966 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4967 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4968 Value *DerivedPtr = GCR.getDerivedPtr();
4969 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4970 "Missed live gc for derived pointer");
4971 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4972 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4973 }
4974 // Create new statepoint instruction.
4975 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4976 return CallBase::Create(&Call, NewBundle);
4977 }
4978 default: { break; }
4979 }
4980
4981 return Changed ? &Call : nullptr;
4982}
4983
4984/// If the callee is a constexpr cast of a function, attempt to move the cast to
4985/// the arguments of the call/invoke.
4986/// CallBrInst is not supported.
/// Returns true when the call was rewritten (the original is erased).
/// NOTE(review): this doxygen extract is missing lines 4989, 4993, 5021,
/// 5121, 5190, 5203, 5213-14, 5227, 5231; comments at those gaps below are
/// inferences to confirm against upstream.
4987bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4988 auto *Callee =
4990 if (!Callee)
4991 return false;
4992
4994 "CallBr's don't have a single point after a def to insert at");
4995
4996 // Don't perform the transform for declarations, which may not be fully
4997 // accurate. For example, void @foo() is commonly used as a placeholder for
4998 // unknown prototypes.
4999 if (Callee->isDeclaration())
5000 return false;
5001
5002 // If this is a call to a thunk function, don't remove the cast. Thunks are
5003 // used to transparently forward all incoming parameters and outgoing return
5004 // values, so it's important to leave the cast in place.
5005 if (Callee->hasFnAttribute("thunk"))
5006 return false;
5007
5008 // If this is a call to a naked function, the assembly might be
5009 // using an argument, or otherwise rely on the frame layout,
5010 // the function prototype will mismatch.
5011 if (Callee->hasFnAttribute(Attribute::Naked))
5012 return false;
5013
5014 // If this is a musttail call, the callee's prototype must match the caller's
5015 // prototype with the exception of pointee types. The code below doesn't
5016 // implement that, so we can't do this transform.
5017 // TODO: Do the transform if it only requires adding pointer casts.
5018 if (Call.isMustTailCall())
5019 return false;
5020
  // NOTE(review): missing line 5021 presumably declares
  // Instruction *Caller = &Call;
5022 const AttributeList &CallerPAL = Call.getAttributes();
5023
5024 // Okay, this is a cast from a function to a different type. Unless doing so
5025 // would cause a type conversion of one of our arguments, change this call to
5026 // be a direct call with arguments casted to the appropriate types.
5027 FunctionType *FT = Callee->getFunctionType();
5028 Type *OldRetTy = Caller->getType();
5029 Type *NewRetTy = FT->getReturnType();
5030
5031 // Check to see if we are changing the return type...
5032 if (OldRetTy != NewRetTy) {
5033
5034 if (NewRetTy->isStructTy())
5035 return false; // TODO: Handle multiple return values.
5036
5037 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5038 if (!Caller->use_empty())
5039 return false; // Cannot transform this return value.
5040 }
5041
5042 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5043 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5044 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5045 NewRetTy, CallerPAL.getRetAttrs())))
5046 return false; // Attribute not compatible with transformed value.
5047 }
5048
5049 // If the callbase is an invoke instruction, and the return value is
5050 // used by a PHI node in a successor, we cannot change the return type of
5051 // the call because there is no place to put the cast instruction (without
5052 // breaking the critical edge). Bail out in this case.
5053 if (!Caller->use_empty()) {
5054 BasicBlock *PhisNotSupportedBlock = nullptr;
5055 if (auto *II = dyn_cast<InvokeInst>(Caller))
5056 PhisNotSupportedBlock = II->getNormalDest();
5057 if (PhisNotSupportedBlock)
5058 for (User *U : Caller->users())
5059 if (PHINode *PN = dyn_cast<PHINode>(U))
5060 if (PN->getParent() == PhisNotSupportedBlock)
5061 return false;
5062 }
5063 }
5064
5065 unsigned NumActualArgs = Call.arg_size();
5066 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5067
5068 // Prevent us turning:
5069 // declare void @takes_i32_inalloca(i32* inalloca)
5070 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5071 //
5072 // into:
5073 // call void @takes_i32_inalloca(i32* null)
5074 //
5075 // Similarly, avoid folding away bitcasts of byval calls.
5076 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5077 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5078 return false;
5079
  // First pass: verify every common argument can be converted and that no
  // per-argument attribute makes the conversion unsound.
5080 auto AI = Call.arg_begin();
5081 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5082 Type *ParamTy = FT->getParamType(i);
5083 Type *ActTy = (*AI)->getType();
5084
5085 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5086 return false; // Cannot transform this parameter value.
5087
5088 // Check if there are any incompatible attributes we cannot drop safely.
5089 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5090 .overlaps(AttributeFuncs::typeIncompatible(
5091 ParamTy, CallerPAL.getParamAttrs(i),
5092 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5093 return false; // Attribute not compatible with transformed value.
5094
5095 if (Call.isInAllocaArgument(i) ||
5096 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5097 return false; // Cannot transform to and from inalloca/preallocated.
5098
5099 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5100 return false;
5101
5102 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5103 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5104 return false; // Cannot transform to or from byval.
5105 }
5106
5107 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5108 !CallerPAL.isEmpty()) {
5109 // In this case we have more arguments than the new function type, but we
5110 // won't be dropping them. Check that these extra arguments have attributes
5111 // that are compatible with being a vararg call argument.
5112 unsigned SRetIdx;
5113 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5114 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5115 return false;
5116 }
5117
5118 // Okay, we decided that this is a safe thing to do: go ahead and start
5119 // inserting cast instructions as necessary.
5120 SmallVector<Value *, 8> Args;
5122 Args.reserve(NumActualArgs);
5123 ArgAttrs.reserve(NumActualArgs);
5124
5125 // Get any return attributes.
5126 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5127
5128 // If the return value is not being used, the type may not be compatible
5129 // with the existing attributes. Wipe out any problematic attributes.
5130 RAttrs.remove(
5131 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5132
  // Second pass: actually build the new argument list with casts inserted.
5133 LLVMContext &Ctx = Call.getContext();
5134 AI = Call.arg_begin();
5135 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5136 Type *ParamTy = FT->getParamType(i);
5137
5138 Value *NewArg = *AI;
5139 if ((*AI)->getType() != ParamTy)
5140 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5141 Args.push_back(NewArg);
5142
5143 // Add any parameter attributes except the ones incompatible with the new
5144 // type. Note that we made sure all incompatible ones are safe to drop.
5145 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5146 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5147 ArgAttrs.push_back(
5148 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5149 }
5150
5151 // If the function takes more arguments than the call was taking, add them
5152 // now.
5153 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5154 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5155 ArgAttrs.push_back(AttributeSet());
5156 }
5157
5158 // If we are removing arguments to the function, emit an obnoxious warning.
5159 if (FT->getNumParams() < NumActualArgs) {
5160 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5161 if (FT->isVarArg()) {
5162 // Add all of the arguments in their promoted form to the arg list.
5163 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5164 Type *PTy = getPromotedType((*AI)->getType());
5165 Value *NewArg = *AI;
5166 if (PTy != (*AI)->getType()) {
5167 // Must promote to pass through va_arg area!
5168 Instruction::CastOps opcode =
5169 CastInst::getCastOpcode(*AI, false, PTy, false);
5170 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5171 }
5172 Args.push_back(NewArg);
5173
5174 // Add any parameter attributes.
5175 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5176 }
5177 }
5178 }
5179
5180 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5181
5182 if (NewRetTy->isVoidTy())
5183 Caller->setName(""); // Void type should not have a name.
5184
5185 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5186 "missing argument attributes");
5187 AttributeList NewCallerPAL = AttributeList::get(
5188 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5189
5191 Call.getOperandBundlesAsDefs(OpBundles);
5192
  // Recreate the call as the same kind of instruction (invoke vs. call),
  // preserving tail-call kind for plain calls.
5193 CallBase *NewCall;
5194 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5195 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5196 II->getUnwindDest(), Args, OpBundles);
5197 } else {
5198 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5199 cast<CallInst>(NewCall)->setTailCallKind(
5200 cast<CallInst>(Caller)->getTailCallKind());
5201 }
5202 NewCall->takeName(Caller);
5204 NewCall->setAttributes(NewCallerPAL);
5205
5206 // Preserve prof metadata if any.
5207 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5208
5209 // Insert a cast of the return type as necessary.
5210 Instruction *NC = NewCall;
5211 Value *NV = NC;
5212 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5213 assert(!NV->getType()->isVoidTy());
  // NOTE(review): missing line 5214 presumably builds the return-value cast
  // (CastInst::CreateBitOrPointerCast) and assigns NV.
5215 NC->setDebugLoc(Caller->getDebugLoc());
5216
5217 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5218 assert(OptInsertPt && "No place to insert cast");
5219 InsertNewInstBefore(NC, *OptInsertPt);
5220 Worklist.pushUsersToWorkList(*Caller);
5221 }
5222
5223 if (!Caller->use_empty())
5224 replaceInstUsesWith(*Caller, NV);
5225 else if (Caller->hasValueHandle()) {
5226 if (OldRetTy == NV->getType())
5228 else
5229 // We cannot call ValueIsRAUWd with a different type, and the
5230 // actual tracked value will disappear.
5232 }
5233
5234 eraseInstFromFunction(*Caller);
5235 return true;
5236}
5237
5238/// Turn a call to a function created by init_trampoline / adjust_trampoline
5239/// intrinsic pair into a direct call to the underlying function.
/// The nest (static chain) value from the init.trampoline is spliced into the
/// argument list at the callee's 'nest' parameter position, and the call is
/// rebuilt as the matching instruction kind (call/invoke/callbr).
/// NOTE(review): lines 5240 (return type), 5251 (NestF initializer —
/// presumably cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts())),
/// and 5345 (OpBundles declaration) are missing from this doxygen extract.
5241InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5242 IntrinsicInst &Tramp) {
5243 FunctionType *FTy = Call.getFunctionType();
5244 AttributeList Attrs = Call.getAttributes();
5245
5246 // If the call already has the 'nest' attribute somewhere then give up -
5247 // otherwise 'nest' would occur twice after splicing in the chain.
5248 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5249 return nullptr;
5250
5252 FunctionType *NestFTy = NestF->getFunctionType();
5253
5254 AttributeList NestAttrs = NestF->getAttributes();
5255 if (!NestAttrs.isEmpty()) {
5256 unsigned NestArgNo = 0;
5257 Type *NestTy = nullptr;
5258 AttributeSet NestAttr;
5259
5260 // Look for a parameter marked with the 'nest' attribute.
5261 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5262 E = NestFTy->param_end();
5263 I != E; ++NestArgNo, ++I) {
5264 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5265 if (AS.hasAttribute(Attribute::Nest)) {
5266 // Record the parameter type and any other attributes.
5267 NestTy = *I;
5268 NestAttr = AS;
5269 break;
5270 }
5271 }
5272
5273 if (NestTy) {
5274 std::vector<Value*> NewArgs;
5275 std::vector<AttributeSet> NewArgAttrs;
5276 NewArgs.reserve(Call.arg_size() + 1);
5277 NewArgAttrs.reserve(Call.arg_size());
5278
5279 // Insert the nest argument into the call argument list, which may
5280 // mean appending it. Likewise for attributes.
5281
5282 {
5283 unsigned ArgNo = 0;
5284 auto I = Call.arg_begin(), E = Call.arg_end();
5285 do {
5286 if (ArgNo == NestArgNo) {
5287 // Add the chain argument and attributes.
  // The chain value is operand 2 of the init.trampoline intrinsic.
5288 Value *NestVal = Tramp.getArgOperand(2);
5289 if (NestVal->getType() != NestTy)
5290 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5291 NewArgs.push_back(NestVal);
5292 NewArgAttrs.push_back(NestAttr);
5293 }
5294
5295 if (I == E)
5296 break;
5297
5298 // Add the original argument and attributes.
5299 NewArgs.push_back(*I);
5300 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5301
5302 ++ArgNo;
5303 ++I;
5304 } while (true);
5305 }
5306
5307 // The trampoline may have been bitcast to a bogus type (FTy).
5308 // Handle this by synthesizing a new function type, equal to FTy
5309 // with the chain parameter inserted.
5310
5311 std::vector<Type*> NewTypes;
5312 NewTypes.reserve(FTy->getNumParams()+1);
5313
5314 // Insert the chain's type into the list of parameter types, which may
5315 // mean appending it.
5316 {
5317 unsigned ArgNo = 0;
5318 FunctionType::param_iterator I = FTy->param_begin(),
5319 E = FTy->param_end();
5320
5321 do {
5322 if (ArgNo == NestArgNo)
5323 // Add the chain's type.
5324 NewTypes.push_back(NestTy);
5325
5326 if (I == E)
5327 break;
5328
5329 // Add the original type.
5330 NewTypes.push_back(*I);
5331
5332 ++ArgNo;
5333 ++I;
5334 } while (true);
5335 }
5336
5337 // Replace the trampoline call with a direct call. Let the generic
5338 // code sort out any function type mismatches.
5339 FunctionType *NewFTy =
5340 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5341 AttributeList NewPAL =
5342 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5343 Attrs.getRetAttrs(), NewArgAttrs);
5344
5346 Call.getOperandBundlesAsDefs(OpBundles);
5347
  // Rebuild as the same call kind, carrying over calling convention,
  // attributes, and (for calls) tail-call kind.
5348 Instruction *NewCaller;
5349 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5350 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5351 II->getUnwindDest(), NewArgs, OpBundles);
5352 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5353 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5354 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5355 NewCaller =
5356 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5357 CBI->getIndirectDests(), NewArgs, OpBundles);
5358 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5359 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5360 } else {
5361 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5362 cast<CallInst>(NewCaller)->setTailCallKind(
5363 cast<CallInst>(Call).getTailCallKind());
5364 cast<CallInst>(NewCaller)->setCallingConv(
5365 cast<CallInst>(Call).getCallingConv());
5366 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5367 }
5368 NewCaller->setDebugLoc(Call.getDebugLoc());
5369
5370 return NewCaller;
5371 }
5372 }
5373
5374 // Replace the trampoline call with a direct call. Since there is no 'nest'
5375 // parameter, there is no need to adjust the argument list. Let the generic
5376 // code sort out any function type mismatches.
5377 Call.setCalledFunction(FTy, NestF);
5378 return &Call;
5379}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:259
bool isNegative() const
Definition APFloat.h:1512
void clearSign()
Definition APFloat.h:1349
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1139
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1959
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1939
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1946
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2047
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1952
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:236
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:279
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:240
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
bool hasOperandBundles() const
Return true if this User has any operand bundles.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:811
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange multiply(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:67
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associate statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc reloactes linked to this statepoint May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:577
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1423
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2054
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2583
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2418
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2181
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:269
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:128
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:147
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1235
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1288
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:832
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:226
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:831
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1706
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume doesn't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1661
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Floating point maxnum.
@ SPF_NABS
Absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1692
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1606
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1777
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1642
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined value of C.
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be false or undef.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be true or undef.
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1679
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negation.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1719
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:258
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:290
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:305
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:111
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:264
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:296
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:302
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const