// Source: LLVM 23.0.0git doxygen export of InstCombineCalls.cpp.
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//
12
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <optional>
#include <utility>
#include <vector>
81
82#define DEBUG_TYPE "instcombine"
84
85using namespace llvm;
86using namespace PatternMatch;
87
88STATISTIC(NumSimplified, "Number of library calls simplified");
89
91 "instcombine-guard-widening-window",
92 cl::init(3),
93 cl::desc("How wide an instruction window to bypass looking for "
94 "another guard"));
95
96/// Return the specified type promoted as it would be to pass though a va_arg
97/// area.
99 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
100 if (ITy->getBitWidth() < 32)
101 return Type::getInt32Ty(Ty->getContext());
102 }
103 return Ty;
104}
105
106/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107/// TODO: This should probably be integrated with visitAllocSites, but that
108/// requires a deeper change to allow either unread or unwritten objects.
110 auto *Src = MI->getRawSource();
111 while (isa<GetElementPtrInst>(Src)) {
112 if (!Src->hasOneUse())
113 return false;
114 Src = cast<Instruction>(Src)->getOperand(0);
115 }
116 return isa<AllocaInst>(Src) && Src->hasOneUse();
117}
118
120 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
121 MaybeAlign CopyDstAlign = MI->getDestAlign();
122 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
123 MI->setDestAlignment(DstAlign);
124 return MI;
125 }
126
127 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
128 MaybeAlign CopySrcAlign = MI->getSourceAlign();
129 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
130 MI->setSourceAlignment(SrcAlign);
131 return MI;
132 }
133
134 // If we have a store to a location which is known constant, we can conclude
135 // that the store must be storing the constant value (else the memory
136 // wouldn't be constant), and this must be a noop.
137 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
138 // Set the size of the copy to 0, it will be deleted on the next iteration.
139 MI->setLength((uint64_t)0);
140 return MI;
141 }
142
143 // If the source is provably undef, the memcpy/memmove doesn't do anything
144 // (unless the transfer is volatile).
145 if (hasUndefSource(MI) && !MI->isVolatile()) {
146 // Set the size of the copy to 0, it will be deleted on the next iteration.
147 MI->setLength((uint64_t)0);
148 return MI;
149 }
150
151 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152 // load/store.
153 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
154 if (!MemOpLength) return nullptr;
155
156 // Source and destination pointer types are always "i8*" for intrinsic. See
157 // if the size is something we can handle with a single primitive load/store.
158 // A single load+store correctly handles overlapping memory in the memmove
159 // case.
160 uint64_t Size = MemOpLength->getLimitedValue();
161 assert(Size && "0-sized memory transferring should be removed already.");
162
163 if (Size > 8 || (Size&(Size-1)))
164 return nullptr; // If not 1/2/4/8 bytes, exit.
165
166 // If it is an atomic and alignment is less than the size then we will
167 // introduce the unaligned memory access which will be later transformed
168 // into libcall in CodeGen. This is not evident performance gain so disable
169 // it now.
170 if (MI->isAtomic())
171 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
172 return nullptr;
173
174 // Use an integer load+store unless we can find something better.
175 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
176
177 // If the memcpy has metadata describing the members, see if we can get the
178 // TBAA, scope and noalias tags describing our copy.
179 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
180
181 Value *Src = MI->getArgOperand(1);
182 Value *Dest = MI->getArgOperand(0);
183 LoadInst *L = Builder.CreateLoad(IntType, Src);
184 // Alignment from the mem intrinsic will be better, so use it.
185 L->setAlignment(*CopySrcAlign);
186 L->setAAMetadata(AACopyMD);
187 MDNode *LoopMemParallelMD =
188 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
189 if (LoopMemParallelMD)
190 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
191 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
192 if (AccessGroupMD)
193 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
194
195 StoreInst *S = Builder.CreateStore(L, Dest);
196 // Alignment from the mem intrinsic will be better, so use it.
197 S->setAlignment(*CopyDstAlign);
198 S->setAAMetadata(AACopyMD);
199 if (LoopMemParallelMD)
200 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
201 if (AccessGroupMD)
202 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
203 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
204
205 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
206 // non-atomics can be volatile
207 L->setVolatile(MT->isVolatile());
208 S->setVolatile(MT->isVolatile());
209 }
210 if (MI->isAtomic()) {
211 // atomics have to be unordered
212 L->setOrdering(AtomicOrdering::Unordered);
214 }
215
216 // Set the size of the copy to 0, it will be deleted on the next iteration.
217 MI->setLength((uint64_t)0);
218 return MI;
219}
220
222 const Align KnownAlignment =
223 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
224 MaybeAlign MemSetAlign = MI->getDestAlign();
225 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
226 MI->setDestAlignment(KnownAlignment);
227 return MI;
228 }
229
230 // If we have a store to a location which is known constant, we can conclude
231 // that the store must be storing the constant value (else the memory
232 // wouldn't be constant), and this must be a noop.
233 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
234 // Set the size of the copy to 0, it will be deleted on the next iteration.
235 MI->setLength((uint64_t)0);
236 return MI;
237 }
238
239 // Remove memset with an undef value.
240 // FIXME: This is technically incorrect because it might overwrite a poison
241 // value. Change to PoisonValue once #52930 is resolved.
242 if (isa<UndefValue>(MI->getValue())) {
243 // Set the size of the copy to 0, it will be deleted on the next iteration.
244 MI->setLength((uint64_t)0);
245 return MI;
246 }
247
248 // Extract the length and alignment and fill if they are constant.
249 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
250 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
251 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
252 return nullptr;
253 const uint64_t Len = LenC->getLimitedValue();
254 assert(Len && "0-sized memory setting should be removed already.");
255 const Align Alignment = MI->getDestAlign().valueOrOne();
256
257 // If it is an atomic and alignment is less than the size then we will
258 // introduce the unaligned memory access which will be later transformed
259 // into libcall in CodeGen. This is not evident performance gain so disable
260 // it now.
261 if (MI->isAtomic() && Alignment < Len)
262 return nullptr;
263
264 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
265 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
266 Value *Dest = MI->getDest();
267
268 // Extract the fill value and store.
269 Constant *FillVal = ConstantInt::get(
270 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
271 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
272 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
273 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
274 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
275 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
276 }
277
278 S->setAlignment(Alignment);
279 if (MI->isAtomic())
281
282 // Set the size of the copy to 0, it will be deleted on the next iteration.
283 MI->setLength((uint64_t)0);
284 return MI;
285 }
286
287 return nullptr;
288}
289
290// TODO, Obvious Missing Transforms:
291// * Narrow width by halfs excluding zero/undef lanes
292Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293 Value *LoadPtr = II.getArgOperand(0);
294 const Align Alignment = II.getParamAlign(0).valueOrOne();
295
296 // If the mask is all ones or undefs, this is a plain vector load of the 1st
297 // argument.
298 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
299 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
300 "unmaskedload");
301 L->copyMetadata(II);
302 return L;
303 }
304
305 // If we can unconditionally load from this address, replace with a
306 // load/select idiom. TODO: use DT for context sensitive query
307 if (isDereferenceablePointer(LoadPtr, II.getType(),
308 II.getDataLayout(), &II, &AC)) {
309 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
310 "unmaskedload");
311 LI->copyMetadata(II);
312 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
313 }
314
315 return nullptr;
316}
317
318// TODO, Obvious Missing Transforms:
319// * Single constant active lane -> store
320// * Narrow width by halfs excluding zero/undef lanes
321Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
322 Value *StorePtr = II.getArgOperand(1);
323 Align Alignment = II.getParamAlign(1).valueOrOne();
324 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
325 if (!ConstMask)
326 return nullptr;
327
328 // If the mask is all zeros, this instruction does nothing.
329 if (maskIsAllZeroOrUndef(ConstMask))
331
332 // If the mask is all ones, this is a plain vector store of the 1st argument.
333 if (maskIsAllOneOrUndef(ConstMask)) {
334 StoreInst *S =
335 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
336 S->copyMetadata(II);
337 return S;
338 }
339
340 if (isa<ScalableVectorType>(ConstMask->getType()))
341 return nullptr;
342
343 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
344 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
345 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
346 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
347 PoisonElts))
348 return replaceOperand(II, 0, V);
349
350 return nullptr;
351}
352
353// TODO, Obvious Missing Transforms:
354// * Single constant active lane load -> load
355// * Dereferenceable address & few lanes -> scalarize speculative load/selects
356// * Adjacent vector addresses -> masked.load
357// * Narrow width by halfs excluding zero/undef lanes
358// * Vector incrementing address -> vector masked load
359Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
360 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
361 if (!ConstMask)
362 return nullptr;
363
364 // Vector splat address w/known mask -> scalar load
365 // Fold the gather to load the source vector first lane
366 // because it is reloading the same value each time
367 if (ConstMask->isAllOnesValue())
368 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
369 auto *VecTy = cast<VectorType>(II.getType());
370 const Align Alignment = II.getParamAlign(0).valueOrOne();
371 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
372 Alignment, "load.scalar");
373 Value *Shuf =
374 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
376 }
377
378 return nullptr;
379}
380
381// TODO, Obvious Missing Transforms:
382// * Single constant active lane -> store
383// * Adjacent vector addresses -> masked.store
384// * Narrow store width by halfs excluding zero/undef lanes
385// * Vector incrementing address -> vector masked store
386Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
387 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
388 if (!ConstMask)
389 return nullptr;
390
391 // If the mask is all zeros, a scatter does nothing.
392 if (maskIsAllZeroOrUndef(ConstMask))
394
395 // Vector splat address -> scalar store
396 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
397 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
398 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
399 if (maskContainsAllOneOrUndef(ConstMask)) {
400 Align Alignment = II.getParamAlign(1).valueOrOne();
401 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
402 Alignment);
403 S->copyMetadata(II);
404 return S;
405 }
406 }
407 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408 // lastlane), ptr
409 if (ConstMask->isAllOnesValue()) {
410 Align Alignment = II.getParamAlign(1).valueOrOne();
411 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
412 ElementCount VF = WideLoadTy->getElementCount();
413 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
414 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
415 Value *Extract =
416 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
417 StoreInst *S =
418 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
419 S->copyMetadata(II);
420 return S;
421 }
422 }
423 if (isa<ScalableVectorType>(ConstMask->getType()))
424 return nullptr;
425
426 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
428 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
429 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
430 PoisonElts))
431 return replaceOperand(II, 0, V);
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 1, V);
435
436 return nullptr;
437}
438
439/// This function transforms launder.invariant.group and strip.invariant.group
440/// like:
441/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442/// launder(strip(%x)) -> launder(%x)
443/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444/// strip(launder(%x)) -> strip(%x)
445/// This is legal because it preserves the most recent information about
446/// the presence or absence of invariant.group.
448 InstCombinerImpl &IC) {
449 auto *Arg = II.getArgOperand(0);
450 auto *StrippedArg = Arg->stripPointerCasts();
451 auto *StrippedInvariantGroupsArg = StrippedArg;
452 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
453 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455 break;
456 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
457 }
458 if (StrippedArg == StrippedInvariantGroupsArg)
459 return nullptr; // No launders/strips to remove.
460
461 Value *Result = nullptr;
462
463 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
465 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
467 else
469 "simplifyInvariantGroupIntrinsic only handles launder and strip");
470 if (Result->getType()->getPointerAddressSpace() !=
471 II.getType()->getPointerAddressSpace())
472 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
473
474 return cast<Instruction>(Result);
475}
476
478 assert((II.getIntrinsicID() == Intrinsic::cttz ||
479 II.getIntrinsicID() == Intrinsic::ctlz) &&
480 "Expected cttz or ctlz intrinsic");
481 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482 Value *Op0 = II.getArgOperand(0);
483 Value *Op1 = II.getArgOperand(1);
484 Value *X;
485 // ctlz(bitreverse(x)) -> cttz(x)
486 // cttz(bitreverse(x)) -> ctlz(x)
487 if (match(Op0, m_BitReverse(m_Value(X)))) {
488 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489 Function *F =
490 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
491 return CallInst::Create(F, {X, II.getArgOperand(1)});
492 }
493
494 if (II.getType()->isIntOrIntVectorTy(1)) {
495 // ctlz/cttz i1 Op0 --> not Op0
496 if (match(Op1, m_Zero()))
497 return BinaryOperator::CreateNot(Op0);
498 // If zero is poison, then the input can be assumed to be "true", so the
499 // instruction simplifies to "false".
500 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
502 }
503
504 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505 if (II.hasOneUse() && match(Op1, m_Zero()) &&
506 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
507 II.dropUBImplyingAttrsAndMetadata();
508 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
509 }
510
511 Constant *C;
512
513 if (IsTZ) {
514 // cttz(-x) -> cttz(x)
515 if (match(Op0, m_Neg(m_Value(X))))
516 return IC.replaceOperand(II, 0, X);
517
518 // cttz(-x & x) -> cttz(x)
519 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(sext(x)) -> cttz(zext(x))
523 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
524 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
525 auto *CttzZext =
526 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
527 return IC.replaceInstUsesWith(II, CttzZext);
528 }
529
530 // Zext doesn't change the number of trailing zeros, so narrow:
531 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
533 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
534 IC.Builder.getTrue());
535 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
536 return IC.replaceInstUsesWith(II, ZextCttz);
537 }
538
539 // cttz(abs(x)) -> cttz(x)
540 // cttz(nabs(x)) -> cttz(x)
541 Value *Y;
543 if (SPF == SPF_ABS || SPF == SPF_NABS)
544 return IC.replaceOperand(II, 0, X);
545
547 return IC.replaceOperand(II, 0, X);
548
549 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
551 match(Op1, m_One())) {
552 Value *ConstCttz =
553 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
554 return BinaryOperator::CreateAdd(ConstCttz, X);
555 }
556
557 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
559 match(Op1, m_One())) {
560 Value *ConstCttz =
561 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
562 return BinaryOperator::CreateSub(ConstCttz, X);
563 }
564
565 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
567 Value *Width =
568 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
569 return BinaryOperator::CreateSub(Width, X);
570 }
571 } else {
572 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
574 match(Op1, m_One())) {
575 Value *ConstCtlz =
576 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
577 return BinaryOperator::CreateAdd(ConstCtlz, X);
578 }
579
580 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
582 match(Op1, m_One())) {
583 Value *ConstCtlz =
584 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
585 return BinaryOperator::CreateSub(ConstCtlz, X);
586 }
587
588 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
589 if (Op0->hasOneUse() &&
590 match(Op0,
592 Type *Ty = II.getType();
593 unsigned BitWidth = Ty->getScalarSizeInBits();
594 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
595 {X, IC.Builder.getFalse()});
596 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
597 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
598 }
599 }
600
601 // cttz(Pow2) -> Log2(Pow2)
602 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
603 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
604 if (IsTZ)
605 return IC.replaceInstUsesWith(II, R);
606 BinaryOperator *BO = BinaryOperator::CreateSub(
607 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
608 R);
609 BO->setHasNoSignedWrap();
611 return BO;
612 }
613
614 KnownBits Known = IC.computeKnownBits(Op0, &II);
615
616 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
617 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
618 : Known.countMaxLeadingZeros();
619 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
620 : Known.countMinLeadingZeros();
621
622 // If all bits above (ctlz) or below (cttz) the first known one are known
623 // zero, this value is constant.
624 // FIXME: This should be in InstSimplify because we're replacing an
625 // instruction with a constant.
626 if (PossibleZeros == DefiniteZeros) {
627 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
628 return IC.replaceInstUsesWith(II, C);
629 }
630
631 // If the input to cttz/ctlz is known to be non-zero,
632 // then change the 'ZeroIsPoison' parameter to 'true'
633 // because we know the zero behavior can't affect the result.
634 if (!Known.One.isZero() ||
636 if (!match(II.getArgOperand(1), m_One()))
637 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
638 }
639
640 // Add range attribute since known bits can't completely reflect what we know.
641 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
642 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
643 !II.getMetadata(LLVMContext::MD_range)) {
644 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
645 APInt(BitWidth, PossibleZeros + 1));
646 II.addRangeRetAttr(Range);
647 return &II;
648 }
649
650 return nullptr;
651}
652
654 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
655 "Expected ctpop intrinsic");
656 Type *Ty = II.getType();
657 unsigned BitWidth = Ty->getScalarSizeInBits();
658 Value *Op0 = II.getArgOperand(0);
659 Value *X, *Y;
660
661 // ctpop(bitreverse(x)) -> ctpop(x)
662 // ctpop(bswap(x)) -> ctpop(x)
663 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
664 return IC.replaceOperand(II, 0, X);
665
666 // ctpop(rot(x)) -> ctpop(x)
667 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
668 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
669 X == Y)
670 return IC.replaceOperand(II, 0, X);
671
672 // ctpop(x | -x) -> bitwidth - cttz(x, false)
673 if (Op0->hasOneUse() &&
674 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
675 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
676 {X, IC.Builder.getFalse()});
677 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
678 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
679 }
680
681 // ctpop(~x & (x - 1)) -> cttz(x, false)
682 if (match(Op0,
684 Function *F =
685 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
686 return CallInst::Create(F, {X, IC.Builder.getFalse()});
687 }
688
689 // Zext doesn't change the number of set bits, so narrow:
690 // ctpop (zext X) --> zext (ctpop X)
691 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
692 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
693 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
694 }
695
696 KnownBits Known(BitWidth);
697 IC.computeKnownBits(Op0, Known, &II);
698
699 // If all bits are zero except for exactly one fixed bit, then the result
700 // must be 0 or 1, and we can get that answer by shifting to LSB:
701 // ctpop (X & 32) --> (X & 32) >> 5
702 // TODO: Investigate removing this as its likely unnecessary given the below
703 // `isKnownToBeAPowerOfTwo` check.
704 if ((~Known.Zero).isPowerOf2())
705 return BinaryOperator::CreateLShr(
706 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
707
708 // More generally we can also handle non-constant power of 2 patterns such as
709 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
710 // ctpop(Pow2OrZero) --> icmp ne X, 0
711 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
712 return CastInst::Create(Instruction::ZExt,
715 Ty);
716
717 // Add range attribute since known bits can't completely reflect what we know.
718 if (BitWidth != 1) {
719 ConstantRange OldRange =
720 II.getRange().value_or(ConstantRange::getFull(BitWidth));
721
722 unsigned Lower = Known.countMinPopulation();
723 unsigned Upper = Known.countMaxPopulation() + 1;
724
725 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
727 Lower = 1;
728
730 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
731
732 if (Range != OldRange) {
733 II.addRangeRetAttr(Range);
734 return &II;
735 }
736 }
737
738 return nullptr;
739}
740
741/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
742/// at most two source operands are actually referenced.
744 bool IsExtension) {
745 // Bail out if the mask is not a constant.
746 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
747 if (!C)
748 return nullptr;
749
750 auto *RetTy = cast<FixedVectorType>(II.getType());
751 unsigned NumIndexes = RetTy->getNumElements();
752
753 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
754 if (!RetTy->getElementType()->isIntegerTy(8) ||
755 (NumIndexes != 8 && NumIndexes != 16))
756 return nullptr;
757
758 // For tbx instructions, the first argument is the "fallback" vector, which
759 // has the same length as the mask and return type.
760 unsigned int StartIndex = (unsigned)IsExtension;
761 auto *SourceTy =
762 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
763 // Note that the element count of each source vector does *not* need to be the
764 // same as the element count of the return type and mask! All source vectors
765 // must have the same element count as each other, though.
766 unsigned NumElementsPerSource = SourceTy->getNumElements();
767
768 // There are no tbl/tbx intrinsics for which the destination size exceeds the
769 // source size. However, our definitions of the intrinsics, at least in
770 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
771 // *could* technically happen.
772 if (NumIndexes > NumElementsPerSource)
773 return nullptr;
774
775 // The tbl/tbx intrinsics take several source operands followed by a mask
776 // operand.
777 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
778
779 // Map input operands to shuffle indices. This also helpfully deduplicates the
780 // input arguments, in case the same value is passed as an argument multiple
781 // times.
782 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
783 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
784 PoisonValue::get(SourceTy)};
785
786 int Indexes[16];
787 for (unsigned I = 0; I < NumIndexes; ++I) {
788 Constant *COp = C->getAggregateElement(I);
789
790 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
791 return nullptr;
792
793 if (isa<UndefValue>(COp)) {
794 Indexes[I] = -1;
795 continue;
796 }
797
798 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
799 // The index of the input argument that this index references (0 = first
800 // source argument, etc).
801 unsigned SourceOperandIndex = Index / NumElementsPerSource;
802 // The index of the element at that source operand.
803 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
804
805 Value *SourceOperand;
806 if (SourceOperandIndex >= NumSourceOperands) {
807 // This index is out of bounds. Map it to index into either the fallback
808 // vector (tbx) or vector of zeroes (tbl).
809 SourceOperandIndex = NumSourceOperands;
810 if (IsExtension) {
811 // For out-of-bounds indices in tbx, choose the `I`th element of the
812 // fallback.
813 SourceOperand = II.getArgOperand(0);
814 SourceOperandElementIndex = I;
815 } else {
816 // Otherwise, choose some element from the dummy vector of zeroes (we'll
817 // always choose the first).
818 SourceOperand = Constant::getNullValue(SourceTy);
819 SourceOperandElementIndex = 0;
820 }
821 } else {
822 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
823 }
824
825 // The source operand may be the fallback vector, which may not have the
826 // same number of elements as the source vector. In that case, we *could*
827 // choose to extend its length with another shufflevector, but it's simpler
828 // to just bail instead.
829 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
830 NumElementsPerSource)
831 return nullptr;
832
833 // We now know the source operand referenced by this index. Make it a
834 // shufflevector operand, if it isn't already.
835 unsigned NumSlots = ValueToShuffleSlot.size();
836 // This shuffle references more than two sources, and hence cannot be
837 // represented as a shufflevector.
838 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
839 return nullptr;
840
841 auto [It, Inserted] =
842 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
843 if (Inserted)
844 ShuffleOperands[It->getSecond()] = SourceOperand;
845
846 unsigned RemappedIndex =
847 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
848 Indexes[I] = RemappedIndex;
849 }
850
852 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
853 return IC.replaceInstUsesWith(II, Shuf);
854}
855
856// Returns true iff the 2 intrinsics have the same operands, limiting the
857// comparison to the first NumOperands.
858static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
859 unsigned NumOperands) {
860 assert(I.arg_size() >= NumOperands && "Not enough operands");
861 assert(E.arg_size() >= NumOperands && "Not enough operands");
862 for (unsigned i = 0; i < NumOperands; i++)
863 if (I.getArgOperand(i) != E.getArgOperand(i))
864 return false;
865 return true;
866}
867
868// Remove trivially empty start/end intrinsic ranges, i.e. a start
869// immediately followed by an end (ignoring debuginfo or other
870// start/end intrinsics in between). As this handles only the most trivial
871// cases, tracking the nesting level is not needed:
872//
873// call @llvm.foo.start(i1 0)
874// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
875// call @llvm.foo.end(i1 0)
876// call @llvm.foo.end(i1 0) ; &I
877static bool
879 std::function<bool(const IntrinsicInst &)> IsStart) {
880 // We start from the end intrinsic and scan backwards, so that InstCombine
881 // has already processed (and potentially removed) all the instructions
882 // before the end intrinsic.
883 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
884 for (; BI != BE; ++BI) {
885 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
886 if (I->isDebugOrPseudoInst() ||
887 I->getIntrinsicID() == EndI.getIntrinsicID())
888 continue;
889 if (IsStart(*I)) {
890 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
892 IC.eraseInstFromFunction(EndI);
893 return true;
894 }
895 // Skip start intrinsics that don't pair with this end intrinsic.
896 continue;
897 }
898 }
899 break;
900 }
901
902 return false;
903}
904
906 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
907 // Bail out on the case where the source va_list of a va_copy is destroyed
908 // immediately by a follow-up va_end.
909 return II.getIntrinsicID() == Intrinsic::vastart ||
910 (II.getIntrinsicID() == Intrinsic::vacopy &&
911 I.getArgOperand(0) != II.getArgOperand(1));
912 });
913 return nullptr;
914}
915
917 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
918 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
919 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
920 Call.setArgOperand(0, Arg1);
921 Call.setArgOperand(1, Arg0);
922 return &Call;
923 }
924 return nullptr;
925}
926
927/// Creates a result tuple for an overflow intrinsic \p II with a given
928/// \p Result and a constant \p Overflow value.
930 Constant *Overflow) {
931 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
932 StructType *ST = cast<StructType>(II->getType());
933 Constant *Struct = ConstantStruct::get(ST, V);
934 return InsertValueInst::Create(Struct, Result, 0);
935}
936
938InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
939 WithOverflowInst *WO = cast<WithOverflowInst>(II);
940 Value *OperationResult = nullptr;
941 Constant *OverflowResult = nullptr;
942 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
943 WO->getRHS(), *WO, OperationResult, OverflowResult))
944 return createOverflowTuple(WO, OperationResult, OverflowResult);
945
946 // See whether we can optimize the overflow check with assumption information.
947 for (User *U : WO->users()) {
948 if (!match(U, m_ExtractValue<1>(m_Value())))
949 continue;
950
951 for (auto &AssumeVH : AC.assumptionsFor(U)) {
952 if (!AssumeVH)
953 continue;
954 CallInst *I = cast<CallInst>(AssumeVH);
955 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
956 continue;
957 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
958 /*AllowEphemerals=*/true))
959 continue;
960 Value *Result =
961 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
962 Result->takeName(WO);
963 if (auto *Inst = dyn_cast<Instruction>(Result)) {
964 if (WO->isSigned())
965 Inst->setHasNoSignedWrap();
966 else
967 Inst->setHasNoUnsignedWrap();
968 }
969 return createOverflowTuple(WO, Result,
970 ConstantInt::getFalse(U->getType()));
971 }
972 }
973
974 return nullptr;
975}
976
977static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
978 Ty = Ty->getScalarType();
979 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
980}
981
982static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
983 Ty = Ty->getScalarType();
984 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
985}
986
987/// \returns the compare predicate type if the test performed by
988/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
989/// floating-point environment assumed for \p F for type \p Ty
991 const Function &F, Type *Ty) {
992 switch (static_cast<unsigned>(Mask)) {
993 case fcZero:
994 if (inputDenormalIsIEEE(F, Ty))
995 return FCmpInst::FCMP_OEQ;
996 break;
997 case fcZero | fcSubnormal:
998 if (inputDenormalIsDAZ(F, Ty))
999 return FCmpInst::FCMP_OEQ;
1000 break;
1001 case fcPositive | fcNegZero:
1002 if (inputDenormalIsIEEE(F, Ty))
1003 return FCmpInst::FCMP_OGE;
1004 break;
1006 if (inputDenormalIsDAZ(F, Ty))
1007 return FCmpInst::FCMP_OGE;
1008 break;
1010 if (inputDenormalIsIEEE(F, Ty))
1011 return FCmpInst::FCMP_OGT;
1012 break;
1013 case fcNegative | fcPosZero:
1014 if (inputDenormalIsIEEE(F, Ty))
1015 return FCmpInst::FCMP_OLE;
1016 break;
1018 if (inputDenormalIsDAZ(F, Ty))
1019 return FCmpInst::FCMP_OLE;
1020 break;
1022 if (inputDenormalIsIEEE(F, Ty))
1023 return FCmpInst::FCMP_OLT;
1024 break;
1025 case fcPosNormal | fcPosInf:
1026 if (inputDenormalIsDAZ(F, Ty))
1027 return FCmpInst::FCMP_OGT;
1028 break;
1029 case fcNegNormal | fcNegInf:
1030 if (inputDenormalIsDAZ(F, Ty))
1031 return FCmpInst::FCMP_OLT;
1032 break;
1033 case ~fcZero & ~fcNan:
1034 if (inputDenormalIsIEEE(F, Ty))
1035 return FCmpInst::FCMP_ONE;
1036 break;
1037 case ~(fcZero | fcSubnormal) & ~fcNan:
1038 if (inputDenormalIsDAZ(F, Ty))
1039 return FCmpInst::FCMP_ONE;
1040 break;
1041 default:
1042 break;
1043 }
1044
1046}
1047
1048Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1049 Value *Src0 = II.getArgOperand(0);
1050 Value *Src1 = II.getArgOperand(1);
1051 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1052 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1053 const bool IsUnordered = (Mask & fcNan) == fcNan;
1054 const bool IsOrdered = (Mask & fcNan) == fcNone;
1055 const FPClassTest OrderedMask = Mask & ~fcNan;
1056 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1057
1058 const bool IsStrict =
1059 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1060
1061 Value *FNegSrc;
1062 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1063 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1064
1065 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1066 return replaceOperand(II, 0, FNegSrc);
1067 }
1068
1069 Value *FAbsSrc;
1070 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1071 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1072 return replaceOperand(II, 0, FAbsSrc);
1073 }
1074
1075 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1076 (IsOrdered || IsUnordered) && !IsStrict) {
1077 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1078 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1079 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1080 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1082 FCmpInst::Predicate Pred =
1083 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1084 if (OrderedInvertedMask == fcInf)
1085 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1086
1087 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
1088 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1089 CmpInf->takeName(&II);
1090 return replaceInstUsesWith(II, CmpInf);
1091 }
1092
1093 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1094 (IsOrdered || IsUnordered) && !IsStrict) {
1095 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1096 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1097 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1098 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1099 Constant *Inf =
1100 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1101 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1102 : Builder.CreateFCmpOEQ(Src0, Inf);
1103
1104 EqInf->takeName(&II);
1105 return replaceInstUsesWith(II, EqInf);
1106 }
1107
1108 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1109 (IsOrdered || IsUnordered) && !IsStrict) {
1110 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1111 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1112 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1113 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1115 OrderedInvertedMask == fcNegInf);
1116 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1117 : Builder.CreateFCmpONE(Src0, Inf);
1118 NeInf->takeName(&II);
1119 return replaceInstUsesWith(II, NeInf);
1120 }
1121
1122 if (Mask == fcNan && !IsStrict) {
1123 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1124 // exceptions.
1125 Value *IsNan =
1126 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1127 IsNan->takeName(&II);
1128 return replaceInstUsesWith(II, IsNan);
1129 }
1130
1131 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1132 // Equivalent of !isnan. Replace with standard fcmp.
1133 Value *FCmp =
1134 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1135 FCmp->takeName(&II);
1136 return replaceInstUsesWith(II, FCmp);
1137 }
1138
1140
1141 // Try to replace with an fcmp with 0
1142 //
1143 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1144 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1145 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1146 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1147 //
1148 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1149 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1150 //
1151 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1152 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1153 //
1154 if (!IsStrict && (IsOrdered || IsUnordered) &&
1155 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1156 Src0->getType())) !=
1159 // Equivalent of == 0.
1160 Value *FCmp = Builder.CreateFCmp(
1161 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1162 Src0, Zero);
1163
1164 FCmp->takeName(&II);
1165 return replaceInstUsesWith(II, FCmp);
1166 }
1167
1168 KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
1169
1170 // Clear test bits we know must be false from the source value.
1171 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1172 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1173 if ((Mask & Known.KnownFPClasses) != Mask) {
1174 II.setArgOperand(
1175 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1176 return &II;
1177 }
1178
1179 // If none of the tests which can return false are possible, fold to true.
1180 // fp_class (nnan x), ~(qnan|snan) -> true
1181 // fp_class (ninf x), ~(ninf|pinf) -> true
1182 if (Mask == Known.KnownFPClasses)
1183 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1184
1185 return nullptr;
1186}
1187
1188static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1189 KnownBits Known = computeKnownBits(Op, SQ);
1190 if (Known.isNonNegative())
1191 return false;
1192 if (Known.isNegative())
1193 return true;
1194
1195 Value *X, *Y;
1196 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1198
1199 return std::nullopt;
1200}
1201
1202static std::optional<bool> getKnownSignOrZero(Value *Op,
1203 const SimplifyQuery &SQ) {
1204 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1205 return Sign;
1206
1207 Value *X, *Y;
1208 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1210
1211 return std::nullopt;
1212}
1213
1214/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1215static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1216 const SimplifyQuery &SQ) {
1217 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1218 if (!Known1)
1219 return false;
1220 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1221 if (!Known0)
1222 return false;
1223 return *Known0 == *Known1;
1224}
1225
1226/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1227/// can trigger other combines.
1229 InstCombiner::BuilderTy &Builder) {
1230 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1231 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1232 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1233 "Expected a min or max intrinsic");
1234
1235 // TODO: Match vectors with undef elements, but undef may not propagate.
1236 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1237 Value *X;
1238 const APInt *C0, *C1;
1239 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1240 !match(Op1, m_APInt(C1)))
1241 return nullptr;
1242
1243 // Check for necessary no-wrap and overflow constraints.
1244 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1245 auto *Add = cast<BinaryOperator>(Op0);
1246 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1247 (!IsSigned && !Add->hasNoUnsignedWrap()))
1248 return nullptr;
1249
1250 // If the constant difference overflows, then instsimplify should reduce the
1251 // min/max to the add or C1.
1252 bool Overflow;
1253 APInt CDiff =
1254 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1255 assert(!Overflow && "Expected simplify of min/max");
1256
1257 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1258 // Note: the "mismatched" no-overflow setting does not propagate.
1259 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1260 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1261 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1262 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1263}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
    if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
      return nullptr;
  } else if (match(&MinMax1,
                   m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
    if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  // The clamp range must be symmetric: [-(MaxValue+1), MaxValue] with
  // MaxValue+1 a power of two, i.e. the value range of an NewBitWidth-bit
  // signed integer.
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetics?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  // Only add and sub have saturating intrinsic counterparts.
  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
      ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
  Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
  Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
  return CastInst::Create(Instruction::SExt, Sat, Ty);
}
1322
1323
1324/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1325/// can only be one of two possible constant values -- turn that into a select
1326/// of constants.
1328 InstCombiner::BuilderTy &Builder) {
1329 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1330 Value *X;
1331 const APInt *C0, *C1;
1332 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1333 return nullptr;
1334
1336 switch (II->getIntrinsicID()) {
1337 case Intrinsic::smax:
1338 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1339 Pred = ICmpInst::ICMP_SGT;
1340 break;
1341 case Intrinsic::smin:
1342 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1343 Pred = ICmpInst::ICMP_SLT;
1344 break;
1345 case Intrinsic::umax:
1346 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1347 Pred = ICmpInst::ICMP_UGT;
1348 break;
1349 case Intrinsic::umin:
1350 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1351 Pred = ICmpInst::ICMP_ULT;
1352 break;
1353 default:
1354 llvm_unreachable("Expected min/max intrinsic");
1355 }
1356 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1357 return nullptr;
1358
1359 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1360 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1361 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1362 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1363}
1364
1365/// If this min/max has a constant operand and an operand that is a matching
1366/// min/max with a constant operand, constant-fold the 2 constant operands.
1368 IRBuilderBase &Builder,
1369 const SimplifyQuery &SQ) {
1370 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1371 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1372 if (!LHS)
1373 return nullptr;
1374
1375 Constant *C0, *C1;
1376 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1377 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1378 return nullptr;
1379
1380 // max (max X, C0), C1 --> max X, (max C0, C1)
1381 // min (min X, C0), C1 --> min X, (min C0, C1)
1382 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1383 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1384 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1385 if (InnerMinMaxID != MinMaxID &&
1386 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1387 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1388 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1389 return nullptr;
1390
1392 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1393 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1394 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1395 {LHS->getArgOperand(0), NewC});
1396}
1397
1398/// If this min/max has a matching min/max operand with a constant, try to push
1399/// the constant operand into this instruction. This can enable more folds.
1400static Instruction *
1402 InstCombiner::BuilderTy &Builder) {
1403 // Match and capture a min/max operand candidate.
1404 Value *X, *Y;
1405 Constant *C;
1406 Instruction *Inner;
1408 m_Instruction(Inner),
1410 m_Value(Y))))
1411 return nullptr;
1412
1413 // The inner op must match. Check for constants to avoid infinite loops.
1414 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1415 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1416 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1418 return nullptr;
1419
1420 // max (max X, C), Y --> max (max X, Y), C
1422 MinMaxID, II->getType());
1423 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1424 NewInner->takeName(Inner);
1425 return CallInst::Create(MinMax, {NewInner, C});
1426}
1427
1428/// Reduce a sequence of min/max intrinsics with a common operand.
1430 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1431 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1432 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1433 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1434 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1435 RHS->getIntrinsicID() != MinMaxID ||
1436 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1437 return nullptr;
1438
1439 Value *A = LHS->getArgOperand(0);
1440 Value *B = LHS->getArgOperand(1);
1441 Value *C = RHS->getArgOperand(0);
1442 Value *D = RHS->getArgOperand(1);
1443
1444 // Look for a common operand.
1445 Value *MinMaxOp = nullptr;
1446 Value *ThirdOp = nullptr;
1447 if (LHS->hasOneUse()) {
1448 // If the LHS is only used in this chain and the RHS is used outside of it,
1449 // reuse the RHS min/max because that will eliminate the LHS.
1450 if (D == A || C == A) {
1451 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1452 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1453 MinMaxOp = RHS;
1454 ThirdOp = B;
1455 } else if (D == B || C == B) {
1456 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1457 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1458 MinMaxOp = RHS;
1459 ThirdOp = A;
1460 }
1461 } else {
1462 assert(RHS->hasOneUse() && "Expected one-use operand");
1463 // Reuse the LHS. This will eliminate the RHS.
1464 if (D == A || D == B) {
1465 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1466 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1467 MinMaxOp = LHS;
1468 ThirdOp = C;
1469 } else if (C == A || C == B) {
1470 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1471 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1472 MinMaxOp = LHS;
1473 ThirdOp = D;
1474 }
1475 }
1476
1477 if (!MinMaxOp || !ThirdOp)
1478 return nullptr;
1479
1480 Module *Mod = II->getModule();
1481 Function *MinMax =
1482 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1483 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1484}
1485
1486/// If all arguments of the intrinsic are unary shuffles with the same mask,
1487/// try to shuffle after the intrinsic.
1490 if (!II->getType()->isVectorTy() ||
1491 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1492 !II->getCalledFunction()->isSpeculatable())
1493 return nullptr;
1494
1495 Value *X;
1496 Constant *C;
1497 ArrayRef<int> Mask;
1498 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1499 return isa<Constant>(Arg.get()) ||
1500 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1501 Arg.getOperandNo(), nullptr);
1502 });
1503 if (!NonConstArg ||
1504 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1505 return nullptr;
1506
1507 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1508 // instructions.
1509 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1510 return nullptr;
1511
1512 // See if all arguments are shuffled with the same mask.
1514 Type *SrcTy = X->getType();
1515 for (Use &Arg : II->args()) {
1516 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1517 Arg.getOperandNo(), nullptr))
1518 NewArgs.push_back(Arg);
1519 else if (match(&Arg,
1520 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1521 X->getType() == SrcTy)
1522 NewArgs.push_back(X);
1523 else if (match(&Arg, m_ImmConstant(C))) {
1524 // If it's a constant, try find the constant that would be shuffled to C.
1525 if (Constant *ShuffledC =
1526 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1527 NewArgs.push_back(ShuffledC);
1528 else
1529 return nullptr;
1530 } else
1531 return nullptr;
1532 }
1533
1534 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1535 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1536 // Result type might be a different vector width.
1537 // TODO: Check that the result type isn't widened?
1538 VectorType *ResTy =
1539 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1540 Value *NewIntrinsic =
1541 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1542 return new ShuffleVectorInst(NewIntrinsic, Mask);
1543}
1544
1545/// If all arguments of the intrinsic are reverses, try to pull the reverse
1546/// after the intrinsic.
1548 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1549 return nullptr;
1550
1551 // At least 1 operand must be a reverse with 1 use because we are creating 2
1552 // instructions.
1553 if (none_of(II->args(), [](Value *V) {
1554 return match(V, m_OneUse(m_VecReverse(m_Value())));
1555 }))
1556 return nullptr;
1557
1558 Value *X;
1559 Constant *C;
1560 SmallVector<Value *> NewArgs;
1561 for (Use &Arg : II->args()) {
1562 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1563 Arg.getOperandNo(), nullptr))
1564 NewArgs.push_back(Arg);
1565 else if (match(&Arg, m_VecReverse(m_Value(X))))
1566 NewArgs.push_back(X);
1567 else if (isSplatValue(Arg))
1568 NewArgs.push_back(Arg);
1569 else if (match(&Arg, m_ImmConstant(C)))
1570 NewArgs.push_back(Builder.CreateVectorReverse(C));
1571 else
1572 return nullptr;
1573 }
1574
1575 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1576 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1577 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1578 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1579 return Builder.CreateVectorReverse(NewIntrinsic);
1580}
1581
1582/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1583/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1584/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1585template <Intrinsic::ID IntrID>
1587 InstCombiner::BuilderTy &Builder) {
1588 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1589 "This helper only supports BSWAP and BITREVERSE intrinsics");
1590
1591 Value *X, *Y;
1592 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1593 // don't match ConstantExpr that aren't meaningful for this transform.
1596 Value *OldReorderX, *OldReorderY;
1598
1599 // If both X and Y are bswap/bitreverse, the transform reduces the number
1600 // of instructions even if there's multiuse.
1601 // If only one operand is bswap/bitreverse, we need to ensure the operand
1602 // have only one use.
1603 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1604 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1605 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1606 }
1607
1608 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1609 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1610 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1611 }
1612
1613 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1614 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1615 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1616 }
1617 }
1618 return nullptr;
1619}
1620
1621/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1622/// `f(f(x, y), y) == f(x, y)` holds.
1624 switch (IID) {
1625 case Intrinsic::smax:
1626 case Intrinsic::smin:
1627 case Intrinsic::umax:
1628 case Intrinsic::umin:
1629 case Intrinsic::maximum:
1630 case Intrinsic::minimum:
1631 case Intrinsic::maximumnum:
1632 case Intrinsic::minimumnum:
1633 case Intrinsic::maxnum:
1634 case Intrinsic::minnum:
1635 return true;
1636 default:
1637 return false;
1638 }
1639}
1640
1641/// Attempt to simplify value-accumulating recurrences of kind:
1642/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1643/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1644/// And let the idempotent binary intrinsic be hoisted, when the operands are
1645/// known to be loop-invariant.
1647 IntrinsicInst *II) {
1648 PHINode *PN;
1649 Value *Init, *OtherOp;
1650
1651 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1652 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1653 auto IID = II->getIntrinsicID();
1654 if (!isIdempotentBinaryIntrinsic(IID) ||
1656 !IC.getDominatorTree().dominates(OtherOp, PN))
1657 return nullptr;
1658
1659 auto *InvariantBinaryInst =
1660 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1661 if (isa<FPMathOperator>(InvariantBinaryInst))
1662 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1663 return InvariantBinaryInst;
1664}
1665
1666static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1667 if (!CanReorderLanes)
1668 return nullptr;
1669
1670 Value *V;
1671 if (match(Arg, m_VecReverse(m_Value(V))))
1672 return V;
1673
1674 ArrayRef<int> Mask;
1675 if (!isa<FixedVectorType>(Arg->getType()) ||
1676 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1677 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1678 return nullptr;
1679
1680 int Sz = Mask.size();
1681 SmallBitVector UsedIndices(Sz);
1682 for (int Idx : Mask) {
1683 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1684 return nullptr;
1685 UsedIndices.set(Idx);
1686 }
1687
1688 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1689 // other changes.
1690 return UsedIndices.all() ? V : nullptr;
1691}
1692
1693/// Fold an unsigned minimum of trailing or leading zero bits counts:
1694/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1695/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1696/// >> ConstOp))
1697/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1698/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
1699template <Intrinsic::ID IntrID>
1700static Value *
1702 const DataLayout &DL,
1703 InstCombiner::BuilderTy &Builder) {
1704 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1705 "This helper only supports cttz and ctlz intrinsics");
1706
1707 Value *CtOp1, *CtOp2;
1708 Value *ZeroUndef1, *ZeroUndef2;
1709 if (!match(I0, m_OneUse(
1710 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1711 return nullptr;
1712
1713 if (match(I1,
1714 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1715 return Builder.CreateBinaryIntrinsic(
1716 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1717 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1718
1719 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1720 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1721 if (!match(I1, m_CheckedInt(LessBitWidth)))
1722 // We have a constant >= BitWidth (which can be handled by CVP)
1723 // or a non-splat vector with elements < and >= BitWidth
1724 return nullptr;
1725
1726 Type *Ty = I1->getType();
1728 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1729 IntrID == Intrinsic::cttz
1730 ? ConstantInt::get(Ty, 1)
1731 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1732 cast<Constant>(I1), DL);
1733 return Builder.CreateBinaryIntrinsic(
1734 IntrID, Builder.CreateOr(CtOp1, NewConst),
1735 ConstantInt::getTrue(ZeroUndef1->getType()));
1736}
1737
1738/// Return whether "X LOp (Y ROp Z)" is always equal to
1739/// "(X LOp Y) ROp (X LOp Z)".
1741 bool HasNSW, Intrinsic::ID ROp) {
1742 switch (ROp) {
1743 case Intrinsic::umax:
1744 case Intrinsic::umin:
1745 if (HasNUW && LOp == Instruction::Add)
1746 return true;
1747 if (HasNUW && LOp == Instruction::Shl)
1748 return true;
1749 return false;
1750 case Intrinsic::smax:
1751 case Intrinsic::smin:
1752 return HasNSW && LOp == Instruction::Add;
1753 default:
1754 return false;
1755 }
1756}
1757
1758/// Return whether "(X ROp Y) LOp Z" is always equal to
1759/// "(X LOp Z) ROp (Y LOp Z)".
1761 bool HasNSW, Intrinsic::ID ROp) {
1762 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1763 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1764 switch (ROp) {
1765 case Intrinsic::umax:
1766 case Intrinsic::umin:
1767 return HasNUW && LOp == Instruction::Sub;
1768 case Intrinsic::smax:
1769 case Intrinsic::smin:
1770 return HasNSW && LOp == Instruction::Sub;
1771 default:
1772 return false;
1773 }
1774}
1775
1776// Attempts to factorise a common term
1777// in an instruction that has the form "(A op' B) op (C op' D)
1778// where op is an intrinsic and op' is a binop
// On success returns the factored replacement value ("A op' (B op D)" or
// "(A op C) op' B"); returns nullptr if no factoring applies.
// NOTE(review): the line naming this function and the lines initializing
// Op0/Op1 (presumably dyn_casts of LHS/RHS to BinaryOperator) are elided in
// this listing.
1779static Value *
1781 InstCombiner::BuilderTy &Builder) {
 // II is the top-level (min/max-style) intrinsic whose operands we try to
 // factor.
1782 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1783 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1784
1787
 // Both intrinsic operands must be binary operators of the same opcode.
1788 if (!Op0 || !Op1)
1789 return nullptr;
1790
1791 if (Op0->getOpcode() != Op1->getOpcode())
1792 return nullptr;
1793
 // Only fold when both inner binops die with the fold; otherwise we would
 // increase the instruction count.
1794 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1795 return nullptr;
1796
1797 Instruction::BinaryOps InnerOpcode =
1798 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
 // The distributivity checks below may rely on wrap flags, so only keep a
 // flag if it holds on both inner operations.
1799 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1800 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1801
1802 Value *A = Op0->getOperand(0);
1803 Value *B = Op0->getOperand(1);
1804 Value *C = Op1->getOperand(0);
1805 Value *D = Op1->getOperand(1);
1806
1807 // Attempts to swap variables such that A equals C or B equals D,
1808 // if the inner operation is commutative.
1809 if (Op0->isCommutative() && A != C && B != D) {
1810 if (A == D || B == C)
1811 std::swap(C, D);
1812 else
 // No common term in any position; nothing to factor.
1813 return nullptr;
1814 }
1815
1816 BinaryOperator *NewBinop;
 // (A op' B) op (A op' D) --> A op' (B op D), when op' left-distributes
 // over op.
1817 if (A == C &&
1818 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1819 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1820 NewBinop =
1821 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
 // (A op' B) op (C op' B) --> (A op C) op' B, when op' right-distributes
 // over op.
1822 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1823 TopLevelOpcode)) {
1824 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1825 NewBinop =
1826 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1827 } else {
1828 return nullptr;
1829 }
1830
 // The combined wrap flags were a precondition of the distributivity check,
 // so they remain valid on the factored binop.
1831 NewBinop->setHasNoUnsignedWrap(HasNUW);
1832 NewBinop->setHasNoSignedWrap(HasNSW);
1833
1834 return NewBinop;
1835}
1836
// Fold an ARM/AArch64 NEON vector shift intrinsic whose shift-amount operand
// is constant into a plain IR shift: non-negative amounts become shl, and
// negative amounts become a right shift by the negated amount (ashr for the
// signed intrinsic variants, lshr otherwise). The fold requires every
// element's amount to be in range and to have the same sign, so one IR shift
// covers all lanes. (NOTE(review): the signature line of this helper is
// elided in this listing; II appears to be the intrinsic call and IC the
// InstCombiner.)
1838 Value *Arg0 = II->getArgOperand(0);
1839 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1840 if (!ShiftConst)
1841 return nullptr;
1842
1843 int ElemBits = Arg0->getType()->getScalarSizeInBits();
 // Track whether every amount seen so far is non-negative / negative; a mix
 // of signs makes the fold fail (Check returns false).
1844 bool AllPositive = true;
1845 bool AllNegative = true;
1846
 // Returns true iff C is an integer constant whose value is an in-range
 // shift amount agreeing in sign with all amounts seen so far: non-negative
 // amounts must be < ElemBits, negative amounts must be > -ElemBits.
1847 auto Check = [&](Constant *C) -> bool {
 // dyn_cast_or_null also rejects a null C (see getAggregateElement below).
1848 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1849 const APInt &V = CI->getValue();
1850 if (V.isNonNegative()) {
1851 AllNegative = false;
1852 return AllPositive && V.ult(ElemBits);
1853 }
1854 AllPositive = false;
1855 return AllNegative && V.sgt(-ElemBits);
1856 }
1857 return false;
1858 };
1859
 // Fixed vectors are validated lane by lane (getAggregateElement may return
 // null for constants it cannot decompose, which Check rejects); anything
 // else is validated as a single scalar constant.
1860 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1861 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1862 if (!Check(ShiftConst->getAggregateElement(I)))
1863 return nullptr;
1864 }
1865
1866 } else if (!Check(ShiftConst))
1867 return nullptr;
1868
1869 IRBuilderBase &B = IC.Builder;
 // All amounts non-negative: a single left shift by the constant suffices.
1870 if (AllPositive)
1871 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1872
 // All amounts negative: shift right by the negated amounts. The signed
 // intrinsic variants get an arithmetic shift, the rest a logical shift.
1873 Value *NegAmt = B.CreateNeg(ShiftConst);
1874 Intrinsic::ID IID = II->getIntrinsicID();
1875 const bool IsSigned =
1876 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1877 Value *Result =
1878 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1879 return IC.replaceInstUsesWith(*II, Result);
1880}
1881
1882/// CallInst simplification. This mostly only handles folding of intrinsic
1883/// instructions. For normal calls, it allows visitCallBase to do the heavy
1884/// lifting.
1886 // Don't try to simplify calls without uses. It will not do anything useful,
1887 // but will result in the following folds being skipped.
1888 if (!CI.use_empty()) {
1889 SmallVector<Value *, 8> Args(CI.args());
1890 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1891 SQ.getWithInstruction(&CI)))
1892 return replaceInstUsesWith(CI, V);
1893 }
1894
1895 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1896 return visitFree(CI, FreedOp);
1897
1898 // If the caller function (i.e. us, the function that contains this CallInst)
1899 // is nounwind, mark the call as nounwind, even if the callee isn't.
1900 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1901 CI.setDoesNotThrow();
1902 return &CI;
1903 }
1904
1906 if (!II)
1907 return visitCallBase(CI);
1908
1909 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1910 // instead of in visitCallBase.
1911 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1912 if (auto NumBytes = MI->getLengthInBytes()) {
1913 // memmove/cpy/set of zero bytes is a noop.
1914 if (NumBytes->isZero())
1915 return eraseInstFromFunction(CI);
1916
1917 // For atomic unordered mem intrinsics if len is not a positive or
1918 // not a multiple of element size then behavior is undefined.
1919 if (MI->isAtomic() &&
1920 (NumBytes->isNegative() ||
1921 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1923 assert(MI->getType()->isVoidTy() &&
1924 "non void atomic unordered mem intrinsic");
1925 return eraseInstFromFunction(*MI);
1926 }
1927 }
1928
1929 // No other transformations apply to volatile transfers.
1930 if (MI->isVolatile())
1931 return nullptr;
1932
1934 // memmove(x,x,size) -> noop.
1935 if (MTI->getSource() == MTI->getDest())
1936 return eraseInstFromFunction(CI);
1937 }
1938
1939 auto IsPointerUndefined = [MI](Value *Ptr) {
1940 return isa<ConstantPointerNull>(Ptr) &&
1942 MI->getFunction(),
1943 cast<PointerType>(Ptr->getType())->getAddressSpace());
1944 };
1945 bool SrcIsUndefined = false;
1946 // If we can determine a pointer alignment that is bigger than currently
1947 // set, update the alignment.
1948 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1950 return I;
1951 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1952 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1953 if (Instruction *I = SimplifyAnyMemSet(MSI))
1954 return I;
1955 }
1956
1957 // If src/dest is null, this memory intrinsic must be a noop.
1958 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1959 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1960 return eraseInstFromFunction(CI);
1961 }
1962
1963 // If we have a memmove and the source operation is a constant global,
1964 // then the source and dest pointers can't alias, so we can change this
1965 // into a call to memcpy.
1966 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1967 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1968 if (GVSrc->isConstant()) {
1969 Module *M = CI.getModule();
1970 Intrinsic::ID MemCpyID =
1971 MMI->isAtomic()
1972 ? Intrinsic::memcpy_element_unordered_atomic
1973 : Intrinsic::memcpy;
1974 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1975 CI.getArgOperand(1)->getType(),
1976 CI.getArgOperand(2)->getType() };
1978 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1979 return II;
1980 }
1981 }
1982 }
1983
1984 // For fixed width vector result intrinsics, use the generic demanded vector
1985 // support.
1986 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1987 auto VWidth = IIFVTy->getNumElements();
1988 APInt PoisonElts(VWidth, 0);
1989 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1990 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1991 if (V != II)
1992 return replaceInstUsesWith(*II, V);
1993 return II;
1994 }
1995 }
1996
1997 if (II->isCommutative()) {
1998 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
1999 replaceOperand(*II, 0, Pair->first);
2000 replaceOperand(*II, 1, Pair->second);
2001 return II;
2002 }
2003
2004 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2005 return NewCall;
2006 }
2007
2008 // Unused constrained FP intrinsic calls may have declared side effect, which
2009 // prevents it from being removed. In some cases however the side effect is
2010 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2011 // returns a replacement, the call may be removed.
2012 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2013 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2014 return eraseInstFromFunction(CI);
2015 }
2016
2017 Intrinsic::ID IID = II->getIntrinsicID();
2018 switch (IID) {
2019 case Intrinsic::objectsize: {
2020 SmallVector<Instruction *> InsertedInstructions;
2021 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2022 &InsertedInstructions)) {
2023 for (Instruction *Inserted : InsertedInstructions)
2024 Worklist.add(Inserted);
2025 return replaceInstUsesWith(CI, V);
2026 }
2027 return nullptr;
2028 }
2029 case Intrinsic::abs: {
2030 Value *IIOperand = II->getArgOperand(0);
2031 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2032
2033 // abs(-x) -> abs(x)
2034 Value *X;
2035 if (match(IIOperand, m_Neg(m_Value(X)))) {
2036 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2037 replaceOperand(*II, 1, Builder.getTrue());
2038 return replaceOperand(*II, 0, X);
2039 }
2040 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2041 return replaceOperand(*II, 0, X);
2042
2043 Value *Y;
2044 // abs(a * abs(b)) -> abs(a * b)
2045 if (match(IIOperand,
2048 bool NSW =
2049 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2050 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2051 return replaceOperand(*II, 0, XY);
2052 }
2053
2054 if (std::optional<bool> Known =
2055 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2056 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2057 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2058 if (!*Known)
2059 return replaceInstUsesWith(*II, IIOperand);
2060
2061 // abs(x) -> -x if x < 0
2062 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2063 if (IntMinIsPoison)
2064 return BinaryOperator::CreateNSWNeg(IIOperand);
2065 return BinaryOperator::CreateNeg(IIOperand);
2066 }
2067
2068 // abs (sext X) --> zext (abs X*)
2069 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2070 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2071 Value *NarrowAbs =
2072 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2073 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2074 }
2075
2076 // Match a complicated way to check if a number is odd/even:
2077 // abs (srem X, 2) --> and X, 1
2078 const APInt *C;
2079 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2080 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2081
2082 break;
2083 }
2084 case Intrinsic::umin: {
2085 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2086 // umin(x, 1) == zext(x != 0)
2087 if (match(I1, m_One())) {
2088 assert(II->getType()->getScalarSizeInBits() != 1 &&
2089 "Expected simplify of umin with max constant");
2090 Value *Zero = Constant::getNullValue(I0->getType());
2091 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2092 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2093 }
2094 // umin(cttz(x), const) --> cttz(x | (1 << const))
2095 if (Value *FoldedCttz =
2097 I0, I1, DL, Builder))
2098 return replaceInstUsesWith(*II, FoldedCttz);
2099 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2100 if (Value *FoldedCtlz =
2102 I0, I1, DL, Builder))
2103 return replaceInstUsesWith(*II, FoldedCtlz);
2104 [[fallthrough]];
2105 }
2106 case Intrinsic::umax: {
2107 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2108 Value *X, *Y;
2109 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2110 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2111 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2112 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2113 }
2114 Constant *C;
2115 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2116 I0->hasOneUse()) {
2117 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2118 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2119 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2120 }
2121 }
2122 // If C is not 0:
2123 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2124 // If C is not 0 or 1:
2125 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2126 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2127 const APInt *C;
2128 Value *X;
2129 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2130 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2131 return nullptr;
2132 if (C->isZero())
2133 return nullptr;
2134 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2135 return nullptr;
2136
2137 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2138 Value *NewSelect = nullptr;
2139 NewSelect = Builder.CreateSelectWithUnknownProfile(
2140 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2141 return replaceInstUsesWith(*II, NewSelect);
2142 };
2143
2144 if (IID == Intrinsic::umax) {
2145 if (Instruction *I = foldMaxMulShift(I0, I1))
2146 return I;
2147 if (Instruction *I = foldMaxMulShift(I1, I0))
2148 return I;
2149 }
2150
2151 // If both operands of unsigned min/max are sign-extended, it is still ok
2152 // to narrow the operation.
2153 [[fallthrough]];
2154 }
2155 case Intrinsic::smax:
2156 case Intrinsic::smin: {
2157 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2158 Value *X, *Y;
2159 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2160 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2161 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2162 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2163 }
2164
2165 Constant *C;
2166 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2167 I0->hasOneUse()) {
2168 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2169 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2170 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2171 }
2172 }
2173
2174 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2175 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2176 const APInt *MinC, *MaxC;
2177 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2178 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2179 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2180 Value *NewMax = Builder.CreateBinaryIntrinsic(
2181 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2182 return replaceInstUsesWith(
2183 *II, Builder.CreateBinaryIntrinsic(
2184 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2185 };
2186 if (IID == Intrinsic::smax &&
2188 m_APInt(MinC)))) &&
2189 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2190 return CreateCanonicalClampForm(true);
2191 if (IID == Intrinsic::umax &&
2193 m_APInt(MinC)))) &&
2194 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2195 return CreateCanonicalClampForm(false);
2196
2197 // umin(i1 X, i1 Y) -> and i1 X, Y
2198 // smax(i1 X, i1 Y) -> and i1 X, Y
2199 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2200 II->getType()->isIntOrIntVectorTy(1)) {
2201 return BinaryOperator::CreateAnd(I0, I1);
2202 }
2203
2204 // umax(i1 X, i1 Y) -> or i1 X, Y
2205 // smin(i1 X, i1 Y) -> or i1 X, Y
2206 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2207 II->getType()->isIntOrIntVectorTy(1)) {
2208 return BinaryOperator::CreateOr(I0, I1);
2209 }
2210
2211 // smin(smax(X, -1), 1) -> scmp(X, 0)
2212 // smax(smin(X, 1), -1) -> scmp(X, 0)
2213 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2214 // And i1's have been changed to and/ors
2215 // So we only need to check for smin
2216 if (IID == Intrinsic::smin) {
2217 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2218 match(I1, m_One())) {
2219 Value *Zero = ConstantInt::get(X->getType(), 0);
2220 return replaceInstUsesWith(
2221 CI,
2222 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2223 }
2224 }
2225
2226 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2227 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2228 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2229 // TODO: Canonicalize neg after min/max if I1 is constant.
2230 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2231 (I0->hasOneUse() || I1->hasOneUse())) {
2233 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2234 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2235 }
2236 }
2237
2238 // (umax X, (xor X, Pow2))
2239 // -> (or X, Pow2)
2240 // (umin X, (xor X, Pow2))
2241 // -> (and X, ~Pow2)
2242 // (smax X, (xor X, Pos_Pow2))
2243 // -> (or X, Pos_Pow2)
2244 // (smin X, (xor X, Pos_Pow2))
2245 // -> (and X, ~Pos_Pow2)
2246 // (smax X, (xor X, Neg_Pow2))
2247 // -> (and X, ~Neg_Pow2)
2248 // (smin X, (xor X, Neg_Pow2))
2249 // -> (or X, Neg_Pow2)
2250 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2251 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2252 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2253 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2254 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2255
2256 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2257 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2258 if (KnownSign == std::nullopt) {
2259 UseOr = false;
2260 UseAndN = false;
2261 } else if (*KnownSign /* true is Signed. */) {
2262 UseOr ^= true;
2263 UseAndN ^= true;
2264 Type *Ty = I0->getType();
2265 // Negative power of 2 must be IntMin. It's possible to be able to
2266 // prove negative / power of 2 without actually having known bits, so
2267 // just get the value by hand.
2269 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2270 }
2271 }
2272 if (UseOr)
2273 return BinaryOperator::CreateOr(I0, X);
2274 else if (UseAndN)
2275 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2276 }
2277
2278 // If we can eliminate ~A and Y is free to invert:
2279 // max ~A, Y --> ~(min A, ~Y)
2280 //
2281 // Examples:
2282 // max ~A, ~Y --> ~(min A, Y)
2283 // max ~A, C --> ~(min A, ~C)
2284 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2285 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2286 Value *A;
2287 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2288 !isFreeToInvert(A, A->hasOneUse())) {
2289 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2291 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2292 return BinaryOperator::CreateNot(InvMaxMin);
2293 }
2294 }
2295 return nullptr;
2296 };
2297
2298 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2299 return I;
2300 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2301 return I;
2302
2304 return I;
2305
2306 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2307 const APInt *RHSC;
2308 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2309 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2310 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2311 ConstantInt::get(II->getType(), *RHSC));
2312
2313 // smax(X, -X) --> abs(X)
2314 // smin(X, -X) --> -abs(X)
2315 // umax(X, -X) --> -abs(X)
2316 // umin(X, -X) --> abs(X)
2317 if (isKnownNegation(I0, I1)) {
2318 // We can choose either operand as the input to abs(), but if we can
2319 // eliminate the only use of a value, that's better for subsequent
2320 // transforms/analysis.
2321 if (I0->hasOneUse() && !I1->hasOneUse())
2322 std::swap(I0, I1);
2323
2324 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2325 // operation and potentially its negation.
2326 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2327 Value *Abs = Builder.CreateBinaryIntrinsic(
2328 Intrinsic::abs, I0,
2329 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2330
2331 // We don't have a "nabs" intrinsic, so negate if needed based on the
2332 // max/min operation.
2333 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2334 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2335 return replaceInstUsesWith(CI, Abs);
2336 }
2337
2339 return Sel;
2340
2341 if (Instruction *SAdd = matchSAddSubSat(*II))
2342 return SAdd;
2343
2344 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2345 return replaceInstUsesWith(*II, NewMinMax);
2346
2348 return R;
2349
2350 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2351 return NewMinMax;
2352
2353 // Try to fold minmax with constant RHS based on range information
2354 if (match(I1, m_APIntAllowPoison(RHSC))) {
2355 ICmpInst::Predicate Pred =
2357 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2359 I0, IsSigned, SQ.getWithInstruction(II));
2360 if (!LHS_CR.isFullSet()) {
2361 if (LHS_CR.icmp(Pred, *RHSC))
2362 return replaceInstUsesWith(*II, I0);
2363 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2364 return replaceInstUsesWith(*II,
2365 ConstantInt::get(II->getType(), *RHSC));
2366 }
2367 }
2368
2370 return replaceInstUsesWith(*II, V);
2371
2372 break;
2373 }
2374 case Intrinsic::scmp: {
2375 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2376 Value *LHS, *RHS;
2377 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2378 return replaceInstUsesWith(
2379 CI,
2380 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2381 break;
2382 }
2383 case Intrinsic::bitreverse: {
2384 Value *IIOperand = II->getArgOperand(0);
2385 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2386 Value *X;
2387 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2388 X->getType()->isIntOrIntVectorTy(1)) {
2389 Type *Ty = II->getType();
2390 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2391 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2393 }
2394
2395 if (Instruction *crossLogicOpFold =
2397 return crossLogicOpFold;
2398
2399 break;
2400 }
2401 case Intrinsic::bswap: {
2402 Value *IIOperand = II->getArgOperand(0);
2403
2404 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2405 // inverse-shift-of-bswap:
2406 // bswap (shl X, Y) --> lshr (bswap X), Y
2407 // bswap (lshr X, Y) --> shl (bswap X), Y
2408 Value *X, *Y;
2409 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2410 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2412 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2413 BinaryOperator::BinaryOps InverseShift =
2414 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2415 ? Instruction::LShr
2416 : Instruction::Shl;
2417 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2418 }
2419 }
2420
2421 KnownBits Known = computeKnownBits(IIOperand, II);
2422 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2423 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2424 unsigned BW = Known.getBitWidth();
2425
2426 // bswap(x) -> shift(x) if x has exactly one "active byte"
2427 if (BW - LZ - TZ == 8) {
2428 assert(LZ != TZ && "active byte cannot be in the middle");
2429 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2430 return BinaryOperator::CreateNUWShl(
2431 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2432 // -> lshr(x) if the "active byte" is in the high part of x
2433 return BinaryOperator::CreateExactLShr(
2434 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2435 }
2436
2437 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2438 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2439 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2440 Value *CV = ConstantInt::get(X->getType(), C);
2441 Value *V = Builder.CreateLShr(X, CV);
2442 return new TruncInst(V, IIOperand->getType());
2443 }
2444
2445 if (Instruction *crossLogicOpFold =
2447 return crossLogicOpFold;
2448 }
2449
2450 // Try to fold into bitreverse if bswap is the root of the expression tree.
2451 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2452 /*MatchBitReversals*/ true))
2453 return BitOp;
2454 break;
2455 }
2456 case Intrinsic::masked_load:
2457 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2458 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2459 break;
2460 case Intrinsic::masked_store:
2461 return simplifyMaskedStore(*II);
2462 case Intrinsic::masked_gather:
2463 return simplifyMaskedGather(*II);
2464 case Intrinsic::masked_scatter:
2465 return simplifyMaskedScatter(*II);
2466 case Intrinsic::launder_invariant_group:
2467 case Intrinsic::strip_invariant_group:
2468 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2469 return replaceInstUsesWith(*II, SkippedBarrier);
2470 break;
2471 case Intrinsic::powi:
2472 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2473 // 0 and 1 are handled in instsimplify
2474 // powi(x, -1) -> 1/x
2475 if (Power->isMinusOne())
2476 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2477 II->getArgOperand(0), II);
2478 // powi(x, 2) -> x*x
2479 if (Power->equalsInt(2))
2480 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2481 II->getArgOperand(0), II);
2482
2483 if (!Power->getValue()[0]) {
2484 Value *X;
2485 // If power is even:
2486 // powi(-x, p) -> powi(x, p)
2487 // powi(fabs(x), p) -> powi(x, p)
2488 // powi(copysign(x, y), p) -> powi(x, p)
2489 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2490 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2491 match(II->getArgOperand(0),
2493 return replaceOperand(*II, 0, X);
2494 }
2495 }
2496 break;
2497
2498 case Intrinsic::cttz:
2499 case Intrinsic::ctlz:
2500 if (auto *I = foldCttzCtlz(*II, *this))
2501 return I;
2502 break;
2503
2504 case Intrinsic::ctpop:
2505 if (auto *I = foldCtpop(*II, *this))
2506 return I;
2507 break;
2508
2509 case Intrinsic::fshl:
2510 case Intrinsic::fshr: {
2511 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2512 Type *Ty = II->getType();
2513 unsigned BitWidth = Ty->getScalarSizeInBits();
2514 Constant *ShAmtC;
2515 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2516 // Canonicalize a shift amount constant operand to modulo the bit-width.
2517 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2518 Constant *ModuloC =
2519 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2520 if (!ModuloC)
2521 return nullptr;
2522 if (ModuloC != ShAmtC)
2523 return replaceOperand(*II, 2, ModuloC);
2524
2526 ShAmtC, DL),
2527 m_One()) &&
2528 "Shift amount expected to be modulo bitwidth");
2529
2530 // Canonicalize funnel shift right by constant to funnel shift left. This
2531 // is not entirely arbitrary. For historical reasons, the backend may
2532 // recognize rotate left patterns but miss rotate right patterns.
2533 if (IID == Intrinsic::fshr) {
2534 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2535 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2536 return nullptr;
2537
2538 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2539 Module *Mod = II->getModule();
2540 Function *Fshl =
2541 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2542 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2543 }
2544 assert(IID == Intrinsic::fshl &&
2545 "All funnel shifts by simple constants should go left");
2546
2547 // fshl(X, 0, C) --> shl X, C
2548 // fshl(X, undef, C) --> shl X, C
2549 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2550 return BinaryOperator::CreateShl(Op0, ShAmtC);
2551
2552 // fshl(0, X, C) --> lshr X, (BW-C)
2553 // fshl(undef, X, C) --> lshr X, (BW-C)
2554 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2555 return BinaryOperator::CreateLShr(Op1,
2556 ConstantExpr::getSub(WidthC, ShAmtC));
2557
2558 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2559 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2560 Module *Mod = II->getModule();
2561 Function *Bswap =
2562 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2563 return CallInst::Create(Bswap, { Op0 });
2564 }
2565 if (Instruction *BitOp =
2566 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2567 /*MatchBitReversals*/ true))
2568 return BitOp;
2569
2570 // R = fshl(X, X, C2)
2571 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2572 Value *InnerOp;
2573 const APInt *ShAmtInnerC, *ShAmtOuterC;
2574 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2575 m_APInt(ShAmtInnerC))) &&
2576 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2577 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2578 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2579 if (Modulo.isZero())
2580 return replaceInstUsesWith(*II, InnerOp);
2581 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2583 {InnerOp, InnerOp, ModuloC});
2584 }
2585 }
2586
2587 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2588 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2589 // if BitWidth is a power-of-2
2590 Value *Y;
2591 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2592 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2593 Module *Mod = II->getModule();
2595 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2596 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2597 }
2598
2599 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2600 // power-of-2
2601 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2602 match(Op1, m_ZeroInt())) {
2603 Value *Op2 = II->getArgOperand(2);
2604 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2605 return BinaryOperator::CreateShl(Op0, And);
2606 }
2607
2608 // Left or right might be masked.
2610 return &CI;
2611
2612 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2613 // so only the low bits of the shift amount are demanded if the bitwidth is
2614 // a power-of-2.
2615 if (!isPowerOf2_32(BitWidth))
2616 break;
2618 KnownBits Op2Known(BitWidth);
2619 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2620 return &CI;
2621 break;
2622 }
2623 case Intrinsic::ptrmask: {
2624 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2625 KnownBits Known(BitWidth);
2627 return II;
2628
2629 Value *InnerPtr, *InnerMask;
2630 bool Changed = false;
2631 // Combine:
2632 // (ptrmask (ptrmask p, A), B)
2633 // -> (ptrmask p, (and A, B))
2634 if (match(II->getArgOperand(0),
2636 m_Value(InnerMask))))) {
2637 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2638 "Mask types must match");
2639 // TODO: If InnerMask == Op1, we could copy attributes from inner
2640 // callsite -> outer callsite.
2641 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2642 replaceOperand(CI, 0, InnerPtr);
2643 replaceOperand(CI, 1, NewMask);
2644 Changed = true;
2645 }
2646
2647 // See if we can deduce non-null.
2648 if (!CI.hasRetAttr(Attribute::NonNull) &&
2649 (Known.isNonZero() ||
2650 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2651 CI.addRetAttr(Attribute::NonNull);
2652 Changed = true;
2653 }
2654
2655 unsigned NewAlignmentLog =
2657 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2658 // Known bits will capture if we had alignment information associated with
2659 // the pointer argument.
2660 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2662 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2663 Changed = true;
2664 }
2665 if (Changed)
2666 return &CI;
2667 break;
2668 }
2669 case Intrinsic::uadd_with_overflow:
2670 case Intrinsic::sadd_with_overflow: {
2671 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2672 return I;
2673
2674 // Given 2 constant operands whose sum does not overflow:
2675 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2676 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2677 Value *X;
2678 const APInt *C0, *C1;
2679 Value *Arg0 = II->getArgOperand(0);
2680 Value *Arg1 = II->getArgOperand(1);
2681 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2682 bool HasNWAdd = IsSigned
2683 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2684 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2685 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2686 bool Overflow;
2687 APInt NewC =
2688 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2689 if (!Overflow)
2690 return replaceInstUsesWith(
2691 *II, Builder.CreateBinaryIntrinsic(
2692 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2693 }
2694 break;
2695 }
2696
2697 case Intrinsic::umul_with_overflow:
2698 case Intrinsic::smul_with_overflow:
2699 case Intrinsic::usub_with_overflow:
2700 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2701 return I;
2702 break;
2703
2704 case Intrinsic::ssub_with_overflow: {
2705 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2706 return I;
2707
2708 Constant *C;
2709 Value *Arg0 = II->getArgOperand(0);
2710 Value *Arg1 = II->getArgOperand(1);
2711 // Given a constant C that is not the minimum signed value
2712 // for an integer of a given bit width:
2713 //
2714 // ssubo X, C -> saddo X, -C
2715 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2716 Value *NegVal = ConstantExpr::getNeg(C);
2717 // Build a saddo call that is equivalent to the discovered
2718 // ssubo call.
2719 return replaceInstUsesWith(
2720 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2721 Arg0, NegVal));
2722 }
2723
2724 break;
2725 }
2726
2727 case Intrinsic::uadd_sat:
2728 case Intrinsic::sadd_sat:
2729 case Intrinsic::usub_sat:
2730 case Intrinsic::ssub_sat: {
2732 Type *Ty = SI->getType();
2733 Value *Arg0 = SI->getLHS();
2734 Value *Arg1 = SI->getRHS();
2735
2736 // Make use of known overflow information.
2737 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2738 Arg0, Arg1, SI);
2739 switch (OR) {
2741 break;
2743 if (SI->isSigned())
2744 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2745 else
2746 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2748 unsigned BitWidth = Ty->getScalarSizeInBits();
2749 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2750 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2751 }
2753 unsigned BitWidth = Ty->getScalarSizeInBits();
2754 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2755 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2756 }
2757 }
2758
2759 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2760 // which after that:
2761 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2762 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2763 Constant *C, *C1;
2764 Value *A;
2765 if (IID == Intrinsic::usub_sat &&
2766 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2767 match(Arg1, m_ImmConstant(C1))) {
2768 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2769 auto *NewSub =
2770 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2771 return replaceInstUsesWith(*SI, NewSub);
2772 }
2773
2774 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2775 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2776 C->isNotMinSignedValue()) {
2777 Value *NegVal = ConstantExpr::getNeg(C);
2778 return replaceInstUsesWith(
2779 *II, Builder.CreateBinaryIntrinsic(
2780 Intrinsic::sadd_sat, Arg0, NegVal));
2781 }
2782
2783 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2784 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2785 // if Val and Val2 have the same sign
2786 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2787 Value *X;
2788 const APInt *Val, *Val2;
2789 APInt NewVal;
2790 bool IsUnsigned =
2791 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2792 if (Other->getIntrinsicID() == IID &&
2793 match(Arg1, m_APInt(Val)) &&
2794 match(Other->getArgOperand(0), m_Value(X)) &&
2795 match(Other->getArgOperand(1), m_APInt(Val2))) {
2796 if (IsUnsigned)
2797 NewVal = Val->uadd_sat(*Val2);
2798 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2799 bool Overflow;
2800 NewVal = Val->sadd_ov(*Val2, Overflow);
2801 if (Overflow) {
2802 // Both adds together may add more than SignedMaxValue
2803 // without saturating the final result.
2804 break;
2805 }
2806 } else {
2807 // Cannot fold saturated addition with different signs.
2808 break;
2809 }
2810
2811 return replaceInstUsesWith(
2812 *II, Builder.CreateBinaryIntrinsic(
2813 IID, X, ConstantInt::get(II->getType(), NewVal)));
2814 }
2815 }
2816 break;
2817 }
2818
2819 case Intrinsic::minnum:
2820 case Intrinsic::maxnum:
2821 case Intrinsic::minimumnum:
2822 case Intrinsic::maximumnum:
2823 case Intrinsic::minimum:
2824 case Intrinsic::maximum: {
2825 Value *Arg0 = II->getArgOperand(0);
2826 Value *Arg1 = II->getArgOperand(1);
2827 Value *X, *Y;
2828 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2829 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2830 // If both operands are negated, invert the call and negate the result:
2831 // min(-X, -Y) --> -(max(X, Y))
2832 // max(-X, -Y) --> -(min(X, Y))
2833 Intrinsic::ID NewIID;
2834 switch (IID) {
2835 case Intrinsic::maxnum:
2836 NewIID = Intrinsic::minnum;
2837 break;
2838 case Intrinsic::minnum:
2839 NewIID = Intrinsic::maxnum;
2840 break;
2841 case Intrinsic::maximumnum:
2842 NewIID = Intrinsic::minimumnum;
2843 break;
2844 case Intrinsic::minimumnum:
2845 NewIID = Intrinsic::maximumnum;
2846 break;
2847 case Intrinsic::maximum:
2848 NewIID = Intrinsic::minimum;
2849 break;
2850 case Intrinsic::minimum:
2851 NewIID = Intrinsic::maximum;
2852 break;
2853 default:
2854 llvm_unreachable("unexpected intrinsic ID");
2855 }
2856 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2857 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2858 FNeg->copyIRFlags(II);
2859 return FNeg;
2860 }
2861
2862 // m(m(X, C2), C1) -> m(X, C)
2863 const APFloat *C1, *C2;
2864 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2865 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2866 ((match(M->getArgOperand(0), m_Value(X)) &&
2867 match(M->getArgOperand(1), m_APFloat(C2))) ||
2868 (match(M->getArgOperand(1), m_Value(X)) &&
2869 match(M->getArgOperand(0), m_APFloat(C2))))) {
2870 APFloat Res(0.0);
2871 switch (IID) {
2872 case Intrinsic::maxnum:
2873 Res = maxnum(*C1, *C2);
2874 break;
2875 case Intrinsic::minnum:
2876 Res = minnum(*C1, *C2);
2877 break;
2878 case Intrinsic::maximumnum:
2879 Res = maximumnum(*C1, *C2);
2880 break;
2881 case Intrinsic::minimumnum:
2882 Res = minimumnum(*C1, *C2);
2883 break;
2884 case Intrinsic::maximum:
2885 Res = maximum(*C1, *C2);
2886 break;
2887 case Intrinsic::minimum:
2888 Res = minimum(*C1, *C2);
2889 break;
2890 default:
2891 llvm_unreachable("unexpected intrinsic ID");
2892 }
2893 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2894 // was a simplification (so Arg0 and its original flags could
2895 // propagate?)
2896 Value *V = Builder.CreateBinaryIntrinsic(
2897 IID, X, ConstantFP::get(Arg0->getType(), Res),
2899 return replaceInstUsesWith(*II, V);
2900 }
2901 }
2902
2903 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2904 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2905 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2906 X->getType() == Y->getType()) {
2907 Value *NewCall =
2908 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2909 return new FPExtInst(NewCall, II->getType());
2910 }
2911
2912 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2913 Constant *C;
2914 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2915 match(Arg1, m_ImmConstant(C))) {
2916 if (Constant *TruncC =
2917 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2918 Value *NewCall =
2919 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2920 return new FPExtInst(NewCall, II->getType());
2921 }
2922 }
2923
2924 // max X, -X --> fabs X
2925 // min X, -X --> -(fabs X)
2926 // TODO: Remove one-use limitation? That is obviously better for max,
2927 // hence why we don't check for one-use for that. However,
2928 // it would be an extra instruction for min (fnabs), but
2929 // that is still likely better for analysis and codegen.
2930 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2931 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2932 return Op0->hasOneUse() ||
2933 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2934 IID != Intrinsic::minimumnum);
2935 return false;
2936 };
2937
2938 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2939 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2940 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2941 IID == Intrinsic::minimumnum)
2942 R = Builder.CreateFNegFMF(R, II);
2943 return replaceInstUsesWith(*II, R);
2944 }
2945
2946 break;
2947 }
2948 case Intrinsic::matrix_multiply: {
2949 // Optimize negation in matrix multiplication.
2950
2951 // -A * -B -> A * B
2952 Value *A, *B;
2953 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2954 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2955 replaceOperand(*II, 0, A);
2956 replaceOperand(*II, 1, B);
2957 return II;
2958 }
2959
2960 Value *Op0 = II->getOperand(0);
2961 Value *Op1 = II->getOperand(1);
2962 Value *OpNotNeg, *NegatedOp;
2963 unsigned NegatedOpArg, OtherOpArg;
2964 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2965 NegatedOp = Op0;
2966 NegatedOpArg = 0;
2967 OtherOpArg = 1;
2968 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2969 NegatedOp = Op1;
2970 NegatedOpArg = 1;
2971 OtherOpArg = 0;
2972 } else
2973 // Multiplication doesn't have a negated operand.
2974 break;
2975
2976 // Only optimize if the negated operand has only one use.
2977 if (!NegatedOp->hasOneUse())
2978 break;
2979
2980 Value *OtherOp = II->getOperand(OtherOpArg);
2981 VectorType *RetTy = cast<VectorType>(II->getType());
2982 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2983 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2984 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2985 ElementCount OtherCount = OtherOpTy->getElementCount();
2986 ElementCount RetCount = RetTy->getElementCount();
2987 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2988 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2989 ElementCount::isKnownLT(OtherCount, RetCount)) {
2990 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2991 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2992 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2993 return II;
2994 }
2995 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2996 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2997 SmallVector<Value *, 5> NewArgs(II->args());
2998 NewArgs[NegatedOpArg] = OpNotNeg;
2999 Instruction *NewMul =
3000 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3001 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3002 }
3003 break;
3004 }
3005 case Intrinsic::fmuladd: {
3006 // Try to simplify the underlying FMul.
3007 if (Value *V =
3008 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3009 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3010 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3011 II->getFastMathFlags());
3012
3013 [[fallthrough]];
3014 }
3015 case Intrinsic::fma: {
3016 // fma fneg(x), fneg(y), z -> fma x, y, z
3017 Value *Src0 = II->getArgOperand(0);
3018 Value *Src1 = II->getArgOperand(1);
3019 Value *Src2 = II->getArgOperand(2);
3020 Value *X, *Y;
3021 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3022 replaceOperand(*II, 0, X);
3023 replaceOperand(*II, 1, Y);
3024 return II;
3025 }
3026
3027 // fma fabs(x), fabs(x), z -> fma x, x, z
3028 if (match(Src0, m_FAbs(m_Value(X))) &&
3029 match(Src1, m_FAbs(m_Specific(X)))) {
3030 replaceOperand(*II, 0, X);
3031 replaceOperand(*II, 1, X);
3032 return II;
3033 }
3034
3035 // Try to simplify the underlying FMul. We can only apply simplifications
3036 // that do not require rounding.
3037 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3038 SQ.getWithInstruction(II)))
3039 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3040
3041 // fma x, y, 0 -> fmul x, y
3042 // This is always valid for -0.0, but requires nsz for +0.0 as
3043 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3044 if (match(Src2, m_NegZeroFP()) ||
3045 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3046 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3047
3048 // fma x, -1.0, y -> fsub y, x
3049 if (match(Src1, m_SpecificFP(-1.0)))
3050 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3051
3052 break;
3053 }
3054 case Intrinsic::copysign: {
3055 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3056 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3057 Sign, getSimplifyQuery().getWithInstruction(II))) {
3058 if (*KnownSignBit) {
3059 // If we know that the sign argument is negative, reduce to FNABS:
3060 // copysign Mag, -Sign --> fneg (fabs Mag)
3061 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3062 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3063 }
3064
3065 // If we know that the sign argument is positive, reduce to FABS:
3066 // copysign Mag, +Sign --> fabs Mag
3067 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3068 return replaceInstUsesWith(*II, Fabs);
3069 }
3070
3071 // Propagate sign argument through nested calls:
3072 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3073 Value *X;
3075 Value *CopySign =
3076 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3077 return replaceInstUsesWith(*II, CopySign);
3078 }
3079
3080 // Clear sign-bit of constant magnitude:
3081 // copysign -MagC, X --> copysign MagC, X
3082 // TODO: Support constant folding for fabs
3083 const APFloat *MagC;
3084 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3085 APFloat PosMagC = *MagC;
3086 PosMagC.clearSign();
3087 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3088 }
3089
3090 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3091 // copysign (fabs X), Sign --> copysign X, Sign
3092 // copysign (fneg X), Sign --> copysign X, Sign
3093 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3094 return replaceOperand(*II, 0, X);
3095
3096 Type *SignEltTy = Sign->getType()->getScalarType();
3097
3098 Value *CastSrc;
3099 if (match(Sign,
3101 CastSrc->getType()->isIntOrIntVectorTy() &&
3103 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3105 APInt::getSignMask(Known.getBitWidth()), Known,
3106 SQ))
3107 return II;
3108 }
3109
3110 break;
3111 }
3112 case Intrinsic::fabs: {
3113 Value *Cond, *TVal, *FVal;
3114 Value *Arg = II->getArgOperand(0);
3115 Value *X;
3116 // fabs (-X) --> fabs (X)
3117 if (match(Arg, m_FNeg(m_Value(X)))) {
3118 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
3119 return replaceInstUsesWith(CI, Fabs);
3120 }
3121
3122 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3123 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3124 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3125 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3126 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3127 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3128 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3129 FastMathFlags FMF1 = II->getFastMathFlags();
3130 FastMathFlags FMF2 = cast<SelectInst>(Arg)->getFastMathFlags();
3131 FMF2.setNoSignedZeros(false);
3132 SI->setFastMathFlags(FMF1 | FMF2);
3133 return SI;
3134 }
3135 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3136 if (match(TVal, m_FNeg(m_Specific(FVal))))
3137 return replaceOperand(*II, 0, FVal);
3138 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3139 if (match(FVal, m_FNeg(m_Specific(TVal))))
3140 return replaceOperand(*II, 0, TVal);
3141 }
3142
3143 Value *Magnitude, *Sign;
3144 if (match(II->getArgOperand(0),
3145 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3146 // fabs (copysign x, y) -> (fabs x)
3147 CallInst *AbsSign =
3148 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
3149 return replaceInstUsesWith(*II, AbsSign);
3150 }
3151
3152 [[fallthrough]];
3153 }
3154 case Intrinsic::ceil:
3155 case Intrinsic::floor:
3156 case Intrinsic::round:
3157 case Intrinsic::roundeven:
3158 case Intrinsic::nearbyint:
3159 case Intrinsic::rint:
3160 case Intrinsic::trunc: {
3161 Value *ExtSrc;
3162 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3163 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3164 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3165 return new FPExtInst(NarrowII, II->getType());
3166 }
3167 break;
3168 }
3169 case Intrinsic::cos:
3170 case Intrinsic::amdgcn_cos:
3171 case Intrinsic::cosh: {
3172 Value *X, *Sign;
3173 Value *Src = II->getArgOperand(0);
3174 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3175 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3176 // f(-x) --> f(x)
3177 // f(fabs(x)) --> f(x)
3178 // f(copysign(x, y)) --> f(x)
3179 // for f in {cos, cosh}
3180 return replaceOperand(*II, 0, X);
3181 }
3182 break;
3183 }
3184 case Intrinsic::sin:
3185 case Intrinsic::amdgcn_sin:
3186 case Intrinsic::sinh:
3187 case Intrinsic::tan:
3188 case Intrinsic::tanh: {
3189 Value *X;
3190 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3191 // f(-x) --> -f(x)
3192 // for f in {sin, sinh, tan, tanh}
3193 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3194 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3195 }
3196 break;
3197 }
3198 case Intrinsic::ldexp: {
3199 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3200 //
3201 // The danger is if the first ldexp would overflow to infinity or underflow
3202 // to zero, but the combined exponent avoids it. We ignore this with
3203 // reassoc.
3204 //
3205 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3206 // it would just double down on the overflow/underflow which would occur
3207 // anyway.
3208 //
3209 // TODO: Could do better if we had range tracking for the input value
3210 // exponent. Also could broaden sign check to cover == 0 case.
3211 Value *Src = II->getArgOperand(0);
3212 Value *Exp = II->getArgOperand(1);
3213
3214 uint64_t ConstExp;
3215 if (match(Exp, m_ConstantInt(ConstExp))) {
3216 // ldexp(x, K) -> fmul x, 2^K
3217 const fltSemantics &FPTy =
3218 Src->getType()->getScalarType()->getFltSemantics();
3219
3220 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3222 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3223 // Skip overflow and underflow cases.
3224 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3225 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3226 }
3227 }
3228
3229 Value *InnerSrc;
3230 Value *InnerExp;
3232 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3233 Exp->getType() == InnerExp->getType()) {
3234 FastMathFlags FMF = II->getFastMathFlags();
3235 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3236
3237 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3238 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
3239 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3240 // width.
3241 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3242 II->setArgOperand(1, NewExp);
3243 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3244 return replaceOperand(*II, 0, InnerSrc);
3245 }
3246 }
3247
3248 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3249 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3250 Value *ExtSrc;
3251 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3252 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3253 Value *Select =
3254 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3255 ConstantFP::get(II->getType(), 1.0));
3257 }
3258 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3259 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3260 Value *Select =
3261 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3262 ConstantFP::get(II->getType(), 1.0));
3264 }
3265
3266 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3267 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3268 ///
3269 // TODO: If we cared, should insert a canonicalize for x
3270 Value *SelectCond, *SelectLHS, *SelectRHS;
3271 if (match(II->getArgOperand(1),
3272 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3273 m_Value(SelectRHS))))) {
3274 Value *NewLdexp = nullptr;
3275 Value *Select = nullptr;
3276 if (match(SelectRHS, m_ZeroInt())) {
3277 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3278 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3279 } else if (match(SelectLHS, m_ZeroInt())) {
3280 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3281 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3282 }
3283
3284 if (NewLdexp) {
3285 Select->takeName(II);
3286 return replaceInstUsesWith(*II, Select);
3287 }
3288 }
3289
3290 break;
3291 }
3292 case Intrinsic::ptrauth_auth:
3293 case Intrinsic::ptrauth_resign: {
3294 // We don't support this optimization on intrinsic calls with deactivation
3295 // symbols, which are represented using operand bundles.
3296 if (II->hasOperandBundles())
3297 break;
3298
3299 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3300 // sign+auth component if the key and discriminator match.
3301 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3302 Value *Ptr = II->getArgOperand(0);
3303 Value *Key = II->getArgOperand(1);
3304 Value *Disc = II->getArgOperand(2);
3305
3306 // AuthKey will be the key we need to end up authenticating against in
3307 // whatever we replace this sequence with.
3308 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3309 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3310 // We don't support this optimization on intrinsic calls with deactivation
3311 // symbols, which are represented using operand bundles.
3312 if (CI->hasOperandBundles())
3313 break;
3314
3315 BasePtr = CI->getArgOperand(0);
3316 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3317 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3318 break;
3319 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3320 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3321 break;
3322 AuthKey = CI->getArgOperand(1);
3323 AuthDisc = CI->getArgOperand(2);
3324 } else
3325 break;
3326 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3327 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3328 // our purposes, so check for that too.
3329 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3330 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3331 break;
3332
3333 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3334 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3335 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3336 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3337 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3338 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3339 SignDisc, /*AddrDisc=*/Null,
3340 /*DeactivationSymbol=*/Null);
3342 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3343 return eraseInstFromFunction(*II);
3344 }
3345
3346 // auth(ptrauth(p,k,d),k,d) -> p
3347 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3348 } else
3349 break;
3350
3351 unsigned NewIntrin;
3352 if (AuthKey && NeedSign) {
3353 // resign(0,1) + resign(1,2) = resign(0, 2)
3354 NewIntrin = Intrinsic::ptrauth_resign;
3355 } else if (AuthKey) {
3356 // resign(0,1) + auth(1) = auth(0)
3357 NewIntrin = Intrinsic::ptrauth_auth;
3358 } else if (NeedSign) {
3359 // sign(0) + resign(0, 1) = sign(1)
3360 NewIntrin = Intrinsic::ptrauth_sign;
3361 } else {
3362 // sign(0) + auth(0) = nop
3363 replaceInstUsesWith(*II, BasePtr);
3364 return eraseInstFromFunction(*II);
3365 }
3366
3367 SmallVector<Value *, 4> CallArgs;
3368 CallArgs.push_back(BasePtr);
3369 if (AuthKey) {
3370 CallArgs.push_back(AuthKey);
3371 CallArgs.push_back(AuthDisc);
3372 }
3373
3374 if (NeedSign) {
3375 CallArgs.push_back(II->getArgOperand(3));
3376 CallArgs.push_back(II->getArgOperand(4));
3377 }
3378
3379 Function *NewFn =
3380 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3381 return CallInst::Create(NewFn, CallArgs);
3382 }
3383 case Intrinsic::arm_neon_vtbl1:
3384 case Intrinsic::arm_neon_vtbl2:
3385 case Intrinsic::arm_neon_vtbl3:
3386 case Intrinsic::arm_neon_vtbl4:
3387 case Intrinsic::aarch64_neon_tbl1:
3388 case Intrinsic::aarch64_neon_tbl2:
3389 case Intrinsic::aarch64_neon_tbl3:
3390 case Intrinsic::aarch64_neon_tbl4:
3391 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3392 case Intrinsic::arm_neon_vtbx1:
3393 case Intrinsic::arm_neon_vtbx2:
3394 case Intrinsic::arm_neon_vtbx3:
3395 case Intrinsic::arm_neon_vtbx4:
3396 case Intrinsic::aarch64_neon_tbx1:
3397 case Intrinsic::aarch64_neon_tbx2:
3398 case Intrinsic::aarch64_neon_tbx3:
3399 case Intrinsic::aarch64_neon_tbx4:
3400 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3401
3402 case Intrinsic::arm_neon_vmulls:
3403 case Intrinsic::arm_neon_vmullu:
3404 case Intrinsic::aarch64_neon_smull:
3405 case Intrinsic::aarch64_neon_umull: {
3406 Value *Arg0 = II->getArgOperand(0);
3407 Value *Arg1 = II->getArgOperand(1);
3408
3409 // Handle mul by zero first:
3411 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3412 }
3413
3414 // Check for constant LHS & RHS - in this case we just simplify.
3415 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3416 IID == Intrinsic::aarch64_neon_umull);
3417 VectorType *NewVT = cast<VectorType>(II->getType());
3418 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3419 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3420 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3421 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3422 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3423 }
3424
3425 // Couldn't simplify - canonicalize constant to the RHS.
3426 std::swap(Arg0, Arg1);
3427 }
3428
3429 // Handle mul by one:
3430 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3431 if (ConstantInt *Splat =
3432 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3433 if (Splat->isOne())
3434 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3435 /*isSigned=*/!Zext);
3436
3437 break;
3438 }
3439 case Intrinsic::arm_neon_aesd:
3440 case Intrinsic::arm_neon_aese:
3441 case Intrinsic::aarch64_crypto_aesd:
3442 case Intrinsic::aarch64_crypto_aese:
3443 case Intrinsic::aarch64_sve_aesd:
3444 case Intrinsic::aarch64_sve_aese: {
3445 Value *DataArg = II->getArgOperand(0);
3446 Value *KeyArg = II->getArgOperand(1);
3447
3448 // Accept zero on either operand.
3449 if (!match(KeyArg, m_ZeroInt()))
3450 std::swap(KeyArg, DataArg);
3451
3452 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3453 Value *Data, *Key;
3454 if (match(KeyArg, m_ZeroInt()) &&
3455 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3456 replaceOperand(*II, 0, Data);
3457 replaceOperand(*II, 1, Key);
3458 return II;
3459 }
3460 break;
3461 }
3462 case Intrinsic::arm_neon_vshifts:
3463 case Intrinsic::arm_neon_vshiftu:
3464 case Intrinsic::aarch64_neon_sshl:
3465 case Intrinsic::aarch64_neon_ushl:
3466 return foldNeonShift(II, *this);
3467 case Intrinsic::hexagon_V6_vandvrt:
3468 case Intrinsic::hexagon_V6_vandvrt_128B: {
3469 // Simplify Q -> V -> Q conversion.
3470 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3471 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3472 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3473 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3474 break;
3475 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3476 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3477 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3478 // Check if every byte has common bits in Bytes and Mask.
3479 uint64_t C = Bytes1 & Mask1;
3480 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3481 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3482 }
3483 break;
3484 }
3485 case Intrinsic::stackrestore: {
3486 enum class ClassifyResult {
3487 None,
3488 Alloca,
3489 StackRestore,
3490 CallWithSideEffects,
3491 };
3492 auto Classify = [](const Instruction *I) {
3493 if (isa<AllocaInst>(I))
3494 return ClassifyResult::Alloca;
3495
3496 if (auto *CI = dyn_cast<CallInst>(I)) {
3497 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3498 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3499 return ClassifyResult::StackRestore;
3500
3501 if (II->mayHaveSideEffects())
3502 return ClassifyResult::CallWithSideEffects;
3503 } else {
3504 // Consider all non-intrinsic calls to be side effects
3505 return ClassifyResult::CallWithSideEffects;
3506 }
3507 }
3508
3509 return ClassifyResult::None;
3510 };
3511
3512 // If the stacksave and the stackrestore are in the same BB, and there is
3513 // no intervening call, alloca, or stackrestore of a different stacksave,
3514 // remove the restore. This can happen when variable allocas are DCE'd.
3515 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3516 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3517 SS->getParent() == II->getParent()) {
3518 BasicBlock::iterator BI(SS);
3519 bool CannotRemove = false;
3520 for (++BI; &*BI != II; ++BI) {
3521 switch (Classify(&*BI)) {
3522 case ClassifyResult::None:
3523 // So far so good, look at next instructions.
3524 break;
3525
3526 case ClassifyResult::StackRestore:
3527 // If we found an intervening stackrestore for a different
3528 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3529 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3530 CannotRemove = true;
3531 break;
3532
3533 case ClassifyResult::Alloca:
3534 case ClassifyResult::CallWithSideEffects:
3535 // If we found an alloca, a non-intrinsic call, or an intrinsic
3536 // call with side effects, we can't remove the stackrestore.
3537 CannotRemove = true;
3538 break;
3539 }
3540 if (CannotRemove)
3541 break;
3542 }
3543
3544 if (!CannotRemove)
3545 return eraseInstFromFunction(CI);
3546 }
3547 }
3548
3549 // Scan down this block to see if there is another stack restore in the
3550 // same block without an intervening call/alloca.
3552 Instruction *TI = II->getParent()->getTerminator();
3553 bool CannotRemove = false;
3554 for (++BI; &*BI != TI; ++BI) {
3555 switch (Classify(&*BI)) {
3556 case ClassifyResult::None:
3557 // So far so good, look at next instructions.
3558 break;
3559
3560 case ClassifyResult::StackRestore:
3561 // If there is a stackrestore below this one, remove this one.
3562 return eraseInstFromFunction(CI);
3563
3564 case ClassifyResult::Alloca:
3565 case ClassifyResult::CallWithSideEffects:
3566 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3567 // with side effects (such as llvm.stacksave and llvm.read_register),
3568 // we can't remove the stack restore.
3569 CannotRemove = true;
3570 break;
3571 }
3572 if (CannotRemove)
3573 break;
3574 }
3575
3576 // If the stack restore is in a return, resume, or unwind block and if there
3577 // are no allocas or calls between the restore and the return, nuke the
3578 // restore.
3579 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3580 return eraseInstFromFunction(CI);
3581 break;
3582 }
3583 case Intrinsic::lifetime_end:
3584 // Asan needs to poison memory to detect invalid access which is possible
3585 // even for empty lifetime range.
3586 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3587 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3588 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3589 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3590 break;
3591
3592 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3593 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3594 }))
3595 return nullptr;
3596 break;
3597 case Intrinsic::assume: {
3598 Value *IIOperand = II->getArgOperand(0);
3600 II->getOperandBundlesAsDefs(OpBundles);
3601
3602 /// This will remove the boolean Condition from the assume given as
3603 /// argument and remove the assume if it becomes useless.
3604 /// always returns nullptr for use as a return values.
3605 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3606 assert(isa<AssumeInst>(Assume));
3608 return eraseInstFromFunction(CI);
3609 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3610 return nullptr;
3611 };
3612 // Remove an assume if it is followed by an identical assume.
3613 // TODO: Do we need this? Unless there are conflicting assumptions, the
3614 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3615 Instruction *Next = II->getNextNode();
3617 return RemoveConditionFromAssume(Next);
3618
3619 // Canonicalize assume(a && b) -> assume(a); assume(b);
3620 // Note: New assumption intrinsics created here are registered by
3621 // the InstCombineIRInserter object.
3622 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3623 Value *AssumeIntrinsic = II->getCalledOperand();
3624 Value *A, *B;
3625 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3626 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3627 II->getName());
3628 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3629 return eraseInstFromFunction(*II);
3630 }
3631 // assume(!(a || b)) -> assume(!a); assume(!b);
3632 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3633 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3634 Builder.CreateNot(A), OpBundles, II->getName());
3635 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3636 Builder.CreateNot(B), II->getName());
3637 return eraseInstFromFunction(*II);
3638 }
3639
3640 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3641 // (if assume is valid at the load)
3642 Instruction *LHS;
3644 m_Zero())) &&
3645 LHS->getOpcode() == Instruction::Load &&
3646 LHS->getType()->isPointerTy() &&
3647 isValidAssumeForContext(II, LHS, &DT)) {
3648 MDNode *MD = MDNode::get(II->getContext(), {});
3649 LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3650 LHS->setMetadata(LLVMContext::MD_noundef, MD);
3651 return RemoveConditionFromAssume(II);
3652
3653 // TODO: apply nonnull return attributes to calls and invokes
3654 // TODO: apply range metadata for range check patterns?
3655 }
3656
3657 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3658 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3659
3660 // Separate storage assumptions apply to the underlying allocations, not
3661 // any particular pointer within them. When evaluating the hints for AA
3662 // purposes we getUnderlyingObject them; by precomputing the answers here
3663 // we can avoid having to do so repeatedly there.
3664 if (OBU.getTagName() == "separate_storage") {
3665 assert(OBU.Inputs.size() == 2);
3666 auto MaybeSimplifyHint = [&](const Use &U) {
3667 Value *Hint = U.get();
3668 // Not having a limit is safe because InstCombine removes unreachable
3669 // code.
3670 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3671 if (Hint != UnderlyingObject)
3672 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3673 };
3674 MaybeSimplifyHint(OBU.Inputs[0]);
3675 MaybeSimplifyHint(OBU.Inputs[1]);
3676 }
3677
3678 // Try to remove redundant alignment assumptions.
3679 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3681 *cast<AssumeInst>(II), II->arg_size() + Idx);
3682 if (!RK || RK.AttrKind != Attribute::Alignment ||
3684 continue;
3685
3686 // Remove align 1 bundles; they don't add any useful information.
3687 if (RK.ArgValue == 1)
3689
3690 // Don't try to remove align assumptions for pointers derived from
3691 // arguments. We might lose information if the function gets inline and
3692 // the align argument attribute disappears.
3694 if (!UO || isa<Argument>(UO))
3695 continue;
3696
3697 // Compute known bits for the pointer, passing nullptr as context to
3698 // avoid computeKnownBits using the assumption we are about to remove
3699 // for reasoning.
3700 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3701 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3703 if ((1ULL << TZ) < RK.ArgValue)
3704 continue;
3706 }
3707
3708 if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
3710 *cast<AssumeInst>(II), II->arg_size() + Idx);
3711 if (!RK || RK.AttrKind != Attribute::NonNull ||
3713 getSimplifyQuery().getWithInstruction(II)))
3714 continue;
3716 }
3717 }
3718
3719 // Convert nonnull assume like:
3720 // %A = icmp ne i32* %PTR, null
3721 // call void @llvm.assume(i1 %A)
3722 // into
3723 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3725 match(IIOperand,
3727 A->getType()->isPointerTy()) {
3728 if (auto *Replacement = buildAssumeFromKnowledge(
3729 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3730
3731 Replacement->insertBefore(Next->getIterator());
3732 AC.registerAssumption(Replacement);
3733 return RemoveConditionFromAssume(II);
3734 }
3735 }
3736
3737 // Convert alignment assume like:
3738 // %B = ptrtoint i32* %A to i64
3739 // %C = and i64 %B, Constant
3740 // %D = icmp eq i64 %C, 0
3741 // call void @llvm.assume(i1 %D)
3742 // into
3743 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3744 uint64_t AlignMask = 1;
3745 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3746 match(IIOperand,
3748 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3749 m_Zero())))) {
3750 if (isPowerOf2_64(AlignMask + 1)) {
3751 uint64_t Offset = 0;
3753 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3754 /// Note: this doesn't preserve the offset information but merges
3755 /// offset and alignment.
3756 /// TODO: we can generate a GEP instead of merging the alignment with
3757 /// the offset.
3758 RetainedKnowledge RK{Attribute::Alignment,
3759 MinAlign(Offset, AlignMask + 1), A};
3760 if (auto *Replacement =
3762
3763 Replacement->insertAfter(II->getIterator());
3764 AC.registerAssumption(Replacement);
3765 }
3766 return RemoveConditionFromAssume(II);
3767 }
3768 }
3769 }
3770
3771 /// Canonicalize Knowledge in operand bundles.
3772 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3773 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3774 auto &BOI = II->bundle_op_info_begin()[Idx];
3777 if (BOI.End - BOI.Begin > 2)
3778 continue; // Prevent reducing knowledge in an align with offset since
3779 // extracting a RetainedKnowledge from them looses offset
3780 // information
3781 RetainedKnowledge CanonRK =
3784 &getDominatorTree());
3785 if (CanonRK == RK)
3786 continue;
3787 if (!CanonRK) {
3788 if (BOI.End - BOI.Begin > 0) {
3789 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3790 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3791 }
3792 continue;
3793 }
3794 assert(RK.AttrKind == CanonRK.AttrKind);
3795 if (BOI.End - BOI.Begin > 0)
3796 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3797 if (BOI.End - BOI.Begin > 1)
3798 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3799 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3800 if (RK.WasOn)
3801 Worklist.pushValue(RK.WasOn);
3802 return II;
3803 }
3804 }
3805
3806 // If there is a dominating assume with the same condition as this one,
3807 // then this one is redundant, and should be removed.
3808 KnownBits Known(1);
3809 computeKnownBits(IIOperand, Known, II);
3811 return eraseInstFromFunction(*II);
3812
3813 // assume(false) is unreachable.
3814 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3816 return eraseInstFromFunction(*II);
3817 }
3818
3819 // Update the cache of affected values for this assumption (we might be
3820 // here because we just simplified the condition).
3821 AC.updateAffectedValues(cast<AssumeInst>(II));
3822 break;
3823 }
3824 case Intrinsic::experimental_guard: {
3825 // Is this guard followed by another guard? We scan forward over a small
3826 // fixed window of instructions to handle common cases with conditions
3827 // computed between guards.
3828 Instruction *NextInst = II->getNextNode();
3829 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3830 // Note: Using context-free form to avoid compile time blow up
3831 if (!isSafeToSpeculativelyExecute(NextInst))
3832 break;
3833 NextInst = NextInst->getNextNode();
3834 }
3835 Value *NextCond = nullptr;
3836 if (match(NextInst,
3838 Value *CurrCond = II->getArgOperand(0);
3839
3840 // Remove a guard that it is immediately preceded by an identical guard.
3841 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3842 if (CurrCond != NextCond) {
3843 Instruction *MoveI = II->getNextNode();
3844 while (MoveI != NextInst) {
3845 auto *Temp = MoveI;
3846 MoveI = MoveI->getNextNode();
3847 Temp->moveBefore(II->getIterator());
3848 }
3849 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3850 }
3851 eraseInstFromFunction(*NextInst);
3852 return II;
3853 }
3854 break;
3855 }
3856 case Intrinsic::vector_insert: {
3857 Value *Vec = II->getArgOperand(0);
3858 Value *SubVec = II->getArgOperand(1);
3859 Value *Idx = II->getArgOperand(2);
3860 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3861 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3862 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3863
3864 // Only canonicalize if the destination vector, Vec, and SubVec are all
3865 // fixed vectors.
3866 if (DstTy && VecTy && SubVecTy) {
3867 unsigned DstNumElts = DstTy->getNumElements();
3868 unsigned VecNumElts = VecTy->getNumElements();
3869 unsigned SubVecNumElts = SubVecTy->getNumElements();
3870 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3871
3872 // An insert that entirely overwrites Vec with SubVec is a nop.
3873 if (VecNumElts == SubVecNumElts)
3874 return replaceInstUsesWith(CI, SubVec);
3875
3876 // Widen SubVec into a vector of the same width as Vec, since
3877 // shufflevector requires the two input vectors to be the same width.
3878 // Elements beyond the bounds of SubVec within the widened vector are
3879 // undefined.
3880 SmallVector<int, 8> WidenMask;
3881 unsigned i;
3882 for (i = 0; i != SubVecNumElts; ++i)
3883 WidenMask.push_back(i);
3884 for (; i != VecNumElts; ++i)
3885 WidenMask.push_back(PoisonMaskElem);
3886
3887 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3888
3890 for (unsigned i = 0; i != IdxN; ++i)
3891 Mask.push_back(i);
3892 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3893 Mask.push_back(i);
3894 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3895 Mask.push_back(i);
3896
3897 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3898 return replaceInstUsesWith(CI, Shuffle);
3899 }
3900 break;
3901 }
3902 case Intrinsic::vector_extract: {
3903 Value *Vec = II->getArgOperand(0);
3904 Value *Idx = II->getArgOperand(1);
3905
3906 Type *ReturnType = II->getType();
3907 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3908 // ExtractIdx)
3909 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3910 Value *InsertTuple, *InsertIdx, *InsertValue;
3912 m_Value(InsertValue),
3913 m_Value(InsertIdx))) &&
3914 InsertValue->getType() == ReturnType) {
3915 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3916 // Case where we get the same index right after setting it.
3917 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3918 // InsertValue
3919 if (ExtractIdx == Index)
3920 return replaceInstUsesWith(CI, InsertValue);
3921 // If we are getting a different index than what was set in the
3922 // insert.vector intrinsic. We can just set the input tuple to the one up
3923 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3924 // InsertIndex), ExtractIndex)
3925 // --> extract.vector(InsertTuple, ExtractIndex)
3926 else
3927 return replaceOperand(CI, 0, InsertTuple);
3928 }
3929
3930 ConstantInt *ALMUpperBound;
3932 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3933 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3934 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3935 if (ExtractIdx * VScaleMin >= ALMUpperBound->getZExtValue())
3936 return replaceInstUsesWith(CI,
3937 ConstantVector::getNullValue(ReturnType));
3938 }
3939
3940 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3941 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3942
3943 if (DstTy && VecTy) {
3944 auto DstEltCnt = DstTy->getElementCount();
3945 auto VecEltCnt = VecTy->getElementCount();
3946 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3947
3948 // Extracting the entirety of Vec is a nop.
3949 if (DstEltCnt == VecTy->getElementCount()) {
3950 replaceInstUsesWith(CI, Vec);
3951 return eraseInstFromFunction(CI);
3952 }
3953
3954 // Only canonicalize to shufflevector if the destination vector and
3955 // Vec are fixed vectors.
3956 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3957 break;
3958
3960 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3961 Mask.push_back(IdxN + i);
3962
3963 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3964 return replaceInstUsesWith(CI, Shuffle);
3965 }
3966 break;
3967 }
3968 case Intrinsic::experimental_vp_reverse: {
3969 Value *X;
3970 Value *Vec = II->getArgOperand(0);
3971 Value *Mask = II->getArgOperand(1);
3972 if (!match(Mask, m_AllOnes()))
3973 break;
3974 Value *EVL = II->getArgOperand(2);
3975 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3976 // rev(unop rev(X)) --> unop X
3977 if (match(Vec,
3979 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3980 auto *OldUnOp = cast<UnaryOperator>(Vec);
3982 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3983 II->getIterator());
3984 return replaceInstUsesWith(CI, NewUnOp);
3985 }
3986 break;
3987 }
3988 case Intrinsic::vector_reduce_or:
3989 case Intrinsic::vector_reduce_and: {
3990 // Canonicalize logical or/and reductions:
3991 // Or reduction for i1 is represented as:
3992 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3993 // %res = cmp ne iReduxWidth %val, 0
3994 // And reduction for i1 is represented as:
3995 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3996 // %res = cmp eq iReduxWidth %val, 11111
3997 Value *Arg = II->getArgOperand(0);
3998 Value *Vect;
3999
4000 if (Value *NewOp =
4001 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4002 replaceUse(II->getOperandUse(0), NewOp);
4003 return II;
4004 }
4005
4006 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4007 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4008 if (FTy->getElementType() == Builder.getInt1Ty()) {
4009 Value *Res = Builder.CreateBitCast(
4010 Vect, Builder.getIntNTy(FTy->getNumElements()));
4011 if (IID == Intrinsic::vector_reduce_and) {
4012 Res = Builder.CreateICmpEQ(
4014 } else {
4015 assert(IID == Intrinsic::vector_reduce_or &&
4016 "Expected or reduction.");
4017 Res = Builder.CreateIsNotNull(Res);
4018 }
4019 if (Arg != Vect)
4020 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4021 II->getType());
4022 return replaceInstUsesWith(CI, Res);
4023 }
4024 }
4025 [[fallthrough]];
4026 }
4027 case Intrinsic::vector_reduce_add: {
4028 if (IID == Intrinsic::vector_reduce_add) {
4029 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4030 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4031 // Convert vector_reduce_add(SExt(<n x i1>)) to
4032 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4033 // Convert vector_reduce_add(<n x i1>) to
4034 // Trunc(ctpop(bitcast <n x i1> to in)).
4035 Value *Arg = II->getArgOperand(0);
4036 Value *Vect;
4037
4038 if (Value *NewOp =
4039 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4040 replaceUse(II->getOperandUse(0), NewOp);
4041 return II;
4042 }
4043
4044 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4045 if (Value *Splat = getSplatValue(Arg)) {
4046 ElementCount VecToReduceCount =
4047 cast<VectorType>(Arg->getType())->getElementCount();
4048 if (VecToReduceCount.isFixed()) {
4049 unsigned VectorSize = VecToReduceCount.getFixedValue();
4050 return BinaryOperator::CreateMul(
4051 Splat,
4052 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4053 /*ImplicitTrunc=*/true));
4054 }
4055 }
4056
4057 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4058 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4059 if (FTy->getElementType() == Builder.getInt1Ty()) {
4060 Value *V = Builder.CreateBitCast(
4061 Vect, Builder.getIntNTy(FTy->getNumElements()));
4062 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4063 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4064 if (Arg != Vect &&
4065 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4066 Res = Builder.CreateNeg(Res);
4067 return replaceInstUsesWith(CI, Res);
4068 }
4069 }
4070 }
4071 [[fallthrough]];
4072 }
4073 case Intrinsic::vector_reduce_xor: {
4074 if (IID == Intrinsic::vector_reduce_xor) {
4075 // Exclusive disjunction reduction over the vector with
4076 // (potentially-extended) i1 element type is actually a
4077 // (potentially-extended) arithmetic `add` reduction over the original
4078 // non-extended value:
4079 // vector_reduce_xor(?ext(<n x i1>))
4080 // -->
4081 // ?ext(vector_reduce_add(<n x i1>))
4082 Value *Arg = II->getArgOperand(0);
4083 Value *Vect;
4084
4085 if (Value *NewOp =
4086 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4087 replaceUse(II->getOperandUse(0), NewOp);
4088 return II;
4089 }
4090
4091 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4092 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4093 if (VTy->getElementType() == Builder.getInt1Ty()) {
4094 Value *Res = Builder.CreateAddReduce(Vect);
4095 if (Arg != Vect)
4096 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4097 II->getType());
4098 return replaceInstUsesWith(CI, Res);
4099 }
4100 }
4101 }
4102 [[fallthrough]];
4103 }
4104 case Intrinsic::vector_reduce_mul: {
4105 if (IID == Intrinsic::vector_reduce_mul) {
4106 // Multiplicative reduction over the vector with (potentially-extended)
4107 // i1 element type is actually a (potentially zero-extended)
4108 // logical `and` reduction over the original non-extended value:
4109 // vector_reduce_mul(?ext(<n x i1>))
4110 // -->
4111 // zext(vector_reduce_and(<n x i1>))
4112 Value *Arg = II->getArgOperand(0);
4113 Value *Vect;
4114
4115 if (Value *NewOp =
4116 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4117 replaceUse(II->getOperandUse(0), NewOp);
4118 return II;
4119 }
4120
4121 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4122 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4123 if (VTy->getElementType() == Builder.getInt1Ty()) {
4124 Value *Res = Builder.CreateAndReduce(Vect);
4125 Res = Builder.CreateZExt(Res, II->getType());
4126 return replaceInstUsesWith(CI, Res);
4127 }
4128 }
4129 }
4130 [[fallthrough]];
4131 }
4132 case Intrinsic::vector_reduce_umin:
4133 case Intrinsic::vector_reduce_umax: {
4134 if (IID == Intrinsic::vector_reduce_umin ||
4135 IID == Intrinsic::vector_reduce_umax) {
4136 // UMin/UMax reduction over the vector with (potentially-extended)
4137 // i1 element type is actually a (potentially-extended)
4138 // logical `and`/`or` reduction over the original non-extended value:
4139 // vector_reduce_u{min,max}(?ext(<n x i1>))
4140 // -->
4141 // ?ext(vector_reduce_{and,or}(<n x i1>))
4142 Value *Arg = II->getArgOperand(0);
4143 Value *Vect;
4144
4145 if (Value *NewOp =
4146 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4147 replaceUse(II->getOperandUse(0), NewOp);
4148 return II;
4149 }
4150
4151 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4152 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4153 if (VTy->getElementType() == Builder.getInt1Ty()) {
4154 Value *Res = IID == Intrinsic::vector_reduce_umin
4155 ? Builder.CreateAndReduce(Vect)
4156 : Builder.CreateOrReduce(Vect);
4157 if (Arg != Vect)
4158 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4159 II->getType());
4160 return replaceInstUsesWith(CI, Res);
4161 }
4162 }
4163 }
4164 [[fallthrough]];
4165 }
4166 case Intrinsic::vector_reduce_smin:
4167 case Intrinsic::vector_reduce_smax: {
4168 if (IID == Intrinsic::vector_reduce_smin ||
4169 IID == Intrinsic::vector_reduce_smax) {
4170 // SMin/SMax reduction over the vector with (potentially-extended)
4171 // i1 element type is actually a (potentially-extended)
4172 // logical `and`/`or` reduction over the original non-extended value:
4173 // vector_reduce_s{min,max}(<n x i1>)
4174 // -->
4175 // vector_reduce_{or,and}(<n x i1>)
4176 // and
4177 // vector_reduce_s{min,max}(sext(<n x i1>))
4178 // -->
4179 // sext(vector_reduce_{or,and}(<n x i1>))
4180 // and
4181 // vector_reduce_s{min,max}(zext(<n x i1>))
4182 // -->
4183 // zext(vector_reduce_{and,or}(<n x i1>))
4184 Value *Arg = II->getArgOperand(0);
4185 Value *Vect;
4186
4187 if (Value *NewOp =
4188 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4189 replaceUse(II->getOperandUse(0), NewOp);
4190 return II;
4191 }
4192
4193 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4194 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4195 if (VTy->getElementType() == Builder.getInt1Ty()) {
4196 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4197 if (Arg != Vect)
4198 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4199 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4200 (ExtOpc == Instruction::CastOps::ZExt))
4201 ? Builder.CreateAndReduce(Vect)
4202 : Builder.CreateOrReduce(Vect);
4203 if (Arg != Vect)
4204 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4205 return replaceInstUsesWith(CI, Res);
4206 }
4207 }
4208 }
4209 [[fallthrough]];
4210 }
4211 case Intrinsic::vector_reduce_fmax:
4212 case Intrinsic::vector_reduce_fmin:
4213 case Intrinsic::vector_reduce_fadd:
4214 case Intrinsic::vector_reduce_fmul: {
4215 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4216 IID != Intrinsic::vector_reduce_fmul) ||
4217 II->hasAllowReassoc();
4218 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4219 IID == Intrinsic::vector_reduce_fmul)
4220 ? 1
4221 : 0;
4222 Value *Arg = II->getArgOperand(ArgIdx);
4223 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4224 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4225 return nullptr;
4226 }
4227 break;
4228 }
4229 case Intrinsic::is_fpclass: {
4230 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4231 return I;
4232 break;
4233 }
4234 case Intrinsic::threadlocal_address: {
4235 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4236 MaybeAlign Align = II->getRetAlign();
4237 if (MinAlign > Align.valueOrOne()) {
4238 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4239 return II;
4240 }
4241 break;
4242 }
4243 case Intrinsic::frexp: {
4244 Value *X;
4245 // The first result is idempotent with the added complication of the struct
4246 // return, and the second result is zero because the value is already
4247 // normalized.
4248 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4250 X = Builder.CreateInsertValue(
4251 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4252 1);
4253 return replaceInstUsesWith(*II, X);
4254 }
4255 }
4256 break;
4257 }
4258 case Intrinsic::get_active_lane_mask: {
4259 const APInt *Op0, *Op1;
4260 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4261 match(II->getOperand(1), m_APInt(Op1))) {
4262 Type *OpTy = II->getOperand(0)->getType();
4263 return replaceInstUsesWith(
4264 *II, Builder.CreateIntrinsic(
4265 II->getType(), Intrinsic::get_active_lane_mask,
4266 {Constant::getNullValue(OpTy),
4267 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4268 }
4269 break;
4270 }
4271 case Intrinsic::experimental_get_vector_length: {
4272 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4273 unsigned BitWidth =
4274 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4275 II->getType()->getScalarSizeInBits());
4276 ConstantRange Cnt =
4277 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4278 SQ.getWithInstruction(II))
4280 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4281 ->getValue()
4282 .zextOrTrunc(Cnt.getBitWidth());
4283 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4284 MaxLanes = MaxLanes.multiply(
4285 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4286
4287 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4288 return replaceInstUsesWith(
4289 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4290 return nullptr;
4291 }
4292 default: {
4293 // Handle target specific intrinsics
4294 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4295 if (V)
4296 return *V;
4297 break;
4298 }
4299 }
4300
4301 // Try to fold intrinsic into select/phi operands. This is legal if:
4302 // * The intrinsic is speculatable.
4303 // * The operand is one of the following:
4304 // - a phi.
4305 // - a select with a scalar condition.
4306 // - a select with a vector condition and II is not a cross lane operation.
4308 for (Value *Op : II->args()) {
4309 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4310 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4311 if (IsVectorCond &&
4312 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4313 continue;
4314 // Don't replace a scalar select with a more expensive vector select if
4315 // we can't simplify both arms of the select.
4316 bool SimplifyBothArms =
4317 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4319 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4320 return R;
4321 }
4322 if (auto *Phi = dyn_cast<PHINode>(Op))
4323 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4324 return R;
4325 }
4326 }
4327
4329 return Shuf;
4330
4332 return replaceInstUsesWith(*II, Reverse);
4333
4335 return replaceInstUsesWith(*II, Res);
4336
4337 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4338 // context, so it is handled in visitCallBase and we should trigger it.
4339 return visitCallBase(*II);
4340}
4341
4342// Fence instruction simplification
// NOTE(review): the defining line (4343) is missing from this listing;
// presumably `Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {`
// -- confirm against the upstream file.
//
// Erases a fence made redundant by an adjacent fence: either the next
// instruction is an identical fence, or an adjacent fence in the same
// (system or single-thread) sync scope is at least as strong as this one.
// Returns the erased-instruction marker, or nullptr if nothing changed.
4344 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4345 // This check is solely here to handle arbitrary target-dependent syncscopes.
4346 // TODO: Can remove if does not matter in practice.
4347 if (NFI && FI.isIdenticalTo(NFI))
4348 return eraseInstFromFunction(FI);
4349
4350 // Returns true if FI1 is identical or stronger fence than FI2.
4351 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4352 auto FI1SyncScope = FI1->getSyncScopeID();
4353 // Consider same scope, where scope is global or single-thread.
4354 if (FI1SyncScope != FI2->getSyncScopeID() ||
4355 (FI1SyncScope != SyncScope::System &&
4356 FI1SyncScope != SyncScope::SingleThread))
4357 return false;
4358
4359 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4360 };
// If the fence immediately after FI is at least as strong, FI adds nothing.
4361 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4362 return eraseInstFromFunction(FI);
4363
// Likewise if the fence immediately before FI is at least as strong.
4364 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4365 if (isIdenticalOrStrongerFence(PFI, &FI))
4366 return eraseInstFromFunction(FI);
4367 return nullptr;
4368}
4369
4370// InvokeInst simplification
// NOTE(review): the defining line (4371) is missing from this listing;
// presumably `Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {`
// -- confirm against the upstream file.
// No invoke-specific combines here; defer to the shared call-site logic.
4372 return visitCallBase(II);
4373}
4374
4375// CallBrInst simplification
// NOTE(review): the defining line (4376) is missing from this listing;
// presumably `Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {`
// -- confirm against the upstream file.
// No callbr-specific combines here; defer to the shared call-site logic.
4377 return visitCallBase(CBI);
4378}
4379
// NOTE(review): this listing is missing line 4380 (the function definition,
// presumably `static Value *optimizeModularFormat(CallInst *CI,
// IRBuilderBase &B) {`), line 4384 (the start of the `Args` declaration),
// and line 4394 (the declaration of `AllAspects`, presumably split from a
// later element of `Args`) -- confirm against the upstream file.
//
// Rewrites a call tagged with the "modular-format" function attribute
// (a comma-separated string whose elements include the 1-based index of the
// first format argument, a replacement function name, and an implementation
// name followed by a list of "aspects") to call the modular implementation
// instead, emitting a llvm.reloc.none reference named "<impl>_<aspect>" for
// each aspect the call site actually needs. Returns the replacement call,
// or nullptr when the transform does not apply (no attribute, no aspects,
// or all aspects are needed anyway).
4381 if (!CI->hasFnAttr("modular-format"))
4382 return nullptr;
4383
4385 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4386 // TODO: Make use of the first two arguments
// Args[2] holds the 1-based index of the first format argument.
4387 unsigned FirstArgIdx;
4388 [[maybe_unused]] bool Error;
4389 Error = Args[2].getAsInteger(10, FirstArgIdx);
4390 assert(!Error && "invalid first arg index");
4391 --FirstArgIdx;
4392 StringRef FnName = Args[3];
4393 StringRef ImplName = Args[4];
4395
4396 if (AllAspects.empty())
4397 return nullptr;
4398
// Determine which aspects this call site actually exercises. Only "float"
// is recognized (needed iff any format argument is floating-point); unknown
// aspects are conservatively treated as needed.
4399 SmallVector<StringRef> NeededAspects;
4400 for (StringRef Aspect : AllAspects) {
4401 if (Aspect == "float") {
4402 if (llvm::any_of(
4403 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4404 CI->arg_end()),
4405 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4406 NeededAspects.push_back("float");
4407 } else {
4408 // Unknown aspects are always considered to be needed.
4409 NeededAspects.push_back(Aspect);
4410 }
4411 }
4412
// If every aspect is needed there is nothing to gain; keep the original.
4413 if (NeededAspects.size() == AllAspects.size())
4414 return nullptr;
4415
// Clone the call, retarget it at the modular implementation, and drop the
// attribute so the clone is not processed again.
4416 Module *M = CI->getModule();
4417 LLVMContext &Ctx = M->getContext();
4418 Function *Callee = CI->getCalledFunction();
4419 FunctionCallee ModularFn = M->getOrInsertFunction(
4420 FnName, Callee->getFunctionType(),
4421 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4422 CallInst *New = cast<CallInst>(CI->clone());
4423 New->setCalledFunction(ModularFn);
4424 New->removeFnAttr("modular-format");
4425 B.Insert(New);
4426
// Emit a llvm.reloc.none referencing "<impl-name>_<aspect>" -- presumably so
// the linker retains the aspect's support code; confirm against the
// llvm.reloc.none specification.
4427 const auto ReferenceAspect = [&](StringRef Aspect) {
4428 SmallString<20> Name = ImplName;
4429 Name += '_';
4430 Name += Aspect;
4431 Function *RelocNoneFn =
4432 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4433 B.CreateCall(RelocNoneFn,
4434 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4435 };
4436
// Sort so the emitted references have a deterministic order.
4437 llvm::sort(NeededAspects);
4438 for (StringRef Request : NeededAspects)
4439 ReferenceAspect(Request);
4440
4441 return New;
4442}
4443
4444Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
// Attempts library-call simplification (LibCallSimplifier) and then the
// "modular-format" rewrite on a direct call. Returns a changed instruction
// (CI itself when its result is unused) or nullptr if nothing changed.
4445 if (!CI->getCalledFunction()) return nullptr;
4446
4447 // Skip optimizing notail and musttail calls so
4448 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4449 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4450 if (CI->isMustTailCall() || CI->isNoTailCall())
4451 return nullptr;
4452
// Bridge InstCombine's replace/erase helpers into the simplifier callbacks.
4453 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4454 replaceInstUsesWith(*From, With);
4455 };
4456 auto InstCombineErase = [this](Instruction *I) {
// NOTE(review): line 4457 is missing from this listing; presumably
// `eraseInstFromFunction(*I);` -- confirm against the upstream file.
4458 };
4459 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4460 InstCombineRAUW, InstCombineErase);
4461 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4462 ++NumSimplified;
// When the call has no uses there is nothing to RAUW; return CI itself to
// signal the simplification happened.
4463 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4464 }
4465 if (Value *With = optimizeModularFormat(CI, Builder)) {
4466 ++NumSimplified;
4467 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4468 }
4469
4470 return nullptr;
4471}
4472
// NOTE(review): the defining line (4473) is missing from this listing;
// presumably `static IntrinsicInst *findInitTrampolineFromAlloca(Value
// *TrampMem) {` -- confirm against the upstream file.
//
// If TrampMem is (at most one pointer-cast away from) an alloca whose users
// are exactly one llvm.init.trampoline plus any number of
// llvm.adjust.trampoline calls, returns that init.trampoline; otherwise
// returns nullptr.
4474 // Strip off at most one level of pointer casts, looking for an alloca. This
4475 // is good enough in practice and simpler than handling any number of casts.
4476 Value *Underlying = TrampMem->stripPointerCasts();
4477 if (Underlying != TrampMem &&
4478 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4479 return nullptr;
4480 if (!isa<AllocaInst>(Underlying))
4481 return nullptr;
4482
4483 IntrinsicInst *InitTrampoline = nullptr;
4484 for (User *U : TrampMem->users()) {
// NOTE(review): line 4485 is missing from this listing; presumably
// `IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);` -- confirm upstream.
4486 if (!II)
4487 return nullptr;
4488 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4489 if (InitTrampoline)
4490 // More than one init_trampoline writes to this value. Give up.
4491 return nullptr;
4492 InitTrampoline = II;
4493 continue;
4494 }
4495 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4496 // Allow any number of calls to adjust.trampoline.
4497 continue;
// Any other user of the trampoline memory defeats the analysis.
4498 return nullptr;
4499 }
4500
4501 // No call to init.trampoline found.
4502 if (!InitTrampoline)
4503 return nullptr;
4504
4505 // Check that the alloca is being used in the expected way.
4506 if (InitTrampoline->getOperand(0) != TrampMem)
4507 return nullptr;
4508
4509 return InitTrampoline;
4510}
4511
4513 Value *TrampMem) {
4514 // Visit all the previous instructions in the basic block, and try to find a
4515 // init.trampoline which has a direct path to the adjust.trampoline.
4516 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4517 E = AdjustTramp->getParent()->begin();
4518 I != E;) {
4519 Instruction *Inst = &*--I;
4521 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4522 II->getOperand(0) == TrampMem)
4523 return II;
4524 if (Inst->mayWriteToMemory())
4525 return nullptr;
4526 }
4527 return nullptr;
4528}
4529
4530// Given a call to llvm.adjust.trampoline, find and return the corresponding
4531// call to llvm.init.trampoline if the call to the trampoline can be optimized
4532// to a direct call to a function. Otherwise return NULL.
4534 Callee = Callee->stripPointerCasts();
4535 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4536 if (!AdjustTramp ||
4537 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4538 return nullptr;
4539
4540 Value *TrampMem = AdjustTramp->getOperand(0);
4541
4543 return IT;
4544 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4545 return IT;
4546 return nullptr;
4547}
4548
4549Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4550 const Value *Callee = Call.getCalledOperand();
4551 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4552 if (!IPC || !IPC->isNoopCast(DL))
4553 return nullptr;
4554
4555 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4556 if (!II)
4557 return nullptr;
4558
4559 Intrinsic::ID IIID = II->getIntrinsicID();
4560 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4561 return nullptr;
4562
4563 // Isolate the ptrauth bundle from the others.
4564 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4566 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4567 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4568 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4569 PtrAuthBundleOrNone = Bundle;
4570 else
4571 NewBundles.emplace_back(Bundle);
4572 }
4573
4574 if (!PtrAuthBundleOrNone)
4575 return nullptr;
4576
4577 Value *NewCallee = nullptr;
4578 switch (IIID) {
4579 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4580 // assuming the call bundle and the sign operands match.
4581 case Intrinsic::ptrauth_resign: {
4582 // Resign result key should match bundle.
4583 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4584 return nullptr;
4585 // Resign result discriminator should match bundle.
4586 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4587 return nullptr;
4588
4589 // Resign input (auth) key should also match: we can't change the key on
4590 // the new call we're generating, because we don't know what keys are valid.
4591 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4592 return nullptr;
4593
4594 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4595 NewBundles.emplace_back("ptrauth", NewBundleOps);
4596 NewCallee = II->getOperand(0);
4597 break;
4598 }
4599
4600 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4601 // assuming the call bundle and the sign operands match.
4602 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4603 case Intrinsic::ptrauth_sign: {
4604 // Sign key should match bundle.
4605 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4606 return nullptr;
4607 // Sign discriminator should match bundle.
4608 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4609 return nullptr;
4610 NewCallee = II->getOperand(0);
4611 break;
4612 }
4613 default:
4614 llvm_unreachable("unexpected intrinsic ID");
4615 }
4616
4617 if (!NewCallee)
4618 return nullptr;
4619
4620 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4621 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4622 NewCall->setCalledOperand(NewCallee);
4623 return NewCall;
4624}
4625
4626Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4628 if (!CPA)
4629 return nullptr;
4630
4631 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4632 // If the ptrauth constant isn't based on a function pointer, bail out.
4633 if (!CalleeF)
4634 return nullptr;
4635
4636 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4638 if (!PAB)
4639 return nullptr;
4640
4641 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4642 Value *Discriminator = PAB->Inputs[1];
4643
4644 // If the bundle doesn't match, this is probably going to fail to auth.
4645 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4646 return nullptr;
4647
4648 // If the bundle matches the constant, proceed in making this a direct call.
4650 NewCall->setCalledOperand(CalleeF);
4651 return NewCall;
4652}
4653
4654bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4655 const TargetLibraryInfo *TLI) {
4656 // Note: We only handle cases which can't be driven from generic attributes
4657 // here. So, for example, nonnull and noalias (which are common properties
4658 // of some allocation functions) are expected to be handled via annotation
4659 // of the respective allocator declaration with generic attributes.
4660 bool Changed = false;
4661
4662 if (!Call.getType()->isPointerTy())
4663 return Changed;
4664
4665 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4666 if (Size && *Size != 0) {
4667 // TODO: We really should just emit deref_or_null here and then
4668 // let the generic inference code combine that with nonnull.
4669 if (Call.hasRetAttr(Attribute::NonNull)) {
4670 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4672 Call.getContext(), Size->getLimitedValue()));
4673 } else {
4674 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4676 Call.getContext(), Size->getLimitedValue()));
4677 }
4678 }
4679
4680 // Add alignment attribute if alignment is a power of two constant.
4681 Value *Alignment = getAllocAlignment(&Call, TLI);
4682 if (!Alignment)
4683 return Changed;
4684
4685 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4686 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4687 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4688 if (llvm::isPowerOf2_64(AlignmentVal)) {
4689 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4690 Align NewAlign = Align(AlignmentVal);
4691 if (NewAlign > ExistingAlign) {
4694 Changed = true;
4695 }
4696 }
4697 }
4698 return Changed;
4699}
4700
4701/// Improvements for call, callbr and invoke instructions.
4702Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4703 bool Changed = annotateAnyAllocSite(Call, &TLI);
4704
4705 // Mark any parameters that are known to be non-null with the nonnull
4706 // attribute. This is helpful for inlining calls to functions with null
4707 // checks on their arguments.
4708 SmallVector<unsigned, 4> ArgNos;
4709 unsigned ArgNo = 0;
4710
4711 for (Value *V : Call.args()) {
4712 if (V->getType()->isPointerTy()) {
4713 // Simplify the nonnull operand if the parameter is known to be nonnull.
4714 // Otherwise, try to infer nonnull for it.
4715 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4716 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4717 (HasDereferenceable &&
4719 V->getType()->getPointerAddressSpace()))) {
4720 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4721 replaceOperand(Call, ArgNo, Res);
4722 Changed = true;
4723 }
4724 } else if (isKnownNonZero(V,
4725 getSimplifyQuery().getWithInstruction(&Call))) {
4726 ArgNos.push_back(ArgNo);
4727 }
4728 }
4729 ArgNo++;
4730 }
4731
4732 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4733
4734 if (!ArgNos.empty()) {
4735 AttributeList AS = Call.getAttributes();
4736 LLVMContext &Ctx = Call.getContext();
4737 AS = AS.addParamAttribute(Ctx, ArgNos,
4738 Attribute::get(Ctx, Attribute::NonNull));
4739 Call.setAttributes(AS);
4740 Changed = true;
4741 }
4742
4743 // If the callee is a pointer to a function, attempt to move any casts to the
4744 // arguments of the call/callbr/invoke.
4746 Function *CalleeF = dyn_cast<Function>(Callee);
4747 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4748 transformConstExprCastCall(Call))
4749 return nullptr;
4750
4751 if (CalleeF) {
4752 // Remove the convergent attr on calls when the callee is not convergent.
4753 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4754 !CalleeF->isIntrinsic()) {
4755 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4756 << "\n");
4758 return &Call;
4759 }
4760
4761 // If the call and callee calling conventions don't match, and neither one
4762 // of the calling conventions is compatible with C calling convention
4763 // this call must be unreachable, as the call is undefined.
4764 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4765 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4769 // Only do this for calls to a function with a body. A prototype may
4770 // not actually end up matching the implementation's calling conv for a
4771 // variety of reasons (e.g. it may be written in assembly).
4772 !CalleeF->isDeclaration()) {
4773 Instruction *OldCall = &Call;
4775 // If OldCall does not return void then replaceInstUsesWith poison.
4776 // This allows ValueHandlers and custom metadata to adjust itself.
4777 if (!OldCall->getType()->isVoidTy())
4778 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4779 if (isa<CallInst>(OldCall))
4780 return eraseInstFromFunction(*OldCall);
4781
4782 // We cannot remove an invoke or a callbr, because it would change thexi
4783 // CFG, just change the callee to a null pointer.
4784 cast<CallBase>(OldCall)->setCalledFunction(
4785 CalleeF->getFunctionType(),
4786 Constant::getNullValue(CalleeF->getType()));
4787 return nullptr;
4788 }
4789 }
4790
4791 // Calling a null function pointer is undefined if a null address isn't
4792 // dereferenceable.
4793 if ((isa<ConstantPointerNull>(Callee) &&
4795 isa<UndefValue>(Callee)) {
4796 // If Call does not return void then replaceInstUsesWith poison.
4797 // This allows ValueHandlers and custom metadata to adjust itself.
4798 if (!Call.getType()->isVoidTy())
4800
4801 if (Call.isTerminator()) {
4802 // Can't remove an invoke or callbr because we cannot change the CFG.
4803 return nullptr;
4804 }
4805
4806 // This instruction is not reachable, just remove it.
4809 }
4810
4811 if (IntrinsicInst *II = findInitTrampoline(Callee))
4812 return transformCallThroughTrampoline(Call, *II);
4813
4814 // Combine calls involving pointer authentication intrinsics.
4815 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4816 return NewCall;
4817
4818 // Combine calls to ptrauth constants.
4819 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4820 return NewCall;
4821
4822 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4823 InlineAsm *IA = cast<InlineAsm>(Callee);
4824 if (!IA->canThrow()) {
4825 // Normal inline asm calls cannot throw - mark them
4826 // 'nounwind'.
4828 Changed = true;
4829 }
4830 }
4831
4832 // Try to optimize the call if possible, we require DataLayout for most of
4833 // this. None of these calls are seen as possibly dead so go ahead and
4834 // delete the instruction now.
4835 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4836 Instruction *I = tryOptimizeCall(CI);
4837 // If we changed something return the result, etc. Otherwise let
4838 // the fallthrough check.
4839 if (I) return eraseInstFromFunction(*I);
4840 }
4841
4842 if (!Call.use_empty() && !Call.isMustTailCall())
4843 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4844 Type *CallTy = Call.getType();
4845 Type *RetArgTy = ReturnedArg->getType();
4846 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4847 return replaceInstUsesWith(
4848 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4849 }
4850
4851 // Drop unnecessary callee_type metadata from calls that were converted
4852 // into direct calls.
4853 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4854 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4855 Changed = true;
4856 }
4857
4858 // Drop unnecessary kcfi operand bundles from calls that were converted
4859 // into direct calls.
4861 if (Bundle && !Call.isIndirectCall()) {
4862 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4863 if (CalleeF) {
4864 ConstantInt *FunctionType = nullptr;
4865 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4866
4867 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4868 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4869
4870 if (FunctionType &&
4871 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4872 dbgs() << Call.getModule()->getName()
4873 << ": warning: kcfi: " << Call.getCaller()->getName()
4874 << ": call to " << CalleeF->getName()
4875 << " using a mismatching function pointer type\n";
4876 }
4877 });
4878
4880 }
4881
4882 if (isRemovableAlloc(&Call, &TLI))
4883 return visitAllocSite(Call);
4884
4885 // Handle intrinsics which can be used in both call and invoke context.
4886 switch (Call.getIntrinsicID()) {
4887 case Intrinsic::experimental_gc_statepoint: {
4888 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4889 SmallPtrSet<Value *, 32> LiveGcValues;
4890 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4891 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4892
4893 // Remove the relocation if unused.
4894 if (GCR.use_empty()) {
4896 continue;
4897 }
4898
4899 Value *DerivedPtr = GCR.getDerivedPtr();
4900 Value *BasePtr = GCR.getBasePtr();
4901
4902 // Undef is undef, even after relocation.
4903 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4906 continue;
4907 }
4908
4909 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4910 // The relocation of null will be null for most any collector.
4911 // TODO: provide a hook for this in GCStrategy. There might be some
4912 // weird collector this property does not hold for.
4913 if (isa<ConstantPointerNull>(DerivedPtr)) {
4914 // Use null-pointer of gc_relocate's type to replace it.
4917 continue;
4918 }
4919
4920 // isKnownNonNull -> nonnull attribute
4921 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4922 isKnownNonZero(DerivedPtr,
4923 getSimplifyQuery().getWithInstruction(&Call))) {
4924 GCR.addRetAttr(Attribute::NonNull);
4925 // We discovered new fact, re-check users.
4926 Worklist.pushUsersToWorkList(GCR);
4927 }
4928 }
4929
4930 // If we have two copies of the same pointer in the statepoint argument
4931 // list, canonicalize to one. This may let us common gc.relocates.
4932 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4933 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4934 auto *OpIntTy = GCR.getOperand(2)->getType();
4935 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4936 }
4937
4938 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4939 // Canonicalize on the type from the uses to the defs
4940
4941 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4942 LiveGcValues.insert(BasePtr);
4943 LiveGcValues.insert(DerivedPtr);
4944 }
4945 std::optional<OperandBundleUse> Bundle =
4947 unsigned NumOfGCLives = LiveGcValues.size();
4948 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4949 break;
4950 // We can reduce the size of gc live bundle.
4951 DenseMap<Value *, unsigned> Val2Idx;
4952 std::vector<Value *> NewLiveGc;
4953 for (Value *V : Bundle->Inputs) {
4954 auto [It, Inserted] = Val2Idx.try_emplace(V);
4955 if (!Inserted)
4956 continue;
4957 if (LiveGcValues.count(V)) {
4958 It->second = NewLiveGc.size();
4959 NewLiveGc.push_back(V);
4960 } else
4961 It->second = NumOfGCLives;
4962 }
4963 // Update all gc.relocates
4964 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4965 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4966 Value *BasePtr = GCR.getBasePtr();
4967 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4968 "Missed live gc for base pointer");
4969 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4970 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4971 Value *DerivedPtr = GCR.getDerivedPtr();
4972 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4973 "Missed live gc for derived pointer");
4974 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4975 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4976 }
4977 // Create new statepoint instruction.
4978 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4979 return CallBase::Create(&Call, NewBundle);
4980 }
4981 default: { break; }
4982 }
4983
4984 return Changed ? &Call : nullptr;
4985}
4986
4987/// If the callee is a constexpr cast of a function, attempt to move the cast to
4988/// the arguments of the call/invoke.
4989/// CallBrInst is not supported.
4990bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4991 auto *Callee =
4993 if (!Callee)
4994 return false;
4995
4997 "CallBr's don't have a single point after a def to insert at");
4998
4999 // Don't perform the transform for declarations, which may not be fully
5000 // accurate. For example, void @foo() is commonly used as a placeholder for
5001 // unknown prototypes.
5002 if (Callee->isDeclaration())
5003 return false;
5004
5005 // If this is a call to a thunk function, don't remove the cast. Thunks are
5006 // used to transparently forward all incoming parameters and outgoing return
5007 // values, so it's important to leave the cast in place.
5008 if (Callee->hasFnAttribute("thunk"))
5009 return false;
5010
5011 // If this is a call to a naked function, the assembly might be
5012 // using an argument, or otherwise rely on the frame layout,
5013 // the function prototype will mismatch.
5014 if (Callee->hasFnAttribute(Attribute::Naked))
5015 return false;
5016
5017 // If this is a musttail call, the callee's prototype must match the caller's
5018 // prototype with the exception of pointee types. The code below doesn't
5019 // implement that, so we can't do this transform.
5020 // TODO: Do the transform if it only requires adding pointer casts.
5021 if (Call.isMustTailCall())
5022 return false;
5023
5025 const AttributeList &CallerPAL = Call.getAttributes();
5026
5027 // Okay, this is a cast from a function to a different type. Unless doing so
5028 // would cause a type conversion of one of our arguments, change this call to
5029 // be a direct call with arguments casted to the appropriate types.
5030 FunctionType *FT = Callee->getFunctionType();
5031 Type *OldRetTy = Caller->getType();
5032 Type *NewRetTy = FT->getReturnType();
5033
5034 // Check to see if we are changing the return type...
5035 if (OldRetTy != NewRetTy) {
5036
5037 if (NewRetTy->isStructTy())
5038 return false; // TODO: Handle multiple return values.
5039
5040 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5041 if (!Caller->use_empty())
5042 return false; // Cannot transform this return value.
5043 }
5044
5045 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5046 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5047 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5048 NewRetTy, CallerPAL.getRetAttrs())))
5049 return false; // Attribute not compatible with transformed value.
5050 }
5051
5052 // If the callbase is an invoke instruction, and the return value is
5053 // used by a PHI node in a successor, we cannot change the return type of
5054 // the call because there is no place to put the cast instruction (without
5055 // breaking the critical edge). Bail out in this case.
5056 if (!Caller->use_empty()) {
5057 BasicBlock *PhisNotSupportedBlock = nullptr;
5058 if (auto *II = dyn_cast<InvokeInst>(Caller))
5059 PhisNotSupportedBlock = II->getNormalDest();
5060 if (PhisNotSupportedBlock)
5061 for (User *U : Caller->users())
5062 if (PHINode *PN = dyn_cast<PHINode>(U))
5063 if (PN->getParent() == PhisNotSupportedBlock)
5064 return false;
5065 }
5066 }
5067
5068 unsigned NumActualArgs = Call.arg_size();
5069 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5070
5071 // Prevent us turning:
5072 // declare void @takes_i32_inalloca(i32* inalloca)
5073 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5074 //
5075 // into:
5076 // call void @takes_i32_inalloca(i32* null)
5077 //
5078 // Similarly, avoid folding away bitcasts of byval calls.
5079 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5080 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5081 return false;
5082
5083 auto AI = Call.arg_begin();
5084 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5085 Type *ParamTy = FT->getParamType(i);
5086 Type *ActTy = (*AI)->getType();
5087
5088 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5089 return false; // Cannot transform this parameter value.
5090
5091 // Check if there are any incompatible attributes we cannot drop safely.
5092 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5093 .overlaps(AttributeFuncs::typeIncompatible(
5094 ParamTy, CallerPAL.getParamAttrs(i),
5095 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5096 return false; // Attribute not compatible with transformed value.
5097
5098 if (Call.isInAllocaArgument(i) ||
5099 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5100 return false; // Cannot transform to and from inalloca/preallocated.
5101
5102 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5103 return false;
5104
5105 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5106 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5107 return false; // Cannot transform to or from byval.
5108 }
5109
5110 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5111 !CallerPAL.isEmpty()) {
5112 // In this case we have more arguments than the new function type, but we
5113 // won't be dropping them. Check that these extra arguments have attributes
5114 // that are compatible with being a vararg call argument.
5115 unsigned SRetIdx;
5116 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5117 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5118 return false;
5119 }
5120
5121 // Okay, we decided that this is a safe thing to do: go ahead and start
5122 // inserting cast instructions as necessary.
5123 SmallVector<Value *, 8> Args;
5125 Args.reserve(NumActualArgs);
5126 ArgAttrs.reserve(NumActualArgs);
5127
5128 // Get any return attributes.
5129 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5130
5131 // If the return value is not being used, the type may not be compatible
5132 // with the existing attributes. Wipe out any problematic attributes.
5133 RAttrs.remove(
5134 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5135
5136 LLVMContext &Ctx = Call.getContext();
5137 AI = Call.arg_begin();
5138 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5139 Type *ParamTy = FT->getParamType(i);
5140
5141 Value *NewArg = *AI;
5142 if ((*AI)->getType() != ParamTy)
5143 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5144 Args.push_back(NewArg);
5145
5146 // Add any parameter attributes except the ones incompatible with the new
5147 // type. Note that we made sure all incompatible ones are safe to drop.
5148 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5149 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5150 ArgAttrs.push_back(
5151 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5152 }
5153
5154 // If the function takes more arguments than the call was taking, add them
5155 // now.
5156 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5157 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5158 ArgAttrs.push_back(AttributeSet());
5159 }
5160
5161 // If we are removing arguments to the function, emit an obnoxious warning.
5162 if (FT->getNumParams() < NumActualArgs) {
5163 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5164 if (FT->isVarArg()) {
5165 // Add all of the arguments in their promoted form to the arg list.
5166 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5167 Type *PTy = getPromotedType((*AI)->getType());
5168 Value *NewArg = *AI;
5169 if (PTy != (*AI)->getType()) {
5170 // Must promote to pass through va_arg area!
5171 Instruction::CastOps opcode =
5172 CastInst::getCastOpcode(*AI, false, PTy, false);
5173 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5174 }
5175 Args.push_back(NewArg);
5176
5177 // Add any parameter attributes.
5178 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5179 }
5180 }
5181 }
5182
5183 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5184
5185 if (NewRetTy->isVoidTy())
5186 Caller->setName(""); // Void type should not have a name.
5187
5188 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5189 "missing argument attributes");
5190 AttributeList NewCallerPAL = AttributeList::get(
5191 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5192
5194 Call.getOperandBundlesAsDefs(OpBundles);
5195
5196 CallBase *NewCall;
5197 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5198 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5199 II->getUnwindDest(), Args, OpBundles);
5200 } else {
5201 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5202 cast<CallInst>(NewCall)->setTailCallKind(
5203 cast<CallInst>(Caller)->getTailCallKind());
5204 }
5205 NewCall->takeName(Caller);
5207 NewCall->setAttributes(NewCallerPAL);
5208
5209 // Preserve prof metadata if any.
5210 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5211
5212 // Insert a cast of the return type as necessary.
5213 Instruction *NC = NewCall;
5214 Value *NV = NC;
5215 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5216 assert(!NV->getType()->isVoidTy());
5218 NC->setDebugLoc(Caller->getDebugLoc());
5219
5220 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5221 assert(OptInsertPt && "No place to insert cast");
5222 InsertNewInstBefore(NC, *OptInsertPt);
5223 Worklist.pushUsersToWorkList(*Caller);
5224 }
5225
5226 if (!Caller->use_empty())
5227 replaceInstUsesWith(*Caller, NV);
5228 else if (Caller->hasValueHandle()) {
5229 if (OldRetTy == NV->getType())
5231 else
5232 // We cannot call ValueIsRAUWd with a different type, and the
5233 // actual tracked value will disappear.
5235 }
5236
5237 eraseInstFromFunction(*Caller);
5238 return true;
5239}
5240
5241/// Turn a call to a function created by init_trampoline / adjust_trampoline
5242/// intrinsic pair into a direct call to the underlying function.
5244InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5245 IntrinsicInst &Tramp) {
5246 FunctionType *FTy = Call.getFunctionType();
5247 AttributeList Attrs = Call.getAttributes();
5248
5249 // If the call already has the 'nest' attribute somewhere then give up -
5250 // otherwise 'nest' would occur twice after splicing in the chain.
5251 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5252 return nullptr;
5253
5255 FunctionType *NestFTy = NestF->getFunctionType();
5256
5257 AttributeList NestAttrs = NestF->getAttributes();
5258 if (!NestAttrs.isEmpty()) {
5259 unsigned NestArgNo = 0;
5260 Type *NestTy = nullptr;
5261 AttributeSet NestAttr;
5262
5263 // Look for a parameter marked with the 'nest' attribute.
5264 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5265 E = NestFTy->param_end();
5266 I != E; ++NestArgNo, ++I) {
5267 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5268 if (AS.hasAttribute(Attribute::Nest)) {
5269 // Record the parameter type and any other attributes.
5270 NestTy = *I;
5271 NestAttr = AS;
5272 break;
5273 }
5274 }
5275
5276 if (NestTy) {
5277 std::vector<Value*> NewArgs;
5278 std::vector<AttributeSet> NewArgAttrs;
5279 NewArgs.reserve(Call.arg_size() + 1);
5280 NewArgAttrs.reserve(Call.arg_size());
5281
5282 // Insert the nest argument into the call argument list, which may
5283 // mean appending it. Likewise for attributes.
5284
5285 {
5286 unsigned ArgNo = 0;
5287 auto I = Call.arg_begin(), E = Call.arg_end();
5288 do {
5289 if (ArgNo == NestArgNo) {
5290 // Add the chain argument and attributes.
5291 Value *NestVal = Tramp.getArgOperand(2);
5292 if (NestVal->getType() != NestTy)
5293 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5294 NewArgs.push_back(NestVal);
5295 NewArgAttrs.push_back(NestAttr);
5296 }
5297
5298 if (I == E)
5299 break;
5300
5301 // Add the original argument and attributes.
5302 NewArgs.push_back(*I);
5303 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5304
5305 ++ArgNo;
5306 ++I;
5307 } while (true);
5308 }
5309
5310 // The trampoline may have been bitcast to a bogus type (FTy).
5311 // Handle this by synthesizing a new function type, equal to FTy
5312 // with the chain parameter inserted.
5313
5314 std::vector<Type*> NewTypes;
5315 NewTypes.reserve(FTy->getNumParams()+1);
5316
5317 // Insert the chain's type into the list of parameter types, which may
5318 // mean appending it.
5319 {
5320 unsigned ArgNo = 0;
5321 FunctionType::param_iterator I = FTy->param_begin(),
5322 E = FTy->param_end();
5323
5324 do {
5325 if (ArgNo == NestArgNo)
5326 // Add the chain's type.
5327 NewTypes.push_back(NestTy);
5328
5329 if (I == E)
5330 break;
5331
5332 // Add the original type.
5333 NewTypes.push_back(*I);
5334
5335 ++ArgNo;
5336 ++I;
5337 } while (true);
5338 }
5339
5340 // Replace the trampoline call with a direct call. Let the generic
5341 // code sort out any function type mismatches.
5342 FunctionType *NewFTy =
5343 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5344 AttributeList NewPAL =
5345 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5346 Attrs.getRetAttrs(), NewArgAttrs);
5347
5349 Call.getOperandBundlesAsDefs(OpBundles);
5350
5351 Instruction *NewCaller;
5352 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5353 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5354 II->getUnwindDest(), NewArgs, OpBundles);
5355 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5356 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5357 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5358 NewCaller =
5359 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5360 CBI->getIndirectDests(), NewArgs, OpBundles);
5361 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5362 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5363 } else {
5364 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5365 cast<CallInst>(NewCaller)->setTailCallKind(
5366 cast<CallInst>(Call).getTailCallKind());
5367 cast<CallInst>(NewCaller)->setCallingConv(
5368 cast<CallInst>(Call).getCallingConv());
5369 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5370 }
5371 NewCaller->setDebugLoc(Call.getDebugLoc());
5372
5373 return NewCaller;
5374 }
5375 }
5376
5377 // Replace the trampoline call with a direct call. Since there is no 'nest'
5378 // parameter, there is no need to adjust the argument list. Let the generic
5379 // code sort out any function type mismatches.
5380 Call.setCalledFunction(FTy, NestF);
5381 return &Call;
5382}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:259
bool isNegative() const
Definition APFloat.h:1512
void clearSign()
Definition APFloat.h:1349
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1139
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1959
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1939
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1946
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2047
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1952
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:236
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:279
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:240
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
bool hasOperandBundles() const
Return true if this User has any operand bundles.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:811
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange multiply(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setNoSignedZeros(bool B=true)
Definition FMF.h:87
bool allowReassoc() const
Flag queries.
Definition FMF.h:67
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associate statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc reloactes linked to this statepoint May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:329
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1423
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2054
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2583
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2418
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2181
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:269
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:128
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:147
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1230
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1283
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:831
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:226
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:830
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retreive the information help by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1706
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume doesn't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1661
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Floating point maxnum.
@ SPF_NABS
Absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1692
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1606
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1777
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1642
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1679
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negation.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1719
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:258
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:290
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:305
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:111
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:264
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:296
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:302
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const