LLVM 23.0.0git
InstructionCombining.cpp
Go to the documentation of this file.
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
42#include "llvm/ADT/Statistic.h"
47#include "llvm/Analysis/CFG.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
70#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
93#include "llvm/Support/Debug.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Statistics tracking how many worklist iterations the pass performs, and how
// many functions needed one, two, three, or four-or-more iterations.
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
// Per-transformation statistics. NumReassoc and NumFactor are bumped by the
// reassociation and factorization helpers later in this file.
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter registered under the name "instcombine-visit".
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
// Command-line switch "instcombine-code-sinking": enables code sinking.
// Initialised to true, i.e. sinking is on unless explicitly disabled.
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(true));
138
140 "instcombine-max-sink-users", cl::init(32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
144MaxArraySize("instcombine-maxarray-size", cl::init(1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
148 "instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048),
149 cl::desc("Maximum number of users to visit in alloc-site "
150 "removability analysis"));
151
152namespace llvm {
154} // end namespace llvm
155
156// FIXME: Remove this flag when it is no longer necessary to convert
157// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
158// increases variable availability at the cost of accuracy. Variables that
159// cannot be promoted by mem2reg or SROA will be described as living in memory
160// for their entire lifetime. However, passes like DSE and instcombine can
161// delete stores to the alloca, leading to misleading and inaccurate debug
162// information. This flag can be removed when those passes are fixed.
// NOTE(review): the option is declared as cl::opt<unsigned> but initialised
// with `true` and described above as a true/false flag; it looks like it is
// meant to be boolean. Confirm how it is passed on the command line before
// changing the type.
163static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
164 cl::Hidden, cl::init(true));
165
// Out-of-line definition of the inserter's destructor; the compiler-generated
// (defaulted) behaviour is used.
166InstCombiner::IRBuilderInstCombineInserter::~IRBuilderInstCombineInserter() =
167 default;
168
169void InstCombiner::IRBuilderInstCombineInserter::InsertHelper(
170 Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const {
172 IC.Worklist.add(I);
173 if (auto *Assume = dyn_cast<AssumeInst>(I))
174 IC.AC.registerAssumption(Assume);
175 if (IC.AnnotationMetadataSource)
176 I->copyMetadata(*IC.AnnotationMetadataSource, LLVMContext::MD_annotation);
177}
178
179std::optional<Instruction *>
181 // Handle target specific intrinsics
182 if (II.getCalledFunction()->isTargetIntrinsic()) {
183 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(*this, II);
184 }
185 return std::nullopt;
186}
187
189 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
190 bool &KnownBitsComputed) {
191 // Handle target specific intrinsics
192 if (II.getCalledFunction()->isTargetIntrinsic()) {
193 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
194 *this, II, DemandedMask, Known, KnownBitsComputed);
195 }
196 return std::nullopt;
197}
198
200 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
201 APInt &PoisonElts2, APInt &PoisonElts3,
202 std::function<void(Instruction *, unsigned, APInt, APInt &)>
203 SimplifyAndSetOp) {
204 // Handle target specific intrinsics
205 if (II.getCalledFunction()->isTargetIntrinsic()) {
206 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
207 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
208 SimplifyAndSetOp);
209 }
210 return std::nullopt;
211}
212
213bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
214 // Approved exception for TTI use: This queries a legality property of the
215 // target, not an profitability heuristic. Ideally this should be part of
216 // DataLayout instead.
217 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
218}
219
220Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
221 if (!RewriteGEP)
222 return llvm::emitGEPOffset(&Builder, DL, GEP);
223
224 IRBuilderBase::InsertPointGuard Guard(Builder);
225 auto *Inst = dyn_cast<Instruction>(GEP);
226 if (Inst)
227 Builder.SetInsertPoint(Inst);
228
229 Value *Offset = EmitGEPOffset(GEP);
230 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
231 if (Inst && !GEP->hasAllConstantIndices() &&
232 !GEP->getSourceElementType()->isIntegerTy(8)) {
234 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
235 Offset, "", GEP->getNoWrapFlags()));
237 }
238 return Offset;
239}
240
241Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
242 GEPNoWrapFlags NW, Type *IdxTy,
243 bool RewriteGEPs) {
244 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
245 if (Sum)
246 return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
247 NW.isInBounds());
248 else
249 return Offset;
250 };
251
252 Value *Sum = nullptr;
253 Value *OneUseSum = nullptr;
254 Value *OneUseBase = nullptr;
255 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
256 for (GEPOperator *GEP : reverse(GEPs)) {
257 Value *Offset;
258 {
259 // Expand the offset at the point of the previous GEP to enable rewriting.
260 // However, use the original insertion point for calculating Sum.
261 IRBuilderBase::InsertPointGuard Guard(Builder);
262 auto *Inst = dyn_cast<Instruction>(GEP);
263 if (RewriteGEPs && Inst)
264 Builder.SetInsertPoint(Inst);
265
267 if (Offset->getType() != IdxTy)
268 Offset = Builder.CreateVectorSplat(
269 cast<VectorType>(IdxTy)->getElementCount(), Offset);
270 if (GEP->hasOneUse()) {
271 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
272 OneUseSum = Add(OneUseSum, Offset);
273 OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
274 if (!OneUseBase)
275 OneUseBase = GEP->getPointerOperand();
276 continue;
277 }
278
279 if (OneUseSum)
280 Offset = Add(OneUseSum, Offset);
281
282 // Rewrite the GEP to reuse the computed offset. This also includes
283 // offsets from preceding one-use GEPs of matched type.
284 if (RewriteGEPs && Inst &&
285 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
286 !(GEP->getSourceElementType()->isIntegerTy(8) &&
287 GEP->getOperand(1) == Offset)) {
289 *Inst,
290 Builder.CreatePtrAdd(
291 OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
292 OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
294 }
295 }
296
297 Sum = Add(Sum, Offset);
298 OneUseSum = OneUseBase = nullptr;
299 OneUseFlags = GEPNoWrapFlags::all();
300 }
301 if (OneUseSum)
302 Sum = Add(Sum, OneUseSum);
303 if (!Sum)
304 return Constant::getNullValue(IdxTy);
305 return Sum;
306}
307
308/// Legal integers and common types are considered desirable. This is used to
309/// avoid creating instructions with types that may not be supported well by the
310/// the backend.
311/// NOTE: This treats i8, i16 and i32 specially because they are common
312/// types in frontend languages.
313bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
314 switch (BitWidth) {
315 case 8:
316 case 16:
317 case 32:
318 return true;
319 default:
320 return DL.isLegalInteger(BitWidth);
321 }
322}
323
324/// Return true if it is desirable to convert an integer computation from a
325/// given bit width to a new bit width.
326/// We don't want to convert from a legal or desirable type (like i8) to an
327/// illegal type or from a smaller to a larger illegal type. A width of '1'
328/// is always treated as a desirable type because i1 is a fundamental type in
329/// IR, and there are many specialized optimizations for i1 types.
330/// Common/desirable widths are equally treated as legal to convert to, in
331/// order to open up more combining opportunities.
332bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
333 unsigned ToWidth) const {
334 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
335 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
336
337 // Convert to desirable widths even if they are not legal types.
338 // Only shrink types, to prevent infinite loops.
339 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
340 return true;
341
342 // If this is a legal or desiable integer from type, and the result would be
343 // an illegal type, don't do the transformation.
344 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
345 return false;
346
347 // Otherwise, if both are illegal, do not increase the size of the result. We
348 // do allow things like i160 -> i64, but not i64 -> i160.
349 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
350 return false;
351
352 return true;
353}
354
355/// Return true if it is desirable to convert a computation from 'From' to 'To'.
356/// We don't want to convert from a legal to an illegal type or from a smaller
357/// to a larger illegal type. i1 is always treated as a legal type because it is
358/// a fundamental type in IR, and there are many specialized optimizations for
359/// i1 types.
360bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
361 // TODO: This could be extended to allow vectors. Datalayout changes might be
362 // needed to properly support that.
363 if (!From->isIntegerTy() || !To->isIntegerTy())
364 return false;
365
366 unsigned FromWidth = From->getPrimitiveSizeInBits();
367 unsigned ToWidth = To->getPrimitiveSizeInBits();
368 return shouldChangeType(FromWidth, ToWidth);
369}
370
371// Return true, if No Signed Wrap should be maintained for I.
372// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
373// where both B and C should be ConstantInts, results in a constant that does
374// not overflow. This function only handles the Add/Sub/Mul opcodes. For
375// all other opcodes, the function conservatively returns false.
378 if (!OBO || !OBO->hasNoSignedWrap())
379 return false;
380
381 const APInt *BVal, *CVal;
382 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
383 return false;
384
385 // We reason about Add/Sub/Mul Only.
386 bool Overflow = false;
387 switch (I.getOpcode()) {
388 case Instruction::Add:
389 (void)BVal->sadd_ov(*CVal, Overflow);
390 break;
391 case Instruction::Sub:
392 (void)BVal->ssub_ov(*CVal, Overflow);
393 break;
394 case Instruction::Mul:
395 (void)BVal->smul_ov(*CVal, Overflow);
396 break;
397 default:
398 // Conservatively return false for other opcodes.
399 return false;
400 }
401 return !Overflow;
402}
403
406 return OBO && OBO->hasNoUnsignedWrap();
407}
408
411 return OBO && OBO->hasNoSignedWrap();
412}
413
414/// Combine constant operands of associative operations either before or after a
415/// cast to eliminate one of the associative operations:
416/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
417/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
419 InstCombinerImpl &IC) {
420 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
421 if (!Cast || !Cast->hasOneUse())
422 return false;
423
424 // TODO: Enhance logic for other casts and remove this check.
425 auto CastOpcode = Cast->getOpcode();
426 if (CastOpcode != Instruction::ZExt)
427 return false;
428
429 // TODO: Enhance logic for other BinOps and remove this check.
430 if (!BinOp1->isBitwiseLogicOp())
431 return false;
432
433 auto AssocOpcode = BinOp1->getOpcode();
434 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
435 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
436 return false;
437
438 Constant *C1, *C2;
439 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
440 !match(BinOp2->getOperand(1), m_Constant(C2)))
441 return false;
442
443 // TODO: This assumes a zext cast.
444 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
445 // to the destination type might lose bits.
446
447 // Fold the constants together in the destination type:
448 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
449 const DataLayout &DL = IC.getDataLayout();
450 Type *DestTy = C1->getType();
451 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
452 if (!CastC2)
453 return false;
454 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
455 if (!FoldedC)
456 return false;
457
458 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
459 IC.replaceOperand(*BinOp1, 1, FoldedC);
461 Cast->dropPoisonGeneratingFlags();
462 return true;
463}
464
465// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
466// inttoptr ( ptrtoint (x) ) --> x
467Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
468 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
469 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
470 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
471 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
472 Type *CastTy = IntToPtr->getDestTy();
473 if (PtrToInt &&
474 CastTy->getPointerAddressSpace() ==
475 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
476 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
477 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
478 return PtrToInt->getOperand(0);
479 }
480 return nullptr;
481}
482
483/// This performs a few simplifications for operators that are associative or
484/// commutative:
485///
486/// Commutative operators:
487///
488/// 1. Order operands such that they are listed from right (least complex) to
489/// left (most complex). This puts constants before unary operators before
490/// binary operators.
491///
492/// Associative operators:
493///
494/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
495/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
496///
497/// Associative and commutative operators:
498///
499/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
500/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
501/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
502/// if C1 and C2 are constants.
504 Instruction::BinaryOps Opcode = I.getOpcode();
505 bool Changed = false;
506
507 do {
508 // Order operands such that they are listed from right (least complex) to
509 // left (most complex). This puts constants before unary operators before
510 // binary operators.
511 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
512 getComplexity(I.getOperand(1)))
513 Changed = !I.swapOperands();
514
515 if (I.isCommutative()) {
516 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
517 replaceOperand(I, 0, Pair->first);
518 replaceOperand(I, 1, Pair->second);
519 Changed = true;
520 }
521 }
522
523 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
524 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
525
526 if (I.isAssociative()) {
527 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
528 if (Op0 && Op0->getOpcode() == Opcode) {
529 Value *A = Op0->getOperand(0);
530 Value *B = Op0->getOperand(1);
531 Value *C = I.getOperand(1);
532
533 // Does "B op C" simplify?
534 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
535 // It simplifies to V. Form "A op V".
536 replaceOperand(I, 0, A);
537 replaceOperand(I, 1, V);
538 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
539 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
540
541 // Conservatively clear all optional flags since they may not be
542 // preserved by the reassociation. Reset nsw/nuw based on the above
543 // analysis.
544 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(&I))
545 PDI->setIsDisjoint(false);
546
547 // Note: this is only valid because SimplifyBinOp doesn't look at
548 // the operands to Op0.
550 I.setHasNoUnsignedWrap(IsNUW);
551 I.setHasNoSignedWrap(IsNSW);
552 }
553
554 Changed = true;
555 ++NumReassoc;
556 continue;
557 }
558 }
559
560 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
561 if (Op1 && Op1->getOpcode() == Opcode) {
562 Value *A = I.getOperand(0);
563 Value *B = Op1->getOperand(0);
564 Value *C = Op1->getOperand(1);
565
566 // Does "A op B" simplify?
567 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
568 // It simplifies to V. Form "V op C".
569 replaceOperand(I, 0, V);
570 replaceOperand(I, 1, C);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
574 I.dropPoisonGeneratingFlags();
575 Changed = true;
576 ++NumReassoc;
577 continue;
578 }
579 }
580 }
581
582 if (I.isAssociative() && I.isCommutative()) {
583 if (simplifyAssocCastAssoc(&I, *this)) {
584 Changed = true;
585 ++NumReassoc;
586 continue;
587 }
588
589 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
590 if (Op0 && Op0->getOpcode() == Opcode) {
591 Value *A = Op0->getOperand(0);
592 Value *B = Op0->getOperand(1);
593 Value *C = I.getOperand(1);
594
595 // Does "C op A" simplify?
596 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
597 // It simplifies to V. Form "V op B".
598 replaceOperand(I, 0, V);
599 replaceOperand(I, 1, B);
600 // Conservatively clear the optional flags, since they may not be
601 // preserved by the reassociation.
603 I.dropPoisonGeneratingFlags();
604 Changed = true;
605 ++NumReassoc;
606 continue;
607 }
608 }
609
610 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
611 if (Op1 && Op1->getOpcode() == Opcode) {
612 Value *A = I.getOperand(0);
613 Value *B = Op1->getOperand(0);
614 Value *C = Op1->getOperand(1);
615
616 // Does "C op A" simplify?
617 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
618 // It simplifies to V. Form "B op V".
619 replaceOperand(I, 0, B);
620 replaceOperand(I, 1, V);
621 // Conservatively clear the optional flags, since they may not be
622 // preserved by the reassociation.
624 I.dropPoisonGeneratingFlags();
625 Changed = true;
626 ++NumReassoc;
627 continue;
628 }
629 }
630
631 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
632 // if C1 and C2 are constants.
633 Value *A, *B;
634 Constant *C1, *C2, *CRes;
635 if (Op0 && Op1 &&
636 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
637 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
638 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
639 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
640 bool IsNUW = hasNoUnsignedWrap(I) &&
641 hasNoUnsignedWrap(*Op0) &&
642 hasNoUnsignedWrap(*Op1);
643 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
644 BinaryOperator::CreateNUW(Opcode, A, B) :
645 BinaryOperator::Create(Opcode, A, B);
646
647 if (isa<FPMathOperator>(NewBO)) {
648 FastMathFlags Flags = I.getFastMathFlags() &
649 Op0->getFastMathFlags() &
650 Op1->getFastMathFlags();
651 NewBO->setFastMathFlags(Flags);
652 }
653 InsertNewInstWith(NewBO, I.getIterator());
654 NewBO->takeName(Op1);
655 replaceOperand(I, 0, NewBO);
656 replaceOperand(I, 1, CRes);
657 // Conservatively clear the optional flags, since they may not be
658 // preserved by the reassociation.
660 I.dropPoisonGeneratingFlags();
661 if (IsNUW)
662 I.setHasNoUnsignedWrap(true);
663
664 Changed = true;
665 continue;
666 }
667 }
668
669 // No further simplifications.
670 return Changed;
671 } while (true);
672}
673
674/// Return whether "X LOp (Y ROp Z)" is always equal to
675/// "(X LOp Y) ROp (X LOp Z)".
678 // X & (Y | Z) <--> (X & Y) | (X & Z)
679 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
680 if (LOp == Instruction::And)
681 return ROp == Instruction::Or || ROp == Instruction::Xor;
682
683 // X | (Y & Z) <--> (X | Y) & (X | Z)
684 if (LOp == Instruction::Or)
685 return ROp == Instruction::And;
686
687 // X * (Y + Z) <--> (X * Y) + (X * Z)
688 // X * (Y - Z) <--> (X * Y) - (X * Z)
689 if (LOp == Instruction::Mul)
690 return ROp == Instruction::Add || ROp == Instruction::Sub;
691
692 return false;
693}
694
695/// Return whether "(X LOp Y) ROp Z" is always equal to
696/// "(X ROp Z) LOp (Y ROp Z)".
700 return leftDistributesOverRight(ROp, LOp);
701
702 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
704
705 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
706 // but this requires knowing that the addition does not overflow and other
707 // such subtleties.
708}
709
710/// This function returns identity value for given opcode, which can be used to
711/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
713 if (isa<Constant>(V))
714 return nullptr;
715
716 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
717}
718
719/// This function predicates factorization using distributive laws. By default,
720/// it just returns the 'Op' inputs. But for special-cases like
721/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
722/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
723/// allow more factorization opportunities.
726 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
727 assert(Op && "Expected a binary operator");
728 LHS = Op->getOperand(0);
729 RHS = Op->getOperand(1);
730 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
731 Constant *C;
732 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
733 // X << C --> X * (1 << C)
735 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
736 assert(RHS && "Constant folding of immediate constants failed");
737 return Instruction::Mul;
738 }
739 // TODO: We can add other conversions e.g. shr => div etc.
740 }
741 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
742 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
744 // lshr nneg C, X --> ashr nneg C, X
745 return Instruction::AShr;
746 }
747 }
748 return Op->getOpcode();
749}
750
751/// This tries to simplify binary operations by factorizing out common terms
752/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
755 Instruction::BinaryOps InnerOpcode, Value *A,
756 Value *B, Value *C, Value *D) {
757 assert(A && B && C && D && "All values must be provided");
758
759 Value *V = nullptr;
760 Value *RetVal = nullptr;
761 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
762 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
763
764 // Does "X op' Y" always equal "Y op' X"?
765 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
766
767 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
768 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
769 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
770 // commutative case, "(A op' B) op (C op' A)"?
771 if (A == C || (InnerCommutative && A == D)) {
772 if (A != C)
773 std::swap(C, D);
774 // Consider forming "A op' (B op D)".
775 // If "B op D" simplifies then it can be formed with no cost.
776 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
777
778 // If "B op D" doesn't simplify then only go on if one of the existing
779 // operations "A op' B" and "C op' D" will be zapped as no longer used.
780 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
781 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
782 if (V)
783 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
784 }
785 }
786
787 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
788 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
789 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
790 // commutative case, "(A op' B) op (B op' D)"?
791 if (B == D || (InnerCommutative && B == C)) {
792 if (B != D)
793 std::swap(C, D);
794 // Consider forming "(A op C) op' B".
795 // If "A op C" simplifies then it can be formed with no cost.
796 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
797
798 // If "A op C" doesn't simplify then only go on if one of the existing
799 // operations "A op' B" and "C op' D" will be zapped as no longer used.
800 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
801 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
802 if (V)
803 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
804 }
805 }
806
807 if (!RetVal)
808 return nullptr;
809
810 ++NumFactor;
811 RetVal->takeName(&I);
812
813 // Try to add no-overflow flags to the final value.
814 if (isa<BinaryOperator>(RetVal)) {
815 bool HasNSW = false;
816 bool HasNUW = false;
818 HasNSW = I.hasNoSignedWrap();
819 HasNUW = I.hasNoUnsignedWrap();
820 }
821 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
822 HasNSW &= LOBO->hasNoSignedWrap();
823 HasNUW &= LOBO->hasNoUnsignedWrap();
824 }
825
826 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
827 HasNSW &= ROBO->hasNoSignedWrap();
828 HasNUW &= ROBO->hasNoUnsignedWrap();
829 }
830
831 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
832 // We can propagate 'nsw' if we know that
833 // %Y = mul nsw i16 %X, C
834 // %Z = add nsw i16 %Y, %X
835 // =>
836 // %Z = mul nsw i16 %X, C+1
837 //
838 // iff C+1 isn't INT_MIN
839 const APInt *CInt;
840 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
841 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
842
843 // nuw can be propagated with any constant or nuw value.
844 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
845 }
846 }
847 return RetVal;
848}
849
850// If `I` has one Const operand and the other matches `(ctpop (not x))`,
851// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
852// This is only useful if the new subtract can fold so we only handle the
853// following cases:
854// 1) (add/sub/disjoint_or C, (ctpop (not x)))
855// -> (add/sub/disjoint_or C', (ctpop x))
856// 2) (cmp pred C, (ctpop (not x)))
857// -> (cmp pred C', (ctpop x))
859 unsigned Opc = I->getOpcode();
860 unsigned ConstIdx = 1;
861 switch (Opc) {
862 default:
863 return nullptr;
864 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
865 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
866 // is constant.
867 case Instruction::Sub:
868 ConstIdx = 0;
869 break;
870 case Instruction::ICmp:
871 // Signed predicates aren't correct in some edge cases like for i2 types.
872 // Also, since (ctpop x) is known [0, log2(BitWidth(x))], almost all signed
873 // comparisons against it are simplified to unsigned.
874 if (cast<ICmpInst>(I)->isSigned())
875 return nullptr;
876 break;
877 case Instruction::Or:
878 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
879 return nullptr;
880 [[fallthrough]];
881 case Instruction::Add:
882 break;
883 }
884
885 Value *Op;
886 // Find ctpop.
887 if (!match(I->getOperand(1 - ConstIdx), m_OneUse(m_Ctpop(m_Value(Op)))))
888 return nullptr;
889
890 Constant *C;
891 // Check other operand is ImmConstant.
892 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
893 return nullptr;
894
895 Type *Ty = Op->getType();
896 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
897 // Need extra check for icmp. Note if this check is true, it generally means
898 // the icmp will simplify to true/false.
899 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
900 Constant *Cmp =
902 if (!Cmp || !Cmp->isNullValue())
903 return nullptr;
904 }
905
906 // Check we can invert `(not x)` for free.
907 bool Consumes = false;
908 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
909 return nullptr;
910 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
911 assert(NotOp != nullptr &&
912 "Desync between isFreeToInvert and getFreelyInverted");
913
914 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
915
916 Value *R = nullptr;
917
918 // Do the transformation here to avoid potentially introducing an infinite
919 // loop.
920 switch (Opc) {
921 case Instruction::Sub:
922 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
923 break;
924 case Instruction::Or:
925 case Instruction::Add:
926 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
927 break;
928 case Instruction::ICmp:
929 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
930 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
931 break;
932 default:
933 llvm_unreachable("Unhandled Opcode");
934 }
935 assert(R != nullptr);
936 return replaceInstUsesWith(*I, R);
937}
938
939// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
940// IFF
941// 1) the logic_shifts match
942// 2) BinOp1 is `and`, or the binops distribute over the shift (anything but
943// `add` + `lshr`) and either BinOp2 is `and` or
944// (logic_shift (inv_logic_shift C1, C), C) == C1
945//
946// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
947//
948// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
949// IFF
950// 1) the logic_shifts match
951// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
952//
953// -> (BinOp (logic_shift (BinOp X, Y), Amt), Mask)
954//
955// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
956// IFF
957// 1) Binop1 is a bitwise logical operator: `and`, `or` or `xor`
958// 2) Binop2 is `not`, i.e. an `xor` whose Mask is all-ones
959//
960// -> (arithmetic_shift (Binop1 Y, (not X)), Amt)
961
// NOTE(review): listing line 962, which should hold the function signature, is
// absent from this extract; the body below refers to the binop as `I`.
963 const DataLayout &DL = I.getDataLayout();
// Opcodes accepted for both the outer binop (I) and the inner binop.
964 auto IsValidBinOpc = [](unsigned Opc) {
965 switch (Opc) {
966 default:
967 return false;
968 case Instruction::And:
969 case Instruction::Or:
970 case Instruction::Xor:
971 case Instruction::Add:
972 // Skip Sub as we only match constant masks which will canonicalize to use
973 // add.
974 return true;
975 }
976 };
977
978 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
979 // constraints.
980 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
981 unsigned ShOpc) {
982 assert(ShOpc != Instruction::AShr);
983 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
984 ShOpc == Instruction::Shl;
985 };
986
// Map a logical shift opcode to the opposite direction (lshr <-> shl).
987 auto GetInvShift = [](unsigned ShOpc) {
988 assert(ShOpc != Instruction::AShr);
989 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
990 };
991
// Decide whether the constant mask can be moved inside the shift for the
// given opcode combination.
992 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
993 unsigned ShOpc, Constant *CMask,
994 Constant *CShift) {
995 // If the BinOp1 is `and` we don't need to check the mask.
996 if (BinOpc1 == Instruction::And)
997 return true;
998
999 // For all other possible transfers we need a completely distributable
1000 // binop/shift (anything but `add` + `lshr`).
1001 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
1002 return false;
1003
1004 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
1005 // vecs, otherwise the mask will be simplified and the following check will
1006 // handle it).
1007 if (BinOpc2 == Instruction::And)
1008 return true;
1009
1010 // Otherwise, we need a mask that meets the requirement below:
1011 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1012 Constant *MaskInvShift =
1013 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1014 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
1015 CMask;
1016 };
1017
// Try the fold with operand `ShOpnum` of I as the bare shift and the other
// operand as the masked shift. Returns the replacement instruction or nullptr.
1018 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1019 Constant *CMask, *CShift;
1020 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1021 if (!match(I.getOperand(ShOpnum),
1022 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
1023 return nullptr;
1024 if (!match(
1025 I.getOperand(1 - ShOpnum),
// NOTE(review): listing lines 1026-1027 are absent from this extract; they
// presumably open the inner-binop/shift matcher that binds X.
1028 m_Value(ShiftedX)),
1029 m_Value(Mask)))))
1030 return nullptr;
1031 // Make sure we are matching instruction shifts and not ConstantExpr
1032 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
1033 auto *IX = dyn_cast<Instruction>(ShiftedX);
1034 if (!IY || !IX)
1035 return nullptr;
1036
1037 // LHS and RHS need same shift opcode
1038 unsigned ShOpc = IY->getOpcode();
1039 if (ShOpc != IX->getOpcode())
1040 return nullptr;
1041
1042 // Make sure binop is real instruction and not ConstantExpr
1043 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
1044 if (!BO2)
1045 return nullptr;
1046
1047 unsigned BinOpc = BO2->getOpcode();
1048 // Make sure we have valid binops.
1049 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1050 return nullptr;
1051
// Arithmetic shifts only support the `not` form (xor with all-ones) under a
// bitwise outer binop; every other ashr combination is rejected.
1052 if (ShOpc == Instruction::AShr) {
1053 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
1054 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
1055 Value *NotX = Builder.CreateNot(X);
1056 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
// NOTE(review): listing line 1057 is absent from this extract.
1058 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
1059 }
1060
1061 return nullptr;
1062 }
1063
1064 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1065 // distribute to drop the shift irrespective of the constants.
1066 if (BinOpc == I.getOpcode() &&
1067 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1068 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
1069 Value *NewBinOp1 = Builder.CreateBinOp(
1070 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
1071 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
1072 }
1073
1074 // Otherwise we can only distribute by constant shifting the mask, so
1075 // ensure we have constants.
1076 if (!match(Shift, m_ImmConstant(CShift)))
1077 return nullptr;
1078 if (!match(Mask, m_ImmConstant(CMask)))
1079 return nullptr;
1080
1081 // Check if we can distribute the binops.
1082 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1083 return nullptr;
1084
// Shift the mask in the inverse direction so it can be applied before the
// (now shared) shift.
1085 Constant *NewCMask =
1086 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1087 Value *NewBinOp2 = Builder.CreateBinOp(
1088 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1089 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1090 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1091 NewBinOp1, CShift);
1092 };
1093
// Try both operand orders.
1094 if (Instruction *R = MatchBinOp(0))
1095 return R;
1096 return MatchBinOp(1);
1097}
1098
1099// (Binop (zext C), (select C, T, F))
1100// -> (select C, (binop 1, T), (binop 0, F))
1101//
1102// (Binop (sext C), (select C, T, F))
1103// -> (select C, (binop -1, T), (binop 0, F))
1104//
1105// Attempt to simplify binary operations into a select with folded args, when
1106// one operand of the binop is a select instruction and the other operand is a
1107// zext/sext extension, whose value is the select condition.
// NOTE(review): listing lines 1108-1109 (the function signature) are absent
// from this extract; the body below refers to the binop as `I`.
1110 // TODO: this simplification may be extended to any speculatable instruction,
1111 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1112 Instruction::BinaryOps Opc = I.getOpcode();
1113 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1114 Value *A, *CondVal, *TrueVal, *FalseVal;
1115 Value *CastOp;
1116 Constant *CastTrueVal, *CastFalseVal;
1117
// Binds A/CastTrueVal/CastFalseVal from the select-like cast operand and
// CondVal/TrueVal/FalseVal from the real select operand.
1118 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1119 return match(CastOp, m_SelectLike(m_Value(A), m_Constant(CastTrueVal),
1120 m_Constant(CastFalseVal))) &&
1121 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1122 m_Value(FalseVal)));
1123 };
1124
1125 // Make sure one side of the binop is a select instruction, and the other is a
1126 // zero/sign extension operating on an i1.
1127 if (MatchSelectAndCast(LHS, RHS))
1128 CastOp = LHS;
1129 else if (MatchSelectAndCast(RHS, LHS))
1130 CastOp = RHS;
1131 else
1132 return nullptr;
1133
// NOTE(review): listing line 1134 is absent from this extract; it presumably
// begins the declaration of `SI`, which is used below.
1135 ? nullptr
1136 : cast<SelectInst>(CastOp == LHS ? RHS : LHS);
1137
// Build `binop(CastVal, V)` keeping the original operand order. Note that the
// flag picks CastFalseVal when it is true and CastTrueVal when it is false.
1138 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1139 bool IsCastOpRHS = (CastOp == RHS);
1140 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1141
1142 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, CastVal)
1143 : Builder.CreateBinOp(Opc, CastVal, V);
1144 };
1145
1146 // If the value used in the zext/sext is the select condition, or the negation
1147 // of the select condition, the binop can be simplified.
1148 if (CondVal == A) {
1149 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1150 return SelectInst::Create(CondVal, NewTrueVal,
1151 NewFoldedConst(true, FalseVal), "", nullptr, SI);
1152 }
1153 if (match(A, m_Not(m_Specific(CondVal)))) {
1154 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1155 return SelectInst::Create(CondVal, NewTrueVal,
1156 NewFoldedConst(false, FalseVal), "", nullptr, SI);
1157 }
1158
1159 return nullptr;
1160}
1161
// Try to factor a common term out of binop `I`, looking through either or both
// of its operands. Returns the factored value, or nullptr if nothing applied.
// NOTE(review): listing line 1162 (the function signature) and lines 1164-1165
// (presumably the declarations of `Op0` and `Op1`) are absent from this
// extract.
1163 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1166 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1167 Value *A, *B, *C, *D;
1168 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1169
// LHSOpcode/RHSOpcode are only assigned (and only read below) when the
// corresponding Op0/Op1 is non-null.
1170 if (Op0)
1171 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1172 if (Op1)
1173 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1174
1175 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1176 // a common term.
1177 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1178 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1179 return V;
1180
1181 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1182 // term.
1183 if (Op0)
1184 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1185 if (Value *V =
1186 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1187 return V;
1188
1189 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1190 // term.
1191 if (Op1)
1192 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1193 if (Value *V =
1194 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1195 return V;
1196
1197 return nullptr;
1198}
1199
1200/// This tries to simplify binary operations which some other binary operation
1201/// distributes over either by factorizing out common terms
1202/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1203/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1204/// Returns the simplified value, or null if it didn't simplify.
// NOTE(review): listing line 1205 (the function signature) and lines 1207-1208
// (presumably the declarations of `Op0` and `Op1`) are absent from this
// extract.
1206 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1209 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1210
1211 // Factorization.
1212 if (Value *R = tryFactorizationFolds(I))
1213 return R;
1214
1215 // Expansion.
1216 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1217 // The instruction has the form "(A op' B) op C". See if expanding it out
1218 // to "(A op C) op' (B op C)" results in simplifications.
1219 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1220 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1221
1222 // Disable the use of undef because it's not safe to distribute undef.
1223 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1224 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1225 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1226
1227 // Do "A op C" and "B op C" both simplify?
1228 if (L && R) {
1229 // They do! Return "L op' R".
1230 ++NumExpand;
1231 C = Builder.CreateBinOp(InnerOpcode, L, R);
1232 C->takeName(&I);
1233 return C;
1234 }
1235
1236 // Does "A op C" simplify to the identity value for the inner opcode?
1237 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1238 // It does! Return "B op C".
1239 ++NumExpand;
1240 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1241 C->takeName(&I);
1242 return C;
1243 }
1244
1245 // Does "B op C" simplify to the identity value for the inner opcode?
1246 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1247 // It does! Return "A op C".
1248 ++NumExpand;
1249 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1250 C->takeName(&I);
1251 return C;
1252 }
1253 }
1254
1255 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1256 // The instruction has the form "A op (B op' C)". See if expanding it out
1257 // to "(A op B) op' (A op C)" results in simplifications.
1258 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1259 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1260
1261 // Disable the use of undef because it's not safe to distribute undef.
1262 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1263 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1264 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1265
1266 // Do "A op B" and "A op C" both simplify?
1267 if (L && R) {
1268 // They do! Return "L op' R".
1269 ++NumExpand;
1270 A = Builder.CreateBinOp(InnerOpcode, L, R);
1271 A->takeName(&I);
1272 return A;
1273 }
1274
1275 // Does "A op B" simplify to the identity value for the inner opcode?
1276 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1277 // It does! Return "A op C".
1278 ++NumExpand;
1279 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1280 A->takeName(&I);
1281 return A;
1282 }
1283
1284 // Does "A op C" simplify to the identity value for the inner opcode?
1285 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1286 // It does! Return "A op B".
1287 ++NumExpand;
1288 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1289 A->takeName(&I);
1290 return A;
1291 }
1292 }
1293
// Neither factorization nor expansion applied; fall back to the select fold.
1294 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1295}
1296
1297static std::optional<std::pair<Value *, Value *>>
// NOTE(review): listing line 1298 (function name and parameter list) is absent
// from this extract; the body uses two PHI-like parameters, `LHS` and `RHS`.
//
// Returns (L0, R0), the incoming values at index 0, when both PHIs are in the
// same block, have at least two incoming values, list the same incoming blocks
// in the same order, and every other incoming pair is either (L0, R0) or the
// swapped pair (R0, L0). Returns std::nullopt otherwise.
1299 if (LHS->getParent() != RHS->getParent())
1300 return std::nullopt;
1301
1302 if (LHS->getNumIncomingValues() < 2)
1303 return std::nullopt;
1304
1305 if (!equal(LHS->blocks(), RHS->blocks()))
1306 return std::nullopt;
1307
1308 Value *L0 = LHS->getIncomingValue(0);
1309 Value *R0 = RHS->getIncomingValue(0);
1310
1311 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1312 Value *L1 = LHS->getIncomingValue(I);
1313 Value *R1 = RHS->getIncomingValue(I);
1314
// Accept the same pair or the swapped pair; anything else breaks symmetry.
1315 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1316 continue;
1317
1318 return std::nullopt;
1319 }
1320
1321 return std::optional(std::pair(L0, R0));
1322}
1323
1324std::optional<std::pair<Value *, Value *>>
1325InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
// Matches two instructions of the same opcode that form a symmetric pair and
// returns the two underlying values, or std::nullopt when no pattern matches.
// NOTE(review): listing lines 1326-1327 (presumably the declarations of
// `LHSInst` and `RHSInst`) are absent from this extract.
1328 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1329 return std::nullopt;
1330 switch (LHSInst->getOpcode()) {
1331 case Instruction::PHI:
// NOTE(review): listing line 1332 (the body of the PHI case) is absent from
// this extract.
1333 case Instruction::Select: {
// Two selects on the same condition whose arms are swapped.
1334 Value *Cond = LHSInst->getOperand(0);
1335 Value *TrueVal = LHSInst->getOperand(1);
1336 Value *FalseVal = LHSInst->getOperand(2);
1337 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1338 FalseVal == RHSInst->getOperand(1))
1339 return std::pair(TrueVal, FalseVal);
1340 return std::nullopt;
1341 }
1342 case Instruction::Call: {
1343 // Match min(a, b) and max(a, b)
1344 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1345 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1346 if (LHSMinMax && RHSMinMax &&
1347 LHSMinMax->getPredicate() ==
// NOTE(review): listing line 1348 (the right-hand side of this predicate
// comparison) is absent from this extract.
// The operands may appear in either order on the two intrinsics.
1349 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1350 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1351 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1353 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1354 return std::nullopt;
1355 }
1356 default:
1357 return std::nullopt;
1358 }
1359}
1360
// Fold a binop with at least one select operand into a single select whose
// arms are the per-arm binop results, when those results simplify. Returns the
// new value, or nullptr if the fold does not apply.
// NOTE(review): listing line 1361 (the start of the function signature) is
// absent from this extract; the body refers to the binop as `I`.
1362 Value *LHS,
1363 Value *RHS) {
1364 Value *A, *B, *C, *D, *E, *F;
1365 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1366 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1367 if (!LHSIsSelect && !RHSIsSelect)
1368 return nullptr;
1369
// NOTE(review): listing line 1370 is absent from this extract; it presumably
// begins the declaration of `SI`, which is used below.
1371 ? nullptr
1372 : cast<SelectInst>(LHSIsSelect ? LHS : RHS);
1373
1374 FastMathFlags FMF;
// NOTE(review): listing line 1375 is absent from this extract.
1376 if (const auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
1377 FMF = FPOp->getFastMathFlags();
1378 Builder.setFastMathFlags(FMF);
1379 }
1380
1381 Instruction::BinaryOps Opcode = I.getOpcode();
1382 SimplifyQuery Q = SQ.getWithInstruction(&I);
1383
// True/False hold the simplified per-arm results; null means "did not
// simplify". The lambda below reads them and Cond by reference.
1384 Value *Cond, *True = nullptr, *False = nullptr;
1385
1386 // Special-case for add/negate combination. Replace the zero in the negation
1387 // with the trailing add operand:
1388 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1389 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1390 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1391 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1392 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1393 return nullptr;
1394 Value *N;
1395 if (True && match(FVal, m_Neg(m_Value(N)))) {
1396 Value *Sub = Builder.CreateSub(Z, N);
1397 return Builder.CreateSelect(Cond, True, Sub, I.getName(), SI);
1398 }
1399 if (False && match(TVal, m_Neg(m_Value(N)))) {
1400 Value *Sub = Builder.CreateSub(Z, N);
1401 return Builder.CreateSelect(Cond, Sub, False, I.getName(), SI);
1402 }
1403 return nullptr;
1404 };
1405
1406 if (LHSIsSelect && RHSIsSelect && A == D) {
1407 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1408 Cond = A;
1409 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1410 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1411
// If both selects have a single use and only one arm simplified, materialize
// the other arm as a real binop.
1412 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1413 if (False && !True)
1414 True = Builder.CreateBinOp(Opcode, B, E);
1415 else if (True && !False)
1416 False = Builder.CreateBinOp(Opcode, C, F);
1417 }
1418 } else if (LHSIsSelect && LHS->hasOneUse()) {
1419 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1420 Cond = A;
1421 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1422 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(B, C, RHS))
1424 return NewSel;
1425 } else if (RHSIsSelect && RHS->hasOneUse()) {
1426 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1427 Cond = D;
1428 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1429 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1430 if (Value *NewSel = foldAddNegate(E, F, LHS))
1431 return NewSel;
1432 }
1433
// Both arms must be available to build the replacement select.
1434 if (!True || !False)
1435 return nullptr;
1436
1437 Value *NewSI = Builder.CreateSelect(Cond, True, False, I.getName(), SI);
1438 NewSI->takeName(&I);
1439 return NewSI;
1440}
1441
1442/// Freely adapt every user of V as-if V was changed to !V.
1443/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
// NOTE(review): listing line 1444 (the function signature) is absent from this
// extract; the body refers to the inverted value as `I` and to a user to leave
// untouched as `IgnoredUser`.
1445 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
// Early-inc iteration, so the current user can be modified while walking the
// use list.
1446 for (User *U : make_early_inc_range(I->users())) {
1447 if (U == IgnoredUser)
1448 continue; // Don't consider this user.
1449 switch (cast<Instruction>(U)->getOpcode()) {
1450 case Instruction::Select: {
// Inverting the condition is absorbed by swapping the select's arms (and its
// profile metadata).
1451 auto *SI = cast<SelectInst>(U);
1452 SI->swapValues();
1453 SI->swapProfMetadata();
1454 break;
1455 }
1456 case Instruction::CondBr: {
// NOTE(review): listing line 1457 (presumably the declaration of `BI`) is
// absent from this extract.
1458 BI->swapSuccessors(); // swaps prof metadata too
1459 if (BPI)
1460 BPI->swapSuccEdgesProbabilities(BI->getParent());
1461 break;
1462 }
1463 case Instruction::Xor:
// NOTE(review): listing lines 1464 and 1466 (the statements of this case) are
// absent from this extract.
1465 // Add to worklist for DCE.
1467 break;
1468 default:
1469 llvm_unreachable("Got unexpected user - out of sync with "
1470 "canFreelyInvertAllUsersOf() ?");
1471 }
1472 }
1473
1474 // Update pre-existing debug value uses.
1475 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1476 llvm::findDbgValues(I, DbgVariableRecords);
1477
// Append DW_OP_not to every location operand that refers to I so the debug
// expression still describes the original (un-inverted) value.
1478 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1479 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1480 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1481 Idx != End; ++Idx)
1482 if (DbgVal->getVariableLocationOp(Idx) == I)
1483 DbgVal->setExpression(
1484 DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
1485 }
1486}
1487
1488/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1489/// constant zero (which is the 'negate' form). Certain integer constants are
1490/// also accepted and returned negated; returns null if V is neither.
1490Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1491 Value *NegV;
1492 if (match(V, m_Neg(m_Value(NegV))))
1493 return NegV;
1494
1495 // Constants can be considered to be negated values if they can be folded.
// NOTE(review): listing lines 1496, 1499 and 1503 (the `if` conditions that
// bind `C`, `C` and `CV` for the three constant cases below) are absent from
// this extract.
1497 return ConstantExpr::getNeg(C);
1498
1500 if (C->getType()->getElementType()->isIntegerTy())
1501 return ConstantExpr::getNeg(C);
1502
// Every element must be a ConstantInt or undef for the aggregate to be
// negated as a whole.
1504 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1505 Constant *Elt = CV->getAggregateElement(i);
1506 if (!Elt)
1507 return nullptr;
1508
1509 if (isa<UndefValue>(Elt))
1510 continue;
1511
1512 if (!isa<ConstantInt>(Elt))
1513 return nullptr;
1514 }
1515 return ConstantExpr::getNeg(CV);
1516 }
1517
1518 // Negate integer vector splats.
1519 if (auto *CV = dyn_cast<Constant>(V))
1520 if (CV->getType()->isVectorTy() &&
1521 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1522 return ConstantExpr::getNeg(CV);
1523
1524 return nullptr;
1525}
1526
1527// Try to fold:
1528// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1529// -> ({s|u}itofp (int_binop x, y))
1530// 2) (fp_binop ({s|u}itofp x), FpC)
1531// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1532//
1533// Assuming the sign of the cast for x/y is `OpsFromSigned`.
// Returns the new cast instruction, or nullptr if the fold cannot be proven
// exact and overflow-free.
1534Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1535 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
// NOTE(review): listing line 1536 (the remaining parameters; the body uses
// `Op1FpC` and `OpsKnown`) is absent from this extract.
1537
1538 Type *FPTy = BO.getType();
1539 Type *IntTy = IntOps[0]->getType();
1540
1541 unsigned IntSz = IntTy->getScalarSizeInBits();
1542 // This is the maximum number of in-use bits by the integer where the int -> fp
1543 // casts are exact.
1544 unsigned MaxRepresentableBits =
// NOTE(review): listing line 1545 (the initializer expression) is absent from
// this extract.
1546
1547 // Preserve known number of leading bits. This can allow us to trivially pass
1548 // nsw/nuw checks later on.
1549 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1550
1551 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1552 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1553 auto IsNonZero = [&](unsigned OpNo) -> bool {
1554 if (OpsKnown[OpNo].hasKnownBits() &&
1555 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1556 return true;
1557 return isKnownNonZero(IntOps[OpNo], SQ);
1558 };
1559
1560 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1561 // NB: This matches the impl in ValueTracking, we just try to use cached
1562 // knownbits here. If we ever start supporting WithCache for
1563 // `isKnownNonNegative`, change this to an explicit call.
1564 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1565 };
1566
1567 // Check if we know for certain that ({s|u}itofp op) is exact.
1568 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1569 // Can we treat this operand as the desired sign?
1570 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1571 !IsNonNeg(OpNo))
1572 return false;
1573
1574 // If fp precision >= bitwidth(op) then it's exact.
1575 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1576 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1577 // handled specially. We can't, however, increase the bound arbitrarily for
1578 // `sitofp` as for larger sizes, it won't sign extend.
1579 if (MaxRepresentableBits < IntSz) {
1580 // Otherwise, if it's a signed cast, check that fp precision >=
1581 // bitwidth(op) - numSignBits(op).
1582 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1583 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1584 if (OpsFromSigned)
1585 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1586 // Finally for unsigned check that fp precision >= bitwidth(op) -
1587 // numLeadingZeros(op).
1588 else {
1589 NumUsedLeadingBits[OpNo] =
1590 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1591 }
1592 }
1593 // NB: We could also check if op is known to be a power of 2 or zero (which
1594 // will always be representable). It's unlikely, however, that if we are
1595 // unable to bound op in any way we will be able to pass the overflow checks
1596 // later on.
1597
1598 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1599 return false;
1600 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1601 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1602 IsNonZero(OpNo);
1603 };
1604
1605 // If we have a constant rhs, see if we can losslessly convert it to an int.
1606 if (Op1FpC != nullptr) {
1607 // Signed + Mul requires a non-zero constant.
1608 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1609 !match(Op1FpC, m_NonZeroFP()))
1610 return nullptr;
1611
// NOTE(review): listing line 1612 (presumably the start of the `Op1IntC`
// declaration and its constant-fold call) is absent from this extract.
1613 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1614 IntTy, DL);
1615 if (Op1IntC == nullptr)
1616 return nullptr;
// Round-trip the integer back to FP; the conversion is lossless only if we
// get the original constant back.
1617 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1618 : Instruction::UIToFP,
1619 Op1IntC, FPTy, DL) != Op1FpC)
1620 return nullptr;
1621
1622 // First try to keep sign of cast the same.
1623 IntOps[1] = Op1IntC;
1624 }
1625
1626 // Ensure lhs/rhs integer types match.
1627 if (IntTy != IntOps[1]->getType())
1628 return nullptr;
1629
// The constant case was already validated by the round-trip above.
1630 if (Op1FpC == nullptr) {
1631 if (!IsValidPromotion(1))
1632 return nullptr;
1633 }
1634 if (!IsValidPromotion(0))
1635 return nullptr;
1636
1637 // Finally, we check that the integer version of the binop will not overflow.
// NOTE(review): listing line 1638 (presumably the declaration of `IntOpc`) is
// absent from this extract.
1639 // Because of the precision check, we can often rule out overflows.
1640 bool NeedsOverflowCheck = true;
1641 // Try to conservatively rule out overflow based on the already done precision
1642 // checks.
1643 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1644 unsigned OverflowMaxCurBits =
1645 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1646 bool OutputSigned = OpsFromSigned;
1647 switch (BO.getOpcode()) {
1648 case Instruction::FAdd:
1649 IntOpc = Instruction::Add;
1650 OverflowMaxOutputBits += OverflowMaxCurBits;
1651 break;
1652 case Instruction::FSub:
1653 IntOpc = Instruction::Sub;
1654 OverflowMaxOutputBits += OverflowMaxCurBits;
1655 break;
1656 case Instruction::FMul:
1657 IntOpc = Instruction::Mul;
1658 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1659 break;
1660 default:
1661 llvm_unreachable("Unsupported binop");
1662 }
1663 // The precision check may have already ruled out overflow.
1664 if (OverflowMaxOutputBits < IntSz) {
1665 NeedsOverflowCheck = false;
1666 // We can bound unsigned overflow from sub to in range signed value (this is
1667 // what allows us to avoid the overflow check for sub).
1668 if (IntOpc == Instruction::Sub)
1669 OutputSigned = true;
1670 }
1671
1672 // Precision check did not rule out overflow, so need to check.
1673 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1674 // `IntOps[...]` arguments to `KnownOps[...]`.
1675 if (NeedsOverflowCheck &&
1676 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1677 return nullptr;
1678
// Emit the integer binop, tag it nsw or nuw to match the chosen output sign,
// and cast the result back to FP with the matching cast.
1679 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1680 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1681 IntBO->setHasNoSignedWrap(OutputSigned);
1682 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1683 }
1684 if (OutputSigned)
1685 return new SIToFPInst(IntBinOp, FPTy);
1686 return new UIToFPInst(IntBinOp, FPTy);
1687}
1688
1689// Try to fold:
1690// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1691// -> ({s|u}itofp (int_binop x, y))
1692// 2) (fp_binop ({s|u}itofp x), FpC)
1693// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
// Matches the operands, then delegates to foldFBinOpOfIntCastsFromSign, first
// treating them as unsigned and then as signed. Returns nullptr if neither
// interpretation folds.
1694Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1695 // Don't perform the fold on vectors, as the integer operation may be much
1696 // more expensive than the float operation in that case.
1697 if (BO.getType()->isVectorTy())
1698 return nullptr;
1699
1700 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1701 Constant *Op1FpC = nullptr;
1702 // Check for:
1703 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1704 // 2) (binop ({s|u}itofp x), FpC)
1705 if (!match(BO.getOperand(0), m_IToFP(m_Value(IntOps[0]))))
1706 return nullptr;
1707
// Operand 1 is tried as a constant first; only then as an int-to-fp cast. In
// the constant case IntOps[1] stays null here.
1708 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1709 !match(BO.getOperand(1), m_IToFP(m_Value(IntOps[1]))))
1710 return nullptr;
1711
1712 // Cache KnownBits a bit to potentially save some analysis.
1713 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1714
1715 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1716 // different constraints depending on the sign of the cast.
1717 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1718 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1719 IntOps, Op1FpC, OpsKnown))
1720 return R;
1721 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1722 Op1FpC, OpsKnown);
1723}
1724
1725/// A binop with a constant operand and a sign-extended boolean operand may be
1726/// converted into a select of constants by applying the binary operation to
1727/// the constant with the two possible values of the extended boolean (0 or -1).
/// Returns the new select, or null if the pattern does not match.
1728Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1729 // TODO: Handle non-commutative binop (constant is operand 0).
1730 // TODO: Handle zext.
1731 // TODO: Peek through 'not' of cast.
1732 Value *BO0 = BO.getOperand(0);
1733 Value *BO1 = BO.getOperand(1);
1734 Value *X;
1735 Constant *C;
// Require operand 0 to be a sext of an i1 (or vector of i1) and operand 1 to
// be an immediate constant.
1736 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1737 !X->getType()->isIntOrIntVectorTy(1))
1738 return nullptr;
1739
1740 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
// NOTE(review): listing lines 1741-1742 (presumably the definitions of `Zero`
// and `Ones`) are absent from this extract.
1743 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1744 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1745 return createSelectInstWithUnknownProfile(X, TVal, FVal);
1746}
1747
// Try to simplify instruction `I` as if select `SI` had already resolved to
// the arm chosen by `IsTrueArm`. Returns whatever
// simplifyInstructionWithOperands returns for the substituted operand list.
// NOTE(review): listing line 1748 (the start of the signature) and line 1750
// (presumably the declaration of `Ops`) are absent from this extract.
1749 bool IsTrueArm) {
1751 for (Value *Op : I.operands()) {
1752 Value *V = nullptr;
1753 if (Op == SI) {
// The select itself is replaced by the chosen arm.
1754 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1755 } else if (match(SI->getCondition(),
// NOTE(review): listing lines 1756-1757 and 1759 (the rest of this condition)
// are absent from this extract; when it holds, V was already bound by the
// matcher, hence the empty branch.
1758 m_Specific(Op), m_Value(V))) &&
1760 // Pass
1761 } else if (match(Op, m_ZExt(m_Specific(SI->getCondition())))) {
// A zext of the select condition is 1 on the true arm and 0 on the false arm.
1762 V = IsTrueArm ? ConstantInt::get(Op->getType(), 1)
1763 : ConstantInt::getNullValue(Op->getType());
1764 } else {
1765 V = Op;
1766 }
1767 Ops.push_back(V);
1768 }
1769
1770 return simplifyInstructionWithOperands(&I, Ops, I.getDataLayout());
1771}
1772
// Clone `I`, replace the clone's uses of `SI` with `NewOp`, insert the clone
// immediately before `I`, and return it.
// NOTE(review): listing line 1773 (the start of the signature) and line 1777
// are absent from this extract.
1774 Value *NewOp, InstCombiner &IC) {
1775 Instruction *Clone = I.clone();
1776 Clone->replaceUsesOfWith(SI, NewOp);
1778 IC.InsertNewInstBefore(Clone, I.getIterator());
1779 return Clone;
1780}
1781
// Fold operation `Op` into the arms of select `SI`, producing a new select of
// the per-arm results. At least one arm must simplify (both, when
// `SimplifyBothArms` is set). Returns the new, not yet inserted, select or
// nullptr.
// NOTE(review): listing line 1782 (the start of the signature) is absent from
// this extract.
1783 bool FoldWithMultiUse,
1784 bool SimplifyBothArms) {
1785 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1786 if (!SI->hasOneUser() && !FoldWithMultiUse)
1787 return nullptr;
1788
1789 Value *TV = SI->getTrueValue();
1790 Value *FV = SI->getFalseValue();
1791
1792 // Bool selects with constant operands can be folded to logical ops.
1793 if (SI->getType()->isIntOrIntVectorTy(1))
1794 return nullptr;
1795
1796 // Avoid breaking min/max reduction pattern,
1797 // which is necessary for vectorization later.
// NOTE(review): listing line 1798 (the condition guarding this loop nest) is
// absent from this extract.
1799 for (Value *IntrinOp : Op.operands())
1800 if (auto *PN = dyn_cast<PHINode>(IntrinOp))
1801 for (Value *PhiOp : PN->operands())
1802 if (PhiOp == &Op)
1803 return nullptr;
1804
1805 // Test if a FCmpInst instruction is used exclusively by a select as
1806 // part of a minimum or maximum operation. If so, refrain from doing
1807 // any other folding. This helps out other analyses which understand
1808 // non-obfuscated minimum and maximum idioms. And in this case, at
1809 // least one of the comparison operands has at least one user besides
1810 // the compare (the select), which would often largely negate the
1811 // benefit of folding anyway.
1812 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1813 if (CI->hasOneUse()) {
1814 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1815 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1816 !CI->isCommutative())
1817 return nullptr;
1818 }
1819 }
1820
1821 // Make sure that one of the select arms folds successfully.
1822 Value *NewTV = simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/true);
1823 Value *NewFV =
1824 simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/false);
1825 if (!NewTV && !NewFV)
1826 return nullptr;
1827
1828 if (SimplifyBothArms && !(NewTV && NewFV))
1829 return nullptr;
1830
1831 // Create an instruction for the arm that did not fold.
1832 if (!NewTV)
1833 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1834 if (!NewFV)
1835 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1836
1837 SelectInst *NewSel = SelectInst::Create(SI->getCondition(), NewTV, NewFV);
1838
1839 // Preserve metadata that remains valid for the transformed select.
1840 NewSel->copyMetadata(*SI,
1841 {LLVMContext::MD_prof, LLVMContext::MD_unpredictable});
1842
1843 // Preserve source location information.
1844 NewSel->setDebugLoc(SI->getDebugLoc());
1845
1846 return NewSel;
1847}
1848
// Try to simplify instruction `I` along the edge from `InBB`, with the PHI
// `PN` replaced by its incoming value `InValue`. Returns the simplified value
// or nullptr.
// NOTE(review): listing line 1849 (the start of the signature) and line 1855
// (presumably the declaration of `Ops`) are absent from this extract.
1850 Value *InValue, BasicBlock *InBB,
1851 const DataLayout &DL,
1852 const SimplifyQuery SQ) {
1853 // NB: It is a precondition of this transform that the operands be
1854 // phi translatable!
1856 for (Value *Op : I.operands()) {
1857 if (Op == PN)
1858 Ops.push_back(InValue);
1859 else
1860 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1861 }
1862
1863 // Don't consider the simplification successful if we get back a constant
1864 // expression. That's just an instruction in hiding.
1865 // Also reject the case where we simplify back to the phi node. We wouldn't
1866 // be able to remove it in that case.
// NOTE(review): listing line 1867 (presumably the start of the `NewVal`
// declaration and its simplify call) is absent from this extract.
1868 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1869 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1870 return NewVal;
1871
1872 // Check if incoming PHI value can be replaced with constant
1873 // based on implied condition.
1874 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(InBB->getTerminator());
1875 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
// Only applies when I is an icmp and the predecessor ends in a conditional
// branch with two distinct successors, so the edge taken fixes the condition.
1876 if (TerminatorBI &&
1877 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1878 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1879 std::optional<bool> ImpliedCond = isImpliedCondition(
1880 TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1],
1881 DL, LHSIsTrue);
1882 if (ImpliedCond)
1883 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1884 }
1885
1886 return nullptr;
1887}
1888
1889/// In some cases it is beneficial to fold a select into a binary operator.
1890/// For example:
1891/// %1 = or %in, 4
1892/// %2 = select %cond, %1, %in
1893/// %3 = or %2, 1
1894/// =>
1895/// %1 = select i1 %cond, 5, 1
1896/// %2 = or %1, %in
// NOTE(review): listing line 1897 (the function signature) is missing here.
// From the body, the function takes a binary operator `Op` by reference and
// returns either a newly created BinaryOperator or nullptr.
//
// Pattern handled: Op = binop(select(Cond, TV, FV), Const) where one select
// arm is itself `binop(OtherArm, Const2)` with the same opcode. The two
// constants are folded together and the select is rebuilt over constants.
1898 assert(Op.isAssociative() && "The operation must be associative!");
1899
1900 SelectInst *SI = dyn_cast<SelectInst>(Op.getOperand(0));
1901
     // Require select on the LHS, an immediate constant on the RHS, and single
     // uses of both Op and the select.
1902 Constant *Const;
1903 if (!SI || !match(Op.getOperand(1), m_ImmConstant(Const)) ||
1904 !Op.hasOneUse() || !SI->hasOneUse())
1905 return nullptr;
1906
1907 Value *TV = SI->getTrueValue();
1908 Value *FV = SI->getFalseValue();
1909 Value *Input, *NewTV, *NewFV;
1910 Constant *Const2;
1911
     // Case 1: the true arm is `binop(FV, Const2)`; the shared input is FV.
1912 if (TV->hasOneUse() && match(TV, m_BinOp(Op.getOpcode(), m_Specific(FV),
1913 m_ImmConstant(Const2)))) {
1914 NewTV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1915 NewFV = Const;
1916 Input = FV;
     // Case 2: the false arm is `binop(TV, Const2)`; the shared input is TV.
1917 } else if (FV->hasOneUse() &&
1918 match(FV, m_BinOp(Op.getOpcode(), m_Specific(TV),
1919 m_ImmConstant(Const2)))) {
1920 NewTV = Const;
1921 NewFV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1922 Input = TV;
1923 } else
1924 return nullptr;
1925
     // Constant folding may fail and yield a null pointer; give up then.
1926 if (!NewTV || !NewFV)
1927 return nullptr;
1928
     // NOTE(review): the last CreateSelect argument is presumably the
     // instruction to copy metadata from (skipped when
     // ProfcheckDisableMetadataFixes is set) -- confirm against IRBuilder.
1929 Value *NewSI =
1930 Builder.CreateSelect(SI->getCondition(), NewTV, NewFV, "",
1931 ProfcheckDisableMetadataFixes ? nullptr : SI);
1932 return BinaryOperator::Create(Op.getOpcode(), NewSI, Input);
1933}
1934
// NOTE(review): the first signature line (listing line 1935) is missing from
// this listing. A call elsewhere in this file invokes foldOpIntoPhi(I, PN),
// and the body uses `I` (an instruction) and `PN` (a phi); this is presumably
// that function -- confirm against the upstream source.
//
// Tries to replace instruction I, which uses phi PN, by a new phi whose
// incoming values are I evaluated per incoming edge. Each incoming value must
// either simplify, be known to fold once a clone of I is placed in the
// predecessor, or be the single non-simplified value. Returns the result of
// replaceInstUsesWith(I, NewPN) on success and nullptr otherwise.
1936 bool AllowMultipleUses) {
1937 unsigned NumPHIValues = PN->getNumIncomingValues();
1938 if (NumPHIValues == 0)
1939 return nullptr;
1940
1941 // We normally only transform phis with a single use. However, if a PHI has
1942 // multiple uses and they are all the same operation, we can fold *all* of the
1943 // uses into the PHI.
1944 bool OneUse = PN->hasOneUse();
1945 bool IdenticalUsers = false;
1946 if (!AllowMultipleUses && !OneUse) {
1947 // Walk the use list for the instruction, comparing them to I.
1948 for (User *U : PN->users()) {
     // NOTE(review): listing line 1949 is missing here; it presumably declares
     // `UI` as U viewed as an Instruction -- confirm.
1950 if (UI != &I && !I.isIdenticalTo(UI))
1951 return nullptr;
1952 }
1953 // Otherwise, we can replace *all* users with the new PHI we form.
1954 IdenticalUsers = true;
1955 }
1956
1957 // Check that all operands are phi-translatable.
1958 for (Value *Op : I.operands()) {
1959 if (Op == PN)
1960 continue;
1961
1962 // Non-instructions never require phi-translation.
     // Note: this local `I` shadows the function's `I` inside the loop body.
1963 auto *I = dyn_cast<Instruction>(Op);
1964 if (!I)
1965 continue;
1966
1967 // Phi-translate can handle phi nodes in the same block.
1968 if (isa<PHINode>(I))
1969 if (I->getParent() == PN->getParent())
1970 continue;
1971
1972 // Operand dominates the block, no phi-translation necessary.
1973 if (DT.dominates(I, PN->getParent()))
1974 continue;
1975
1976 // Not phi-translatable, bail out.
1977 return nullptr;
1978 }
1979
1980 // Check to see whether the instruction can be folded into each phi operand.
1981 // If there is one operand that does not fold, remember the BB it is in.
     // NewPhiValues gets one entry per incoming edge; nullptr entries are
     // placeholders filled with a clone of I further below.
1982 SmallVector<Value *> NewPhiValues;
1983 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1984 bool SeenNonSimplifiedInVal = false;
1985 for (unsigned i = 0; i != NumPHIValues; ++i) {
1986 Value *InVal = PN->getIncomingValue(i);
1987 BasicBlock *InBB = PN->getIncomingBlock(i);
1988
1989 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1990 NewPhiValues.push_back(NewVal);
1991 continue;
1992 }
1993
1994 // Handle some cases that can't be fully simplified, but where we know that
1995 // the two instructions will fold into one.
1996 auto WillFold = [&]() {
1997 if (!InVal->hasUseList() || !InVal->hasOneUser())
1998 return false;
1999
2000 // icmp of ucmp/scmp with constant will fold to icmp.
2001 const APInt *Ignored;
2002 if (isa<CmpIntrinsic>(InVal) &&
2003 match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
2004 return true;
2005
2006 // icmp eq zext(bool), 0 will fold to !bool.
2007 if (isa<ZExtInst>(InVal) &&
2008 cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
2009 match(&I,
     // NOTE(review): listing line 2010 is missing here; it holds the pattern
     // completing this match (per the comment above, presumably an
     // `icmp eq PN, 0` matcher) -- confirm.
2011 return true;
2012
2013 return false;
2014 };
2015
2016 if (WillFold()) {
2017 OpsToMoveUseToIncomingBB.push_back(i);
2018 NewPhiValues.push_back(nullptr);
2019 continue;
2020 }
2021
2022 if (!OneUse && !IdenticalUsers)
2023 return nullptr;
2024
2025 if (SeenNonSimplifiedInVal)
2026 return nullptr; // More than one non-simplified value.
2027 SeenNonSimplifiedInVal = true;
2028
2029 // If there is exactly one non-simplified value, we can insert a copy of the
2030 // operation in that block. However, if this is a critical edge, we would
2031 // be inserting the computation on some other paths (e.g. inside a loop).
2032 // Only do this if the pred block is unconditionally branching into the phi
2033 // block. Also, make sure that the pred block is not dead code.
     // NOTE(review): listing line 2034 is missing here; it presumably declares
     // `BI` from InBB's terminator as an unconditional branch -- confirm.
2035 if (!BI || !DT.isReachableFromEntry(InBB))
2036 return nullptr;
2037
2038 NewPhiValues.push_back(nullptr);
2039 OpsToMoveUseToIncomingBB.push_back(i);
2040
2041 // Do not push the operation across a loop backedge. This could result in
2042 // an infinite combine loop, and is generally non-profitable (especially
2043 // if the operation was originally outside the loop).
2044 if (isBackEdge(InBB, PN->getParent()))
2045 return nullptr;
2046 }
2047
2048 // Clone the instruction that uses the phi node and move it into the incoming
2049 // BB because we know that the next iteration of InstCombine will simplify it.
     // NOTE(review): listing line 2050 is missing here; it presumably declares
     // `Clones`, a map from predecessor block to the clone placed there, so
     // that a block feeding several phi entries gets only one clone -- confirm.
2051 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
     // NOTE(review): listing line 2052 is missing here; it presumably declares
     // `Op` as the phi's incoming value at OpIndex -- confirm.
2053 BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
2054
2055 Instruction *Clone = Clones.lookup(OpBB);
2056 if (!Clone) {
2057 Clone = I.clone();
     // Rewrite the clone's operands for this predecessor: the phi becomes its
     // incoming value, everything else is phi-translated into OpBB.
2058 for (Use &U : Clone->operands()) {
2059 if (U == PN)
2060 U = Op;
2061 else
2062 U = U->DoPHITranslation(PN->getParent(), OpBB);
2063 }
2064 Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
2065 Clones.insert({OpBB, Clone});
2066 // We may have speculated the instruction.
     // NOTE(review): listing line 2067 is missing here; given the comment
     // above it presumably adjusts the clone for speculation -- confirm.
2068 }
2069
2070 NewPhiValues[OpIndex] = Clone;
2071 }
2072
2073 // Okay, we can do the transformation: create the new PHI node.
2074 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
2075 InsertNewInstBefore(NewPN, PN->getIterator());
2076 NewPN->takeName(PN);
2077 NewPN->setDebugLoc(PN->getDebugLoc());
2078
2079 for (unsigned i = 0; i != NumPHIValues; ++i)
2080 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
2081
2082 if (IdenticalUsers) {
2083 // Collect and deduplicate users up-front to avoid iterator invalidation.
     // NOTE(review): listing line 2084 is missing here; it presumably declares
     // the `ToReplace` set -- confirm.
2085 for (User *U : PN->users()) {
     // NOTE(review): listing line 2086 is missing here; it presumably declares
     // `User` as U viewed as an Instruction -- confirm.
2087 if (User == &I)
2088 continue;
2089 ToReplace.insert(User);
2090 }
2091 for (Instruction *I : ToReplace) {
2092 replaceInstUsesWith(*I, NewPN);
     // NOTE(review): listing line 2093 is missing here; presumably the
     // replaced user is erased -- confirm.
2094 }
2095 OneUse = true;
2096 }
2097
2098 if (OneUse) {
2099 replaceAllDbgUsesWith(*PN, *NewPN, *PN, DT);
2100 }
2101 return replaceInstUsesWith(I, NewPN);
2102}
2103
// NOTE(review): the signature line (listing line 2104) is missing from this
// listing. A call elsewhere in this file invokes foldBinopWithRecurrence(BO),
// which is presumably this function -- confirm against the upstream source.
//
// Combines `BO = op(BO0, BO1)` where BO0 and BO1 are same-opcode binops that
// each form a simple recurrence with constant start and step values. A single
// new phi/binop pair is built from the constant-folded starts and steps, and
// BO's uses are replaced with the new binop. Returns nullptr if any
// precondition fails.
2105 if (!BO.isAssociative())
2106 return nullptr;
2107
2108 // Find the interleaved binary ops.
2109 auto Opc = BO.getOpcode();
2110 auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
2111 auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
     // Each inner binop must have exactly two uses, share BO's opcode, be
     // associative, and live in the same block as the other.
2112 if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
2113 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2114 !BO0->isAssociative() || !BO1->isAssociative() ||
2115 BO0->getParent() != BO1->getParent())
2116 return nullptr;
2117
2118 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2119 "Expected commutative instructions!");
2120
2121 // Find the matching phis, forming the recurrences.
2122 PHINode *PN0, *PN1;
2123 Value *Start0, *Step0, *Start1, *Step1;
2124 if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
2125 !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
2126 PN0->getParent() != PN1->getParent())
2127 return nullptr;
2128
2129 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2130 "Expected PHIs with two incoming values!");
2131
2132 // Convert the start and step values to constants.
2133 auto *Init0 = dyn_cast<Constant>(Start0);
2134 auto *Init1 = dyn_cast<Constant>(Start1);
2135 auto *C0 = dyn_cast<Constant>(Step0);
2136 auto *C1 = dyn_cast<Constant>(Step1);
2137 if (!Init0 || !Init1 || !C0 || !C1)
2138 return nullptr;
2139
2140 // Fold the recurrence constants.
2141 auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
2142 auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
2143 if (!Init || !C)
2144 return nullptr;
2145
2146 // Create the reduced PHI.
2147 auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
2148 "reduced.phi");
2149
2150 // Create the new binary op.
2151 auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
2152 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2153 // Intersect FMF flags for FADD and FMUL.
2154 FastMathFlags Intersect = BO0->getFastMathFlags() &
2155 BO1->getFastMathFlags() & BO.getFastMathFlags();
2156 NewBO->setFastMathFlags(Intersect);
2157 } else {
     // Merge the flags of all three original binops and apply the result to
     // the new one; the two "all known" facts are explicitly cleared first.
2158 OverflowTracking Flags;
2159 Flags.AllKnownNonNegative = false;
2160 Flags.AllKnownNonZero = false;
2161 Flags.mergeFlags(*BO0);
2162 Flags.mergeFlags(*BO1);
2163 Flags.mergeFlags(BO);
2164 Flags.applyFlags(*NewBO);
2165 }
2166 NewBO->takeName(&BO);
2167
     // Populate the new phi edge by edge, mirroring PN0: the start edge gets
     // the folded start constant, the recurrence edge gets the new binop.
2168 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2169 auto *V = PN0->getIncomingValue(I);
2170 auto *BB = PN0->getIncomingBlock(I);
2171 if (V == Init0) {
2172 assert(((PN1->getIncomingValue(0) == Init1 &&
2173 PN1->getIncomingBlock(0) == BB) ||
2174 (PN1->getIncomingValue(1) == Init1 &&
2175 PN1->getIncomingBlock(1) == BB)) &&
2176 "Invalid incoming block!");
2177 NewPN->addIncoming(Init, BB);
2178 } else if (V == BO0) {
2179 assert(((PN1->getIncomingValue(0) == BO1 &&
2180 PN1->getIncomingBlock(0) == BB) ||
2181 (PN1->getIncomingValue(1) == BO1 &&
2182 PN1->getIncomingBlock(1) == BB)) &&
2183 "Invalid incoming block!");
2184 NewPN->addIncoming(NewBO, BB);
2185 } else
2186 llvm_unreachable("Unexpected incoming value!");
2187 }
2188
2189 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2190 << "\n with " << *PN1 << "\n " << *BO1
2191 << '\n');
2192
2193 // Insert the new recurrence and remove the old (dead) ones.
2194 InsertNewInstWith(NewPN, PN0->getIterator());
2195 InsertNewInstWith(NewBO, BO0->getIterator());
2196
     // NOTE(review): listing lines 2197-2202 are missing here; given the
     // comment above they presumably remove the old phis and binops --
     // confirm against the upstream source.
2203
2204 return replaceInstUsesWith(BO, NewBO);
2205}
2206
// NOTE(review): the signature line (listing line 2207) is missing from this
// listing. From the body, the function takes a binary operator `BO` by
// reference and returns an Instruction* (a new phi, the recurrence fold's
// result, or nullptr).
//
// Folds a binop whose two operands are single-use phis in the binop's own
// block. Either every incoming pair contains the opcode's identity constant
// (the binop then becomes a phi of the other values), or, for two-entry phis,
// one predecessor supplies constants on both phis and the binop is recreated
// in the other predecessor.
2208 // Attempt to fold binary operators whose operands are simple recurrences.
2209 if (auto *NewBO = foldBinopWithRecurrence(BO))
2210 return NewBO;
2211
2212 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2213 // we are guarding against replicating the binop in >1 predecessor.
2214 // This could miss matching a phi with 2 constant incoming values.
2215 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
2216 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
2217 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2218 Phi0->getNumOperands() != Phi1->getNumOperands())
2219 return nullptr;
2220
2221 // TODO: Remove the restriction for binop being in the same block as the phis.
2222 if (BO.getParent() != Phi0->getParent() ||
2223 BO.getParent() != Phi1->getParent())
2224 return nullptr;
2225
2226 // Fold if there is at least one specific constant value in phi0 or phi1's
2227 // incoming values that comes from the same block and this specific constant
2228 // value can be used to do optimization for specific binary operator.
2229 // For example:
2230 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2231 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2232 // %add = add i32 %phi0, %phi1
2233 // ==>
2234 // %add = phi i32 [%j, %bb0], [%i, %bb1]
     // NOTE(review): listing line 2235 is missing here; it presumably declares
     // `C` as the identity constant for BO's opcode and type (the trailing
     // argument below disallows RHS-only identities) -- confirm.
2236 /*AllowRHSConstant*/ false);
2237 if (C) {
2238 SmallVector<Value *, 4> NewIncomingValues;
     // Predicate over zipped operand pairs. It has a side effect: on success
     // it appends the non-identity value of the pair to NewIncomingValues.
2239 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2240 auto &Phi0Use = std::get<0>(T);
2241 auto &Phi1Use = std::get<1>(T);
2242 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
2243 return false;
2244 Value *Phi0UseV = Phi0Use.get();
2245 Value *Phi1UseV = Phi1Use.get();
2246 if (Phi0UseV == C)
2247 NewIncomingValues.push_back(Phi1UseV);
2248 else if (Phi1UseV == C)
2249 NewIncomingValues.push_back(Phi0UseV);
2250 else
2251 return false;
2252 return true;
2253 };
2254
2255 if (all_of(zip(Phi0->operands(), Phi1->operands()),
2256 CanFoldIncomingValuePair)) {
2257 PHINode *NewPhi =
2258 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
2259 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2260 "The number of collected incoming values should equal the number "
2261 "of the original PHINode operands!");
2262 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2263 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
2264 return NewPhi;
2265 }
2266 }
2267
     // The remaining fold only handles phis with exactly two incoming values.
2268 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2269 return nullptr;
2270
2271 // Match a pair of incoming constants for one of the predecessor blocks.
2272 BasicBlock *ConstBB, *OtherBB;
2273 Constant *C0, *C1;
2274 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
2275 ConstBB = Phi0->getIncomingBlock(0);
2276 OtherBB = Phi0->getIncomingBlock(1);
2277 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
2278 ConstBB = Phi0->getIncomingBlock(1);
2279 OtherBB = Phi0->getIncomingBlock(0);
2280 } else {
2281 return nullptr;
2282 }
2283 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
2284 return nullptr;
2285
2286 // The block that we are hoisting to must reach here unconditionally.
2287 // Otherwise, we could be speculatively executing an expensive or
2288 // non-speculative op.
2289 auto *PredBlockBranch = dyn_cast<UncondBrInst>(OtherBB->getTerminator());
2290 if (!PredBlockBranch || !DT.isReachableFromEntry(OtherBB))
2291 return nullptr;
2292
2293 // TODO: This check could be tightened to only apply to binops (div/rem) that
2294 // are not safe to speculatively execute. But that could allow hoisting
2295 // potentially expensive instructions (fdiv for example).
2296 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
     // NOTE(review): listing line 2297 is missing here; it holds the condition
     // checked for each instruction preceding BO in its block, and the
     // `return nullptr` below belongs to it -- confirm against upstream.
2298 return nullptr;
2299
2300 // Fold constants for the predecessor block with constant incoming values.
2301 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
2302 if (!NewC)
2303 return nullptr;
2304
2305 // Make a new binop in the predecessor block with the non-constant incoming
2306 // values.
2307 Builder.SetInsertPoint(PredBlockBranch);
2308 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
2309 Phi0->getIncomingValueForBlock(OtherBB),
2310 Phi1->getIncomingValueForBlock(OtherBB));
     // The builder may return something other than a BinaryOperator; flags are
     // copied only when a real binop instruction was created.
2311 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
2312 NotFoldedNewBO->copyIRFlags(&BO);
2313
2314 // Replace the binop with a phi of the new values. The old phis are dead.
2315 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
2316 NewPhi->addIncoming(NewBO, OtherBB);
2317 NewPhi->addIncoming(NewC, ConstBB);
2318 return NewPhi;
2319}
2320
// NOTE(review): the signature line (listing line 2321) is missing from this
// listing. From the body, the function takes an instruction `I` with at least
// two operands and returns an Instruction* or nullptr.
//
// Tries to fold I into a select or phi operand: operand 0 is attempted first,
// then operand 1.
2322 auto TryFoldOperand = [&](unsigned OpIdx,
2323 bool IsOtherParamConst) -> Instruction * {
     // The last FoldOpIntoSelect argument is true exactly when the other
     // operand is not a constant.
     // NOTE(review): the meaning of the two trailing FoldOpIntoSelect
     // arguments is not visible in this chunk -- confirm against its
     // declaration.
2324 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(OpIdx)))
2325 return FoldOpIntoSelect(I, Sel, false, !IsOtherParamConst);
2326 if (auto *PN = dyn_cast<PHINode>(I.getOperand(OpIdx)))
2327 return foldOpIntoPhi(I, PN);
2328 return nullptr;
2329 };
2330
2331 if (Instruction *NewI =
2332 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(I.getOperand(1))))
2333 return NewI;
2334 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(I.getOperand(0)));
2335}
2336
// NOTE(review): the signature line (listing line 2337) is missing from this
// listing. From the body, the function takes `GEP` and `Src` (both offering
// hasAllZeroIndices()) and returns bool.
//
// Returns false only when GEP has all-zero indices while Src does not and Src
// has more than one use (or none); returns true in every other case.
2338 // If this GEP has only 0 indices, it is the same pointer as
2339 // Src. If Src is not a trivial GEP too, don't combine
2340 // the indices.
2341 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2342 !Src.hasOneUse())
2343 return false;
2344 return true;
2345}
2346
2347/// Find a constant NewC that has property:
2348/// shuffle(NewC, poison, ShMask) = C
2349/// for lanes that select NewC. Lanes that select the poison operand are not
2350/// constrained.
2351/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2352///
2353/// A 1-to-1 mapping is not required. Example:
2354/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
// NOTE(review): the first signature line (listing line 2355) is missing from
// this listing. A call elsewhere in this file invokes
// unshuffleConstant(Mask, C, VectorType*), which is presumably this function;
// the body uses `ShMask`, `C` and `NewCTy` -- confirm against upstream.
2356 VectorType *NewCTy) {
     // Scalable vectors: only a splat constant can be un-shuffled.
2357 if (isa<ScalableVectorType>(NewCTy)) {
2358 Constant *Splat = C->getSplatValue();
2359 if (!Splat)
2360 return nullptr;
     // NOTE(review): listing line 2361 is missing here; it presumably returns
     // a splat of `Splat` in the new type, since the code below casts NewCTy
     // to FixedVectorType unconditionally -- confirm.
2362 }
2363
     // A result type with more elements than C is rejected.
2364 if (cast<FixedVectorType>(NewCTy)->getNumElements() >
2365 cast<FixedVectorType>(C->getType())->getNumElements())
2366 return nullptr;
2367
2368 unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
2369 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
     // Start with all-poison lanes and fill in the lanes the mask dictates.
2370 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2371 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
2372 for (unsigned I = 0; I < NumElts; ++I) {
2373 Constant *CElt = C->getAggregateElement(I);
     // Negative mask elements are skipped: they place no constraint on NewC.
2374 if (ShMask[I] >= 0) {
2375 int MaskElt = ShMask[I];
     // Mask elements at or beyond NewCNumElts do not index NewC; skip them.
2376 if (MaskElt >= (int)NewCNumElts)
2377 continue;
2378
2379 Constant *NewCElt = NewVecC[MaskElt];
2380 // Bail out if:
2381 // 1. The constant vector contains a constant expression.
2382 // 2. The shuffle needs an element of the constant vector that can't
2383 // be mapped to a new constant vector.
2384 // 3. This is a widening shuffle that copies elements of V1 into the
2385 // extended elements (extending with poison is allowed).
2386 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2387 I >= NewCNumElts)
2388 return nullptr;
2389 NewVecC[MaskElt] = CElt;
2390 }
2391 }
2392 return ConstantVector::get(NewVecC);
2393}
2394
2395// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
// NOTE(review): the first signature line (listing line 2396) is missing from
// this listing. A call elsewhere in this file invokes
// constantFoldBinOpWithSplat(Opcode, Vector, Splat, SplatLHS, DL), which is
// presumably this function -- confirm against upstream.
// Returns whatever ConstantFoldBinaryOpOperands yields for the two operands.
2397 Constant *Splat, bool SplatLHS,
2398 const DataLayout &DL) {
2399 ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
     // NOTE(review): listing line 2400 is missing here; it presumably declares
     // `LHS` as `Splat` broadcast to EC elements -- confirm.
2401 Constant *RHS = Vector;
     // Operands are set up for the splat-on-the-left form; swap otherwise.
2402 if (!SplatLHS)
2403 std::swap(LHS, RHS);
2404 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2405}
2406
// Sinks a binop below splice intrinsic calls (intrinsic chosen by SpliceID):
// the binop is applied to the un-spliced inputs and a single splice of the
// result is returned as a new, not yet inserted, call. Returns nullptr when no
// pattern applies.
2407template <Intrinsic::ID SpliceID>
     // NOTE(review): listing line 2408 (function name and first parameter) is
     // missing here; the body uses `Inst`, a binary operator -- confirm.
2409 InstCombiner::BuilderTy &Builder) {
2410 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
     // Builds Op(X, Y) carrying Inst's name and IR flags, then wraps it in
     // splice(result, poison, Offset).
2411 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2412 Value *V = Builder.CreateBinOp(Inst.getOpcode(), X, Y, Inst.getName());
2413 if (auto *BO = dyn_cast<BinaryOperator>(V))
2414 BO->copyIRFlags(&Inst);
2415 Module *M = Inst.getModule();
2416 Function *F = Intrinsic::getOrInsertDeclaration(M, SpliceID, V->getType());
2417 return CallInst::Create(F, {V, PoisonValue::get(V->getType()), Offset});
2418 };
2419 Value *V1, *V2, *Offset;
2420 if (match(LHS,
     // NOTE(review): listing line 2421 is missing here; it presumably holds
     // the matcher for splice(V1, poison, Offset) on LHS -- confirm.
2422 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2423 // -> splice(Op(V1, V2), poison, offset)
     // NOTE(review): listing line 2424 is missing here; it presumably opens
     // the `if (match(RHS, ...` whose tail follows -- confirm.
2425 m_Specific(Offset))) &&
2426 (LHS->hasOneUse() || RHS->hasOneUse() ||
2427 (LHS == RHS && LHS->hasNUses(2))))
2428 return CreateBinOpSplice(V1, V2, Offset);
2429
2430 // Op(splice(V1, poison, offset), RHSSplat)
2431 // -> splice(Op(V1, RHSSplat), poison, offset)
2432 if (LHS->hasOneUse() && isSplatValue(RHS))
2433 return CreateBinOpSplice(V1, RHS, Offset);
2434 }
2435 // Op(LHSSplat, splice(V2, poison, offset))
2436 // -> splice(Op(LHSSplat, V2), poison, offset)
2437 else if (isSplatValue(LHS) &&
     // NOTE(review): listing line 2438 is missing here; it presumably begins
     // the matcher for splice(V2, poison, Offset) on RHS -- confirm.
2439 m_Value(Offset)))))
2440 return CreateBinOpSplice(LHS, V2, Offset);
2441
2442 // TODO: Fold binops of the form
2443 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2444 // -> splice(poison, Op(V1, V2), offset)
2445
2446 return nullptr;
2447}
2448
// NOTE(review): the signature line (listing line 2449) is missing from this
// listing. From the body, the function takes a binary operator `Inst` by
// reference and returns an Instruction* or nullptr.
//
// Collection of folds for vector-typed binops. In order, it tries: constant
// folding through a constant vector.insert with a splat, binop of two
// concatenations, binop of reverses (plain and VP), two folds whose calls are
// not visible in this listing, and then a group of shuffle-related folds that
// sit behind a check whose condition is also not visible.
2450 if (!isa<VectorType>(Inst.getType()))
2451 return nullptr;
2452
2453 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2454 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2455 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2456 cast<VectorType>(Inst.getType())->getElementCount());
2457 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2458 cast<VectorType>(Inst.getType())->getElementCount());
2459
2460 auto foldConstantsThroughSubVectorInsertSplat =
2461 [&](Value *MaybeSubVector, Value *MaybeSplat,
2462 bool SplatLHS) -> Instruction * {
2463 Value *Idx;
2464 Constant *Splat, *SubVector, *Dest;
2465 if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
2466 !match(MaybeSubVector,
2467 m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
2468 m_Value(Idx))))
2469 return nullptr;
2470 SubVector =
2471 constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
2472 Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
2473 if (!SubVector || !Dest)
2474 return nullptr;
2475 auto *InsertVector =
2476 Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
2477 return replaceInstUsesWith(Inst, InsertVector);
2478 };
2479
2480 // If one operand is a constant splat and the other operand is a
2481 // `vector.insert` where both the destination and subvector are constant,
2482 // apply the operation to both the destination and subvector, returning a new
2483 // constant `vector.insert`. This helps constant folding for scalable vectors.
2484 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2485 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2486 return Folded;
2487 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2488 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2489 return Folded;
2490
2491 // If both operands of the binop are vector concatenations, then perform the
2492 // narrow binop on each pair of the source operands followed by concatenation
2493 // of the results.
2494 Value *L0, *L1, *R0, *R1;
2495 ArrayRef<int> Mask;
2496 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2497 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2498 LHS->hasOneUse() && RHS->hasOneUse() &&
2499 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2500 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2501 // This transform does not have the speculative execution constraint as
2502 // below because the shuffle is a concatenation. The new binops are
2503 // operating on exactly the same elements as the existing binop.
2504 // TODO: We could ease the mask requirement to allow different undef lanes,
2505 // but that requires an analysis of the binop-with-undef output value.
2506 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2507 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2508 BO->copyIRFlags(&Inst);
2509 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2510 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2511 BO->copyIRFlags(&Inst);
2512 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2513 }
2514
     // Builds Op(X, Y) with Inst's name and flags and returns a (not yet
     // inserted) vector_reverse call on the result.
2515 auto createBinOpReverse = [&](Value *X, Value *Y) {
2516 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2517 if (auto *BO = dyn_cast<BinaryOperator>(V))
2518 BO->copyIRFlags(&Inst);
2519 Module *M = Inst.getModule();
     // NOTE(review): listing line 2520 is missing here; it presumably declares
     // `F` by starting the intrinsic-declaration lookup whose arguments
     // follow -- confirm.
2521 M, Intrinsic::vector_reverse, V->getType());
2522 return CallInst::Create(F, V);
2523 };
2524
2525 // NOTE: Reverse shuffles don't require the speculative execution protection
2526 // below because they don't affect which lanes take part in the computation.
2527
2528 Value *V1, *V2;
2529 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2530 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2531 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2532 (LHS->hasOneUse() || RHS->hasOneUse() ||
2533 (LHS == RHS && LHS->hasNUses(2))))
2534 return createBinOpReverse(V1, V2);
2535
2536 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2537 if (LHS->hasOneUse() && isSplatValue(RHS))
2538 return createBinOpReverse(V1, RHS);
2539 }
2540 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2541 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2542 return createBinOpReverse(LHS, V2);
2543
     // Same as createBinOpReverse, but emits experimental_vp_reverse with an
     // all-true mask and the given explicit vector length.
2544 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2545 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2546 if (auto *BO = dyn_cast<BinaryOperator>(V))
2547 BO->copyIRFlags(&Inst);
2548
2549 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
2550 Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
2551 Module *M = Inst.getModule();
     // NOTE(review): listing line 2552 is missing here; it presumably declares
     // `F` by starting the intrinsic-declaration lookup whose arguments
     // follow -- confirm.
2553 M, Intrinsic::experimental_vp_reverse, V->getType());
2554 return CallInst::Create(F, {V, AllTrueMask, EVL});
2555 };
2556
2557 Value *EVL;
     // NOTE(review): listing line 2558 is missing here; it presumably opens an
     // `if (match(LHS, ...` for a vp.reverse intrinsic whose operands
     // follow -- confirm.
2559 m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
2560 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
     // NOTE(review): listing line 2561 is missing here; it presumably opens
     // the matching `if (match(RHS, ...` -- confirm.
2562 m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
2563 (LHS->hasOneUse() || RHS->hasOneUse() ||
2564 (LHS == RHS && LHS->hasNUses(2))))
2565 return createBinOpVPReverse(V1, V2, EVL);
2566
2567 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2568 if (LHS->hasOneUse() && isSplatValue(RHS))
2569 return createBinOpVPReverse(V1, RHS, EVL);
2570 }
2571 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2572 else if (isSplatValue(LHS) &&
     // NOTE(review): listing line 2573 is missing here; it presumably begins
     // the vp.reverse matcher applied to RHS -- confirm.
2574 m_Value(V2), m_AllOnes(), m_Value(EVL))))
2575 return createBinOpVPReverse(LHS, V2, EVL);
2576
     // NOTE(review): listing lines 2578 and 2581 (the initializers of the two
     // `Folded` variables below) are missing; which helpers are called cannot
     // be determined from this listing -- confirm against upstream.
2577 if (Instruction *Folded =
2579 return Folded;
2580 if (Instruction *Folded =
2582 return Folded;
2583
2584 // It may not be safe to reorder shuffles and things like div, urem, etc.
2585 // because we may trap when executing those ops on unknown vector elements.
2586 // See PR20059.
     // NOTE(review): listing line 2587 is missing here; it holds the condition
     // guarding the `return nullptr` below (per the comment above, presumably
     // a speculation-safety check on Inst) -- confirm.
2588 return nullptr;
2589
     // Builds Op(X, Y) with Inst's flags and returns a new single-source
     // shuffle of the result using mask M.
2590 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2591 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2592 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2593 BO->copyIRFlags(&Inst);
2594 return new ShuffleVectorInst(XY, M);
2595 };
2596
2597 // If both arguments of the binary operation are shuffles that use the same
2598 // mask and shuffle within a single vector, move the shuffle after the binop.
2599 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2600 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2601 V1->getType() == V2->getType() &&
2602 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2603 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2604 return createBinOpShuffle(V1, V2, Mask);
2605 }
2606
2607 // If both arguments of a commutative binop are select-shuffles that use the
2608 // same mask with commuted operands, the shuffles are unnecessary.
2609 if (Inst.isCommutative() &&
2610 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2611 match(RHS,
     // NOTE(review): listing line 2612 is missing here; per the comment above
     // it presumably matches a shuffle of (V2, V1) with the same mask and
     // closes the `if (` -- confirm.
2613 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2614 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2615 // TODO: Allow shuffles that contain undefs in the mask?
2616 // That is legal, but it reduces undef knowledge.
2617 // TODO: Allow arbitrary shuffles by shuffling after binop?
2618 // That might be legal, but we have to deal with poison.
2619 if (LShuf->isSelect() &&
2620 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2621 RShuf->isSelect() &&
2622 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2623 // Example:
2624 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2625 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2626 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2627 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2628 NewBO->copyIRFlags(&Inst);
2629 return NewBO;
2630 }
2631 }
2632
2633 // If one argument is a shuffle within one vector and the other is a constant,
2634 // try moving the shuffle after the binary operation. This canonicalization
2635 // intends to move shuffles closer to other shuffles and binops closer to
2636 // other binops, so they can be folded. It may also enable demanded elements
2637 // transforms.
2638 Constant *C;
     // NOTE(review): listing line 2639 is missing here; it presumably opens an
     // `if (match(&Inst, ...` for a commutable binop of a single-source
     // shuffle of V1 and an immediate constant -- confirm.
2640 m_Mask(Mask))),
2641 m_ImmConstant(C)))) {
2642 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2643 "Shuffle should not change scalar type");
2644
     // Records on which side of the binop the constant sits.
2645 bool ConstOp1 = isa<Constant>(RHS);
2646 if (Constant *NewC =
2647 unshuffleConstant(Mask, C, cast<VectorType>(V1->getType()))) {
2648 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2649 // which will cause UB for div/rem. Mask them with a safe constant.
2650 if (isa<FixedVectorType>(V1->getType()) && Inst.isIntDivRem())
2651 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2652
2653 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2654 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2655 Value *NewLHS = ConstOp1 ? V1 : NewC;
2656 Value *NewRHS = ConstOp1 ? NewC : V1;
2657 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2658 }
2659 }
2660
2661 // Try to reassociate to sink a splat shuffle after a binary operation.
2662 if (Inst.isAssociative() && Inst.isCommutative()) {
2663 // Canonicalize shuffle operand as LHS.
2664 if (isa<ShuffleVectorInst>(RHS))
2665 std::swap(LHS, RHS);
2666
2667 Value *X;
2668 ArrayRef<int> MaskC;
2669 int SplatIndex;
2670 Value *Y, *OtherOp;
2671 if (!match(LHS,
2672 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2673 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2674 X->getType() != Inst.getType() ||
2675 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2676 return nullptr;
2677
2678 // FIXME: This may not be safe if the analysis allows undef elements. By
2679 // moving 'Y' before the splat shuffle, we are implicitly assuming
2680 // that it is not undef/poison at the splat index.
     // Arrange for Y to be the operand that is a splat at SplatIndex.
2681 if (isSplatValue(OtherOp, SplatIndex)) {
2682 std::swap(Y, OtherOp);
2683 } else if (!isSplatValue(Y, SplatIndex)) {
2684 return nullptr;
2685 }
2686
2687 // X and Y are splatted values, so perform the binary operation on those
2688 // values followed by a splat followed by the 2nd binary operation:
2689 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2690 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2691 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2692 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2693 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2694
2695 // Intersect FMF on both new binops. Other (poison-generating) flags are
2696 // dropped to be safe.
2697 if (isa<FPMathOperator>(R)) {
2698 R->copyFastMathFlags(&Inst);
2699 R->andIRFlags(RHS);
2700 }
2701 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2702 NewInstBO->copyIRFlags(R);
2703 return R;
2704 }
2705
2706 return nullptr;
2707}
2708
2709/// Try to narrow the width of a binop if at least 1 operand is an extend of
2710/// of a value. This requires a potentially expensive known bits check to make
2711/// sure the narrow op does not overflow.
2712Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2713 // We need at least one extended operand.
2714 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2715
2716 // If this is a sub, we swap the operands since we always want an extension
2717 // on the RHS. The LHS can be an extension or a constant.
2718 if (BO.getOpcode() == Instruction::Sub)
2719 std::swap(Op0, Op1);
2720
2721 Value *X;
2722 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2723 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2724 return nullptr;
2725
2726 // If both operands are the same extension from the same source type and we
2727 // can eliminate at least one (hasOneUse), this might work.
2728 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2729 Value *Y;
2730 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2731 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2732 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2733 // If that did not match, see if we have a suitable constant operand.
2734 // Truncating and extending must produce the same constant.
2735 Constant *WideC;
2736 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2737 return nullptr;
2738 Constant *NarrowC = getLosslessInvCast(WideC, X->getType(), CastOpc, DL);
2739 if (!NarrowC)
2740 return nullptr;
2741 Y = NarrowC;
2742 }
2743
2744 // Swap back now that we found our operands.
2745 if (BO.getOpcode() == Instruction::Sub)
2746 std::swap(X, Y);
2747
2748 // Both operands have narrow versions. Last step: the math must not overflow
2749 // in the narrow width.
2750 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2751 return nullptr;
2752
2753 // bo (ext X), (ext Y) --> ext (bo X, Y)
2754 // bo (ext X), C --> ext (bo X, C')
2755 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2756 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2757 if (IsSext)
2758 NewBinOp->setHasNoSignedWrap();
2759 else
2760 NewBinOp->setHasNoUnsignedWrap();
2761 }
2762 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2763}
2764
2765/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2766/// transform.
2771
2772/// Thread a GEP operation with constant indices through the constant true/false
2773/// arms of a select.
2775 InstCombiner::BuilderTy &Builder) {
2776 if (!GEP.hasAllConstantIndices())
2777 return nullptr;
2778
2779 Instruction *Sel;
2780 Value *Cond;
2781 Constant *TrueC, *FalseC;
2782 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2783 !match(Sel,
2784 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2785 return nullptr;
2786
2787 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2788 // Propagate 'inbounds' and metadata from existing instructions.
2789 // Note: using IRBuilder to create the constants for efficiency.
2790 SmallVector<Value *, 4> IndexC(GEP.indices());
2791 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2792 Type *Ty = GEP.getSourceElementType();
2793 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2794 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
2795 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2796}
2797
2798// Canonicalization:
2799// gep T, (gep i8, base, C1), (Index + C2) into
2800// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2802 GEPOperator *Src,
2803 InstCombinerImpl &IC) {
2804 if (GEP.getNumIndices() != 1)
2805 return nullptr;
2806 auto &DL = IC.getDataLayout();
2807 Value *Base;
2808 const APInt *C1;
2809 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2810 return nullptr;
2811 Value *VarIndex;
2812 const APInt *C2;
2813 Type *PtrTy = Src->getType()->getScalarType();
2814 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2815 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2816 return nullptr;
2817 if (C1->getBitWidth() != IndexSizeInBits ||
2818 C2->getBitWidth() != IndexSizeInBits)
2819 return nullptr;
2820 Type *BaseType = GEP.getSourceElementType();
2822 return nullptr;
2823 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2824 APInt NewOffset = TypeSize * *C2 + *C1;
2825 if (NewOffset.isZero() ||
2826 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2828 if (GEP.hasNoUnsignedWrap() &&
2829 cast<GEPOperator>(Src)->hasNoUnsignedWrap() &&
2830 match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) {
2832 if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds())
2833 Flags |= GEPNoWrapFlags::inBounds();
2834 }
2835
2836 Value *GEPConst =
2837 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags);
2838 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags);
2839 }
2840
2841 return nullptr;
2842}
2843
2844/// Combine constant offsets separated by variable offsets.
2845/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2847 InstCombinerImpl &IC) {
2848 if (!GEP.hasAllConstantIndices())
2849 return nullptr;
2850
2853 auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
2854 while (true) {
2855 if (!InnerGEP)
2856 return nullptr;
2857
2858 NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2859 if (InnerGEP->hasAllConstantIndices())
2860 break;
2861
2862 if (!InnerGEP->hasOneUse())
2863 return nullptr;
2864
2865 Skipped.push_back(InnerGEP);
2866 InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2867 }
2868
2869 // The two constant offset GEPs are directly adjacent: Let normal offset
2870 // merging handle it.
2871 if (Skipped.empty())
2872 return nullptr;
2873
2874 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2875 // if profitable.
2876 if (!InnerGEP->hasOneUse())
2877 return nullptr;
2878
2879 // Don't bother with vector splats.
2880 Type *Ty = GEP.getType();
2881 if (InnerGEP->getType() != Ty)
2882 return nullptr;
2883
2884 const DataLayout &DL = IC.getDataLayout();
2885 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2886 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2887 !InnerGEP->accumulateConstantOffset(DL, Offset))
2888 return nullptr;
2889
2890 IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2891 for (GetElementPtrInst *SkippedGEP : Skipped)
2892 SkippedGEP->setNoWrapFlags(NW);
2893
2894 return IC.replaceInstUsesWith(
2895 GEP,
2896 IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2897 NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2898}
2899
2901 GEPOperator *Src) {
2902 // Combine Indices - If the source pointer to this getelementptr instruction
2903 // is a getelementptr instruction with matching element type, combine the
2904 // indices of the two getelementptr instructions into a single instruction.
2905 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2906 return nullptr;
2907
2908 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2909 return I;
2910
2911 if (auto *I = combineConstantOffsets(GEP, *this))
2912 return I;
2913
2914 if (Src->getResultElementType() != GEP.getSourceElementType())
2915 return nullptr;
2916
2917 // Fold chained GEP with constant base into single GEP:
2918 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2919 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2920 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2921 Src->getNumIndices() == 1) {
2922 Value *SrcIdx = *Src->idx_begin();
2923 Value *GEPIdx = *GEP.idx_begin();
2924 const APInt *ConstOffset, *TrueVal, *FalseVal;
2925 Value *Cond;
2926
2927 if ((match(SrcIdx, m_APInt(ConstOffset)) &&
2928 match(GEPIdx,
2929 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal)))) ||
2930 (match(GEPIdx, m_APInt(ConstOffset)) &&
2931 match(SrcIdx,
2932 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal))))) {
2933 auto *Select = isa<SelectInst>(GEPIdx) ? cast<SelectInst>(GEPIdx)
2934 : cast<SelectInst>(SrcIdx);
2935
2936 // Make sure the select has only one use.
2937 if (!Select->hasOneUse())
2938 return nullptr;
2939
2940 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2941 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2942 return nullptr;
2943
2944 APInt NewTrueVal = *ConstOffset + *TrueVal;
2945 APInt NewFalseVal = *ConstOffset + *FalseVal;
2946 Constant *NewTrue = ConstantInt::get(Select->getType(), NewTrueVal);
2947 Constant *NewFalse = ConstantInt::get(Select->getType(), NewFalseVal);
2948 Value *NewSelect = Builder.CreateSelect(
2949 Cond, NewTrue, NewFalse, /*Name=*/"",
2950 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2951 GEPNoWrapFlags Flags =
2953 return replaceInstUsesWith(GEP,
2954 Builder.CreateGEP(GEP.getResultElementType(),
2955 Src->getPointerOperand(),
2956 NewSelect, "", Flags));
2957 }
2958 }
2959
2960 // Find out whether the last index in the source GEP is a sequential idx.
2961 bool EndsWithSequential = false;
2962 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2963 I != E; ++I)
2964 EndsWithSequential = I.isSequential();
2965 if (!EndsWithSequential)
2966 return nullptr;
2967
2968 // Replace: gep (gep %P, long B), long A, ...
2969 // With: T = long A+B; gep %P, T, ...
2970 Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2971 Value *GO1 = GEP.getOperand(1);
2972
2973 // If they aren't the same type, then the input hasn't been processed
2974 // by the loop above yet (which canonicalizes sequential index types to
2975 // intptr_t). Just avoid transforming this until the input has been
2976 // normalized.
2977 if (SO1->getType() != GO1->getType())
2978 return nullptr;
2979
2980 Value *Sum =
2981 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2982 // Only do the combine when we are sure the cost after the
2983 // merge is never more than that before the merge.
2984 if (Sum == nullptr)
2985 return nullptr;
2986
2988 Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2989 Indices.push_back(Sum);
2990 Indices.append(GEP.op_begin() + 2, GEP.op_end());
2991
2992 // Don't create GEPs with more than one non-zero index.
2993 unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2994 auto *C = dyn_cast<Constant>(Idx);
2995 return !C || !C->isNullValue();
2996 });
2997 if (NumNonZeroIndices > 1)
2998 return nullptr;
2999
3000 return replaceInstUsesWith(
3001 GEP, Builder.CreateGEP(
3002 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
3004}
3005
3008 bool &DoesConsume, unsigned Depth) {
3009 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
3010 // ~(~(X)) -> X.
3011 Value *A, *B;
3012 if (match(V, m_Not(m_Value(A)))) {
3013 DoesConsume = true;
3014 return A;
3015 }
3016
3017 Constant *C;
3018 // Constants can be considered to be not'ed values.
3019 if (match(V, m_ImmConstant(C)))
3020 return ConstantExpr::getNot(C);
3021
3023 return nullptr;
3024
3025 // The rest of the cases require that we invert all uses so don't bother
3026 // doing the analysis if we know we can't use the result.
3027 if (!WillInvertAllUses)
3028 return nullptr;
3029
3030 // Compares can be inverted if all of their uses are being modified to use
3031 // the ~V.
3032 if (auto *I = dyn_cast<CmpInst>(V)) {
3033 if (Builder != nullptr)
3034 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
3035 I->getOperand(1));
3036 return NonNull;
3037 }
3038
3039 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3040 // `(-1 - B) - A` if we are willing to invert all of the uses.
3041 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
3042 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3043 DoesConsume, Depth))
3044 return Builder ? Builder->CreateSub(BV, A) : NonNull;
3045 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3046 DoesConsume, Depth))
3047 return Builder ? Builder->CreateSub(AV, B) : NonNull;
3048 return nullptr;
3049 }
3050
3051 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3052 // into `A ^ B` if we are willing to invert all of the uses.
3053 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
3054 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3055 DoesConsume, Depth))
3056 return Builder ? Builder->CreateXor(A, BV) : NonNull;
3057 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3058 DoesConsume, Depth))
3059 return Builder ? Builder->CreateXor(AV, B) : NonNull;
3060 return nullptr;
3061 }
3062
3063 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3064 // `A + (-1 - B)` if we are willing to invert all of the uses.
3065 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
3066 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3067 DoesConsume, Depth))
3068 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
3069 return nullptr;
3070 }
3071
3072 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3073 // into `A s>> B` if we are willing to invert all of the uses.
3074 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
3075 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3076 DoesConsume, Depth))
3077 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
3078 return nullptr;
3079 }
3080
3081 Value *Cond;
3082 // LogicOps are special in that we canonicalize them at the cost of an
3083 // instruction.
3084 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
3086 // Selects/min/max with invertible operands are freely invertible
3087 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
3088 bool LocalDoesConsume = DoesConsume;
3089 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
3090 LocalDoesConsume, Depth))
3091 return nullptr;
3092 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3093 LocalDoesConsume, Depth)) {
3094 DoesConsume = LocalDoesConsume;
3095 if (Builder != nullptr) {
3096 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3097 DoesConsume, Depth);
3098 assert(NotB != nullptr &&
3099 "Unable to build inverted value for known freely invertable op");
3100 if (auto *II = dyn_cast<IntrinsicInst>(V))
3101 return Builder->CreateBinaryIntrinsic(
3102 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
3103 return Builder->CreateSelect(
3104 Cond, NotA, NotB, "",
3106 }
3107 return NonNull;
3108 }
3109 }
3110
3111 if (PHINode *PN = dyn_cast<PHINode>(V)) {
3112 bool LocalDoesConsume = DoesConsume;
3114 for (Use &U : PN->operands()) {
3115 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3116 Value *NewIncomingVal = getFreelyInvertedImpl(
3117 U.get(), /*WillInvertAllUses=*/false,
3118 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
3119 if (NewIncomingVal == nullptr)
3120 return nullptr;
3121 // Make sure that we can safely erase the original PHI node.
3122 if (NewIncomingVal == V)
3123 return nullptr;
3124 if (Builder != nullptr)
3125 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
3126 }
3127
3128 DoesConsume = LocalDoesConsume;
3129 if (Builder != nullptr) {
3131 Builder->SetInsertPoint(PN);
3132 PHINode *NewPN =
3133 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
3134 for (auto [Val, Pred] : IncomingValues)
3135 NewPN->addIncoming(Val, Pred);
3136 return NewPN;
3137 }
3138 return NonNull;
3139 }
3140
3141 if (match(V, m_SExtLike(m_Value(A)))) {
3142 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3143 DoesConsume, Depth))
3144 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
3145 return nullptr;
3146 }
3147
3148 if (match(V, m_Trunc(m_Value(A)))) {
3149 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3150 DoesConsume, Depth))
3151 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
3152 return nullptr;
3153 }
3154
3155 // De Morgan's Laws:
3156 // (~(A | B)) -> (~A & ~B)
3157 // (~(A & B)) -> (~A | ~B)
3158 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3159 bool IsLogical, Value *A,
3160 Value *B) -> Value * {
3161 bool LocalDoesConsume = DoesConsume;
3162 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
3163 LocalDoesConsume, Depth))
3164 return nullptr;
3165 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3166 LocalDoesConsume, Depth)) {
3167 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3168 LocalDoesConsume, Depth);
3169 DoesConsume = LocalDoesConsume;
3170 if (IsLogical)
3171 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
3172 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
3173 }
3174
3175 return nullptr;
3176 };
3177
3178 if (match(V, m_Or(m_Value(A), m_Value(B))))
3179 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3180 B);
3181
3182 if (match(V, m_And(m_Value(A), m_Value(B))))
3183 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3184 B);
3185
3186 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
3187 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3188 B);
3189
3190 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
3191 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3192 B);
3193
3194 return nullptr;
3195}
3196
3197/// Return true if we should canonicalize the gep to an i8 ptradd.
3199 Value *PtrOp = GEP.getOperand(0);
3200 Type *GEPEltType = GEP.getSourceElementType();
3201 if (GEPEltType->isIntegerTy(8))
3202 return false;
3203
3204 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3205 // intrinsic. This has better support in BasicAA.
3206 if (GEPEltType->isScalableTy())
3207 return true;
3208
3209 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3210 // together.
3211 if (GEP.getNumIndices() == 1 &&
3212 match(GEP.getOperand(1),
3214 m_Shl(m_Value(), m_ConstantInt())))))
3215 return true;
3216
3217 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3218 // possibly be merged together.
3219 auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
3220 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3221 any_of(GEP.indices(), [](Value *V) {
3222 const APInt *C;
3223 return match(V, m_APInt(C)) && !C->isZero();
3224 });
3225}
3226
3228 IRBuilderBase &Builder) {
3229 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
3230 if (!Op1)
3231 return nullptr;
3232
3233 // Don't fold a GEP into itself through a PHI node. This can only happen
3234 // through the back-edge of a loop. Folding a GEP into itself means that
3235 // the value of the previous iteration needs to be stored in the meantime,
3236 // thus requiring an additional register variable to be live, but not
3237 // actually achieving anything (the GEP still needs to be executed once per
3238 // loop iteration).
3239 if (Op1 == &GEP)
3240 return nullptr;
3241 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3242
3243 int DI = -1;
3244
3245 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3246 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
3247 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3248 Op1->getSourceElementType() != Op2->getSourceElementType())
3249 return nullptr;
3250
3251 // As for Op1 above, don't try to fold a GEP into itself.
3252 if (Op2 == &GEP)
3253 return nullptr;
3254
3255 // Keep track of the type as we walk the GEP.
3256 Type *CurTy = nullptr;
3257
3258 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3259 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
3260 return nullptr;
3261
3262 if (Op1->getOperand(J) != Op2->getOperand(J)) {
3263 if (DI == -1) {
3264 // We have not seen any differences yet in the GEPs feeding the
3265 // PHI yet, so we record this one if it is allowed to be a
3266 // variable.
3267
3268 // The first two arguments can vary for any GEP, the rest have to be
3269 // static for struct slots
3270 if (J > 1) {
3271 assert(CurTy && "No current type?");
3272 if (CurTy->isStructTy())
3273 return nullptr;
3274 }
3275
3276 DI = J;
3277 } else {
3278 // The GEP is different by more than one input. While this could be
3279 // extended to support GEPs that vary by more than one variable it
3280 // doesn't make sense since it greatly increases the complexity and
3281 // would result in an R+R+R addressing mode which no backend
3282 // directly supports and would need to be broken into several
3283 // simpler instructions anyway.
3284 return nullptr;
3285 }
3286 }
3287
3288 // Sink down a layer of the type for the next iteration.
3289 if (J > 0) {
3290 if (J == 1) {
3291 CurTy = Op1->getSourceElementType();
3292 } else {
3293 CurTy =
3294 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
3295 }
3296 }
3297 }
3298
3299 NW &= Op2->getNoWrapFlags();
3300 }
3301
3302 // If not all GEPs are identical we'll have to create a new PHI node.
3303 // Check that the old PHI node has only one use so that it will get
3304 // removed.
3305 if (DI != -1 && !PN->hasOneUse())
3306 return nullptr;
3307
3308 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
3309 NewGEP->setNoWrapFlags(NW);
3310
3311 if (DI == -1) {
3312 // All the GEPs feeding the PHI are identical. Clone one down into our
3313 // BB so that it can be merged with the current GEP.
3314 } else {
3315 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3316 // into the current block so it can be merged, and create a new PHI to
3317 // set that index.
3318 PHINode *NewPN;
3319 {
3320 IRBuilderBase::InsertPointGuard Guard(Builder);
3321 Builder.SetInsertPoint(PN);
3322 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
3323 PN->getNumOperands());
3324 }
3325
3326 for (auto &I : PN->operands())
3327 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
3328 PN->getIncomingBlock(I));
3329
3330 NewGEP->setOperand(DI, NewPN);
3331 }
3332
3333 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
3334 return NewGEP;
3335}
3336
3338 Value *PtrOp = GEP.getOperand(0);
3339 SmallVector<Value *, 8> Indices(GEP.indices());
3340 Type *GEPType = GEP.getType();
3341 Type *GEPEltType = GEP.getSourceElementType();
3342 if (Value *V =
3343 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
3344 SQ.getWithInstruction(&GEP)))
3345 return replaceInstUsesWith(GEP, V);
3346
3347 // For vector geps, use the generic demanded vector support.
3348 // Skip if GEP return type is scalable. The number of elements is unknown at
3349 // compile-time.
3350 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
3351 auto VWidth = GEPFVTy->getNumElements();
3352 APInt PoisonElts(VWidth, 0);
3353 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3354 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
3355 PoisonElts)) {
3356 if (V != &GEP)
3357 return replaceInstUsesWith(GEP, V);
3358 return &GEP;
3359 }
3360 }
3361
3362 // Eliminate unneeded casts for indices, and replace indices which displace
3363 // by multiples of a zero size type with zero.
3364 bool MadeChange = false;
3365
3366 // Index width may not be the same width as pointer width.
3367 // Data layout chooses the right type based on supported integer types.
3368 Type *NewScalarIndexTy =
3369 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
3370
3372 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3373 ++I, ++GTI) {
3374 // Skip indices into struct types.
3375 if (GTI.isStruct())
3376 continue;
3377
3378 Type *IndexTy = (*I)->getType();
3379 Type *NewIndexType =
3380 IndexTy->isVectorTy()
3381 ? VectorType::get(NewScalarIndexTy,
3382 cast<VectorType>(IndexTy)->getElementCount())
3383 : NewScalarIndexTy;
3384
3385 // If the element type has zero size then any index over it is equivalent
3386 // to an index of zero, so replace it with zero if it is not zero already.
3387 Type *EltTy = GTI.getIndexedType();
3388 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
3389 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
3390 *I = Constant::getNullValue(NewIndexType);
3391 MadeChange = true;
3392 }
3393
3394 if (IndexTy != NewIndexType) {
3395 // If we are using a wider index than needed for this platform, shrink
3396 // it to what we need. If narrower, sign-extend it to what we need.
3397 // This explicit cast can make subsequent optimizations more obvious.
3398 if (IndexTy->getScalarSizeInBits() <
3399 NewIndexType->getScalarSizeInBits()) {
3400 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3401 *I = Builder.CreateZExt(*I, NewIndexType, "", /*IsNonNeg=*/true);
3402 else
3403 *I = Builder.CreateSExt(*I, NewIndexType);
3404 } else {
3405 *I = Builder.CreateTrunc(*I, NewIndexType, "", GEP.hasNoUnsignedWrap(),
3406 GEP.hasNoUnsignedSignedWrap());
3407 }
3408 MadeChange = true;
3409 }
3410 }
3411 if (MadeChange)
3412 return &GEP;
3413
3414 // Canonicalize constant GEPs to i8 type.
3415 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3416 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
3417 if (GEP.accumulateConstantOffset(DL, Offset))
3418 return replaceInstUsesWith(
3419 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
3420 GEP.getNoWrapFlags()));
3421 }
3422
3424 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
3425 Value *NewGEP =
3426 Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
3427 return replaceInstUsesWith(GEP, NewGEP);
3428 }
3429
3430 // Strip trailing zero indices.
3431 auto *LastIdx = dyn_cast<Constant>(Indices.back());
3432 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3433 return replaceInstUsesWith(
3434 GEP, Builder.CreateGEP(GEP.getSourceElementType(), PtrOp,
3435 drop_end(Indices), "", GEP.getNoWrapFlags()));
3436 }
3437
3438 // Strip leading zero indices.
3439 auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3440 if (FirstIdx && FirstIdx->isNullValue() &&
3441 !FirstIdx->getType()->isVectorTy()) {
3443 ++GTI;
3444 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3445 DL.getTypeAllocSize(GTI.getIndexedType()))
3446 return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3447 GEP.getPointerOperand(),
3448 drop_begin(Indices), "",
3449 GEP.getNoWrapFlags()));
3450 }
3451
3452 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3453 // Note that this loses information about undef lanes; we run it after
3454 // demanded bits to partially mitigate that loss.
3455 if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3456 return Op->getType()->isVectorTy() && getSplatValue(Op);
3457 })) {
3458 SmallVector<Value *> NewOps;
3459 for (auto &Op : GEP.operands()) {
3460 if (Op->getType()->isVectorTy())
3461 if (Value *Scalar = getSplatValue(Op)) {
3462 NewOps.push_back(Scalar);
3463 continue;
3464 }
3465 NewOps.push_back(Op);
3466 }
3467
3468 Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3469 ArrayRef(NewOps).drop_front(), GEP.getName(),
3470 GEP.getNoWrapFlags());
3471 if (!Res->getType()->isVectorTy()) {
3472 ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3473 Res = Builder.CreateVectorSplat(EC, Res);
3474 }
3475 return replaceInstUsesWith(GEP, Res);
3476 }
3477
3478 bool SeenNonZeroIndex = false;
3479 for (auto [IdxNum, Idx] : enumerate(Indices)) {
3480 // Ignore one leading zero index.
3481 auto *C = dyn_cast<Constant>(Idx);
3482 if (C && C->isNullValue() && IdxNum == 0)
3483 continue;
3484
3485 if (!SeenNonZeroIndex) {
3486 SeenNonZeroIndex = true;
3487 continue;
3488 }
3489
3490 // GEP has multiple non-zero indices: Split it.
3491 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
3492 Value *FrontGEP =
3493 Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
3494 GEP.getName() + ".split", GEP.getNoWrapFlags());
3495
3496 SmallVector<Value *> BackIndices;
3497 BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
3498 append_range(BackIndices, drop_begin(Indices, IdxNum));
3500 GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
3501 BackIndices, GEP.getNoWrapFlags());
3502 }
3503
3504 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3505 auto IsCanonicalType = [](Type *Ty) {
3506 if (auto *AT = dyn_cast<ArrayType>(Ty))
3507 Ty = AT->getElementType();
3508 return Ty->isIntegerTy(8);
3509 };
3510 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3511 TypeSize Scale = DL.getTypeAllocSize(GEPEltType);
3512 assert(!Scale.isScalable() && "Should have been handled earlier");
3513 Type *NewElemTy = Builder.getInt8Ty();
3514 if (Scale.getFixedValue() != 1)
3515 NewElemTy = ArrayType::get(NewElemTy, Scale.getFixedValue());
3516 GEP.setSourceElementType(NewElemTy);
3517 GEP.setResultElementType(NewElemTy);
3518 // Don't bother revisiting the GEP after this change.
3519 MadeIRChange = true;
3520 }
3521
3522 // Check to see if the inputs to the PHI node are getelementptr instructions.
3523 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
3524 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3525 return replaceOperand(GEP, 0, NewPtrOp);
3526 }
3527
3528 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
3529 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3530 return I;
3531
3532 if (GEP.getNumIndices() == 1) {
3533 unsigned AS = GEP.getPointerAddressSpace();
3534 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
3535 DL.getIndexSizeInBits(AS)) {
3536 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
3537
3538 if (TyAllocSize == 1) {
3539 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3540 // but only if the result pointer is only used as if it were an integer.
3541 // (The case where the underlying object is the same is handled by
3542 // InstSimplify.)
3543 Value *X = GEP.getPointerOperand();
3544 Value *Y;
3545 if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
3547 GEPType == Y->getType()) {
3548 bool HasNonAddressBits =
3549 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3550 bool Changed = GEP.replaceUsesWithIf(Y, [&](Use &U) {
3551 return isa<PtrToAddrInst, ICmpInst>(U.getUser()) ||
3552 (!HasNonAddressBits && isa<PtrToIntInst>(U.getUser()));
3553 });
3554 return Changed ? &GEP : nullptr;
3555 }
3556 } else if (auto *ExactIns =
3557 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
3558 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3559 Value *V;
3560 if (ExactIns->isExact()) {
3561 if ((has_single_bit(TyAllocSize) &&
3562 match(GEP.getOperand(1),
3563 m_Shr(m_Value(V),
3564 m_SpecificInt(countr_zero(TyAllocSize))))) ||
3565 match(GEP.getOperand(1),
3566 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
3567 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3568 GEP.getPointerOperand(), V,
3569 GEP.getNoWrapFlags());
3570 }
3571 }
3572 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3573 // Try to canonicalize non-i8 element type to i8 if the index is an
3574 // exact instruction. If the index is an exact instruction (div/shr)
3575 // with a constant RHS, we can fold the non-i8 element scale into the
3576 // div/shr (similar to the mul case, just inverted).
3577 const APInt *C;
3578 std::optional<APInt> NewC;
3579 if (has_single_bit(TyAllocSize) &&
3580 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
3581 C->uge(countr_zero(TyAllocSize)))
3582 NewC = *C - countr_zero(TyAllocSize);
3583 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
3584 APInt Quot;
3585 uint64_t Rem;
3586 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
3587 if (Rem == 0)
3588 NewC = Quot;
3589 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
3590 APInt Quot;
3591 int64_t Rem;
3592 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
3593 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3594 if (!Quot.isAllOnes() && Rem == 0)
3595 NewC = Quot;
3596 }
3597
3598 if (NewC.has_value()) {
3599 Value *NewOp = Builder.CreateExactBinOp(
3600 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
3601 ConstantInt::get(V->getType(), *NewC), /*IsExact=*/true);
3602 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3603 GEP.getPointerOperand(), NewOp,
3604 GEP.getNoWrapFlags());
3605 }
3606 }
3607 }
3608 }
3609 }
3610 // We do not handle pointer-vector geps here.
3611 if (GEPType->isVectorTy())
3612 return nullptr;
3613
3614 if (!GEP.isInBounds()) {
3615 unsigned IdxWidth =
3616 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3617 APInt BasePtrOffset(IdxWidth, 0);
3618 Value *UnderlyingPtrOp =
3619 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
3620 bool CanBeNull;
3621 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3622 DL, CanBeNull, /*CanBeFreed=*/nullptr);
3623 // We can ignore CanBeFreed here, because inbounds is explicitly allowed to
3624 // refer to a deallocated object.
3625 if (!CanBeNull && DerefBytes != 0) {
3626 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3627 BasePtrOffset.isNonNegative()) {
3628 APInt AllocSize(IdxWidth, DerefBytes);
3629 if (BasePtrOffset.ule(AllocSize)) {
3631 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3632 }
3633 }
3634 }
3635 }
3636
3637 // nusw + nneg -> nuw
3638 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3639 all_of(GEP.indices(), [&](Value *Idx) {
3640 return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
3641 })) {
3642 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3643 return &GEP;
3644 }
3645
3646 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3647 // to do this after having tried to derive "nuw" above.
3648 if (GEP.getNumIndices() == 1) {
3649 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3650 // geps if transforming into (gep (gep p, x), y).
3651 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3652 // We can preserve all of "inbounds nuw", "nusw nuw" and "nuw" if we know
3653 // that x + y does not have unsigned wrap.
3654 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3655 return GEP.getNoWrapFlags();
3656 return GEPNoWrapFlags::none();
3657 };
3658
3659 // Try to replace ADD + GEP with GEP + GEP.
3660 Value *Idx1, *Idx2;
3661 if (match(GEP.getOperand(1),
3662 m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
3663 // %idx = add i64 %idx1, %idx2
3664 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3665 // as:
3666 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3667 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3668 bool NUW = match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()));
3669 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3670 auto *NewPtr =
3671 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3672 Idx1, "", NWFlags);
3673 return replaceInstUsesWith(GEP,
3674 Builder.CreateGEP(GEP.getSourceElementType(),
3675 NewPtr, Idx2, "", NWFlags));
3676 }
3677 ConstantInt *C;
3678 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAddLike(
3679 m_Value(Idx1), m_ConstantInt(C))))))) {
3680 // %add = add nsw i32 %idx1, idx2
3681 // %sidx = sext i32 %add to i64
3682 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3683 // as:
3684 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3685 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3686 bool NUW = match(GEP.getOperand(1),
3688 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3689 auto *NewPtr = Builder.CreateGEP(
3690 GEP.getSourceElementType(), GEP.getPointerOperand(),
3691 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "", NWFlags);
3692 return replaceInstUsesWith(
3693 GEP,
3694 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3695 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3696 "", NWFlags));
3697 }
3698 }
3699
3701 return R;
3702
3703 // srem -> (and/urem) for inbounds+nuw GEP
3704 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3705 Value *X, *Y;
3706
3707 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3708 if (match(Indices[0], m_OneUse(m_SRem(m_Value(X), m_Value(Y)))) &&
3709 isKnownToBeAPowerOfTwo(Y, /*OrZero=*/true, &GEP)) {
3710 // If GEP is inbounds+nuw, the offset cannot be negative
3711 // -> srem by power-of-two can be treated as urem,
3712 // and urem by power-of-two folds to 'and' later.
3713 // OrZero=true is fine here because division by zero is UB.
3714 Instruction *OldIdxI = cast<Instruction>(Indices[0]);
3715 Value *NewIdx = Builder.CreateURem(X, Y, OldIdxI->getName());
3716
3717 return GetElementPtrInst::Create(GEPEltType, PtrOp, {NewIdx},
3718 GEP.getNoWrapFlags());
3719 }
3720 }
3721
3722 return nullptr;
3723}
3724
3726 Instruction *AI) {
3728 return true;
3729 if (auto *LI = dyn_cast<LoadInst>(V))
3730 return isa<GlobalVariable>(LI->getPointerOperand());
3731 // Two distinct allocations will never be equal.
3732 return isAllocLikeFn(V, &TLI) && V != AI;
3733}
3734
3735/// Given a call CB which uses an address UsedV, return true if we can prove the
3736/// call's only possible effect is storing to V.
3737static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3738 const TargetLibraryInfo &TLI) {
3739 if (!CB.use_empty())
3740 // TODO: add recursion if returned attribute is present
3741 return false;
3742
3743 if (CB.isTerminator())
3744 // TODO: remove implementation restriction
3745 return false;
3746
3747 if (!CB.willReturn() || !CB.doesNotThrow())
3748 return false;
3749
3750 // If the only possible side effect of the call is writing to the alloca,
3751 // and the result isn't used, we can safely remove any reads implied by the
3752 // call including those which might read the alloca itself.
3753 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3754 return Dest && Dest->Ptr == UsedV;
3755}
3756
3757static std::optional<ModRefInfo>
3759 const TargetLibraryInfo &TLI, bool KnowInit) {
3761 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3762 Worklist.push_back(AI);
3764
3765 do {
3766 Instruction *PI = Worklist.pop_back_val();
3767 for (User *U : PI->users()) {
3769 if (Users.size() >= MaxAllocSiteRemovableUsers)
3770 return std::nullopt;
3771 switch (I->getOpcode()) {
3772 default:
3773 // Give up the moment we see something we can't handle.
3774 return std::nullopt;
3775
3776 case Instruction::AddrSpaceCast:
3777 case Instruction::BitCast:
3778 case Instruction::GetElementPtr:
3779 Users.emplace_back(I);
3780 Worklist.push_back(I);
3781 continue;
3782
3783 case Instruction::ICmp: {
3784 ICmpInst *ICI = cast<ICmpInst>(I);
3785 // We can fold eq/ne comparisons with null to false/true, respectively.
3786 // We also fold comparisons in some conditions provided the alloc has
3787 // not escaped (see isNeverEqualToUnescapedAlloc).
3788 if (!ICI->isEquality())
3789 return std::nullopt;
3790 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3791 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3792 return std::nullopt;
3793
3794 // Do not fold compares to aligned_alloc calls, as they may have to
3795 // return null in case the required alignment cannot be satisfied,
3796 // unless we can prove that both alignment and size are valid.
3797 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3798 // Check if alignment and size of a call to aligned_alloc is valid,
3799 // that is alignment is a power-of-2 and the size is a multiple of the
3800 // alignment.
3801 const APInt *Alignment;
3802 const APInt *Size;
3803 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3804 match(CB->getArgOperand(1), m_APInt(Size)) &&
3805 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3806 };
3807 auto *CB = dyn_cast<CallBase>(AI);
3808 LibFunc TheLibFunc;
3809 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3810 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3811 !AlignmentAndSizeKnownValid(CB))
3812 return std::nullopt;
3813 Users.emplace_back(I);
3814 continue;
3815 }
3816
3817 case Instruction::Call:
3818 // Ignore no-op and store intrinsics.
3820 switch (II->getIntrinsicID()) {
3821 default:
3822 return std::nullopt;
3823
3824 case Intrinsic::memmove:
3825 case Intrinsic::memcpy:
3826 case Intrinsic::memset: {
3828 if (MI->isVolatile())
3829 return std::nullopt;
3830 // Note: this could also be ModRef, but we can still interpret that
3831 // as just Mod in that case.
3832 ModRefInfo NewAccess =
3833 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
3834 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3835 return std::nullopt;
3836 Access |= NewAccess;
3837 [[fallthrough]];
3838 }
3839 case Intrinsic::assume:
3840 case Intrinsic::invariant_start:
3841 case Intrinsic::invariant_end:
3842 case Intrinsic::lifetime_start:
3843 case Intrinsic::lifetime_end:
3844 case Intrinsic::objectsize:
3845 Users.emplace_back(I);
3846 continue;
3847 case Intrinsic::launder_invariant_group:
3848 case Intrinsic::strip_invariant_group:
3849 Users.emplace_back(I);
3850 Worklist.push_back(I);
3851 continue;
3852 }
3853 }
3854
3855 if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3856 getAllocationFamily(I, &TLI) == Family) {
3857 Users.emplace_back(I);
3858 continue;
3859 }
3860
3861 if (Family && getReallocatedOperand(cast<CallBase>(I)) == PI &&
3862 getAllocationFamily(I, &TLI) == Family) {
3863 Users.emplace_back(I);
3864 Worklist.push_back(I);
3865 continue;
3866 }
3867
3868 if (!isRefSet(Access) &&
3869 isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3871 Users.emplace_back(I);
3872 continue;
3873 }
3874
3875 return std::nullopt;
3876
3877 case Instruction::Store: {
3879 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3880 return std::nullopt;
3881 if (isRefSet(Access))
3882 return std::nullopt;
3884 Users.emplace_back(I);
3885 continue;
3886 }
3887
3888 case Instruction::Load: {
3889 LoadInst *LI = cast<LoadInst>(I);
3890 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3891 return std::nullopt;
3892 if (isModSet(Access))
3893 return std::nullopt;
3895 Users.emplace_back(I);
3896 continue;
3897 }
3898 }
3899 llvm_unreachable("missing a return?");
3900 }
3901 } while (!Worklist.empty());
3902
3904 return Access;
3905}
3906
3909
3910 // If we have a malloc call which is only used in any amount of comparisons to
3911 // null and free calls, delete the calls and replace the comparisons with true
3912 // or false as appropriate.
3913
3914 // This is based on the principle that we can substitute our own allocation
3915 // function (which will never return null) rather than knowledge of the
3916 // specific function being called. In some sense this can change the permitted
3917 // outputs of a program (when we convert a malloc to an alloca, the fact that
3918 // the allocation is now on the stack is potentially visible, for example),
3919 // but we believe in a permissible manner.
3920 //
3921 // Collect into Instruction* first to avoid expensive WeakTrackingVH
3922 // register/unregister overhead; convert to WeakTrackingVH only when the
3923 // site is actually removable.
3925
3926 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3927 // before each store.
3929 std::unique_ptr<DIBuilder> DIB;
3930 if (isa<AllocaInst>(MI)) {
3931 findDbgUsers(&MI, DVRs);
3932 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3933 }
3934
3935 // Determine what getInitialValueOfAllocation would return without actually
3936 // allocating the result.
3937 bool KnowInitUndef = false;
3938 bool KnowInitZero = false;
3939 Constant *Init =
3941 if (Init) {
3942 if (isa<UndefValue>(Init))
3943 KnowInitUndef = true;
3944 else if (Init->isNullValue())
3945 KnowInitZero = true;
3946 }
3947 // The various sanitizers don't actually return undef memory, but rather
3948 // memory initialized with special forms of runtime poison
3949 auto &F = *MI.getFunction();
3950 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
3951 F.hasFnAttribute(Attribute::SanitizeAddress))
3952 KnowInitUndef = false;
3953
3954 auto Removable =
3955 isAllocSiteRemovable(&MI, RawUsers, TLI, KnowInitZero | KnowInitUndef);
3956 if (Removable) {
3957 SmallVector<WeakTrackingVH, 64> Users(RawUsers.begin(), RawUsers.end());
3958 for (WeakTrackingVH &User : Users) {
3959 // Lowering all @llvm.objectsize and MTI calls first because they may use
3960 // a bitcast/GEP of the alloca we are removing.
3961 if (!User)
3962 continue;
3963
3965
3967 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3968 SmallVector<Instruction *> InsertedInstructions;
3969 Value *Result = lowerObjectSizeCall(
3970 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3971 for (Instruction *Inserted : InsertedInstructions)
3972 Worklist.add(Inserted);
3973 replaceInstUsesWith(*I, Result);
3975 User = nullptr; // Skip examining in the next loop.
3976 continue;
3977 }
3978 if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
3979 if (KnowInitZero && isRefSet(*Removable)) {
3981 Builder.SetInsertPoint(MTI);
3982 auto *M = Builder.CreateMemSet(
3983 MTI->getRawDest(),
3984 ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
3985 MTI->getLength(), MTI->getDestAlign());
3986 M->copyMetadata(*MTI);
3987 }
3988 }
3989 }
3990 }
3991 for (WeakTrackingVH &User : Users) {
3992 if (!User)
3993 continue;
3994
3996
3997 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3999 *C, ConstantInt::get(C->getType(), C->isFalseWhenEqual()));
4000 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4001 for (auto *DVR : DVRs)
4002 if (DVR->isAddressOfVariable())
4004 } else {
4005 // Casts, GEP, or anything else: we're about to delete this instruction,
4006 // so it can not have any valid uses.
4008 if (isa<LoadInst>(I)) {
4009 assert(KnowInitZero || KnowInitUndef);
4010 Replace = KnowInitUndef ? UndefValue::get(I->getType())
4011 : Constant::getNullValue(I->getType());
4012 } else
4013 Replace = PoisonValue::get(I->getType());
4015 }
4017 }
4018
4020 // Replace invoke with a NOP intrinsic to maintain the original CFG
4021 Module *M = II->getModule();
4022 Function *F = Intrinsic::getOrInsertDeclaration(M, Intrinsic::donothing);
4023 auto *NewII = InvokeInst::Create(
4024 F, II->getNormalDest(), II->getUnwindDest(), {}, "", II->getParent());
4025 NewII->setDebugLoc(II->getDebugLoc());
4026 }
4027
4028 // Remove debug intrinsics which describe the value contained within the
4029 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4030 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4031 //
4032 // ```
4033 // define void @foo(i32 %0) {
4034 // %a = alloca i32 ; Deleted.
4035 // store i32 %0, i32* %a
4036 // dbg.value(i32 %0, "arg0") ; Not deleted.
4037 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4038 // call void @trivially_inlinable_no_op(i32* %a)
4039 // ret void
4040 // }
4041 // ```
4042 //
4043 // This may not be required if we stop describing the contents of allocas
4044 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4045 // the LowerDbgDeclare utility.
4046 //
4047 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4048 // "arg0" dbg.value may be stale after the call. However, failing to remove
4049 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4050 //
4051 // FIXME: the Assignment Tracking project has now likely made this
4052 // redundant (and it's sometimes harmful).
4053 for (auto *DVR : DVRs)
4054 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4055 DVR->eraseFromParent();
4056
4057 return eraseInstFromFunction(MI);
4058 }
4059 return nullptr;
4060}
4061
4062/// Move the call to free before a NULL test.
4063///
4064 /// Check if this free is accessed after its argument has been tested
4065/// against NULL (property 0).
4066/// If yes, it is legal to move this call in its predecessor block.
4067///
4068/// The move is performed only if the block containing the call to free
4069/// will be removed, i.e.:
4070/// 1. it has only one predecessor P, and P has two successors
4071/// 2. it contains the call, noops, and an unconditional branch
4072/// 3. its successor is the same as its predecessor's successor
4073///
4074 /// Profitability is not a concern here and this function should
4075/// be called only if the caller knows this transformation would be
4076/// profitable (e.g., for code size).
4078 const DataLayout &DL) {
4079 Value *Op = FI.getArgOperand(0);
4080 BasicBlock *FreeInstrBB = FI.getParent();
4081 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4082
4083 // Validate part of constraint #1: Only one predecessor
4084 // FIXME: We can extend the number of predecessors, but in that case, we
4085 // would duplicate the call to free in each predecessor and it may
4086 // not be profitable even for code size.
4087 if (!PredBB)
4088 return nullptr;
4089
4090 // Validate constraint #2: Does this block contain only the call to
4091 // free, noops, and an unconditional branch?
4092 BasicBlock *SuccBB;
4093 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4094 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
4095 return nullptr;
4096
4097 // If there are only 2 instructions in the block, at this point,
4098 // these are the call to free and the unconditional branch.
4099 // If there are more than 2 instructions, check that they are noops
4100 // i.e., they won't hurt the performance of the generated code.
4101 if (FreeInstrBB->size() != 2) {
4102 for (const Instruction &Inst : *FreeInstrBB) {
4103 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4105 continue;
4106 auto *Cast = dyn_cast<CastInst>(&Inst);
4107 if (!Cast || !Cast->isNoopCast(DL))
4108 return nullptr;
4109 }
4110 }
4111 // Validate the rest of constraint #1 by matching on the pred branch.
4112 Instruction *TI = PredBB->getTerminator();
4113 BasicBlock *TrueBB, *FalseBB;
4114 CmpPredicate Pred;
4115 if (!match(TI, m_Br(m_ICmp(Pred,
4117 m_Specific(Op->stripPointerCasts())),
4118 m_Zero()),
4119 TrueBB, FalseBB)))
4120 return nullptr;
4121 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4122 return nullptr;
4123
4124 // Validate constraint #3: Ensure the null case just falls through.
4125 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4126 return nullptr;
4127 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4128 "Broken CFG: missing edge from predecessor to successor");
4129
4130 // At this point, we know that everything in FreeInstrBB can be moved
4131 // before TI.
4132 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
4133 if (&Instr == FreeInstrBBTerminator)
4134 break;
4135 Instr.moveBeforePreserving(TI->getIterator());
4136 }
4137 assert(FreeInstrBB->size() == 1 &&
4138 "Only the branch instruction should remain");
4139
4140 // Now that we've moved the call to free before the NULL check, we have to
4141 // remove any attributes on its parameter that imply it's non-null, because
4142 // those attributes might have only been valid because of the NULL check, and
4143 // we can get miscompiles if we keep them. This is conservative if non-null is
4144 // also implied by something other than the NULL check, but it's guaranteed to
4145 // be correct, and the conservativeness won't matter in practice, since the
4146 // attributes are irrelevant for the call to free itself and the pointer
4147 // shouldn't be used after the call.
4148 AttributeList Attrs = FI.getAttributes();
4149 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
4150 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
4151 if (Dereferenceable.isValid()) {
4152 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4153 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
4154 Attribute::Dereferenceable);
4155 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
4156 }
4157 FI.setAttributes(Attrs);
4158
4159 return &FI;
4160}
4161
4163 // free undef -> unreachable.
4164 if (isa<UndefValue>(Op)) {
4165 // Leave a marker since we can't modify the CFG here.
4167 return eraseInstFromFunction(FI);
4168 }
4169
4170 // If we have 'free null' delete the instruction. This can happen in stl code
4171 // when lots of inlining happens.
4173 return eraseInstFromFunction(FI);
4174
4175 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4176 // realloc() entirely.
4178 if (CI && CI->hasOneUse())
4179 if (Value *ReallocatedOp = getReallocatedOperand(CI))
4180 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
4181
4182 // If we optimize for code size, try to move the call to free before the null
4183 // test so that SimplifyCFG can remove the empty block and dead code
4184 // elimination can remove the branch. I.e., helps to turn something like:
4185 // if (foo) free(foo);
4186 // into
4187 // free(foo);
4188 //
4189 // Note that we can only do this for 'free' and not for any flavor of
4190 // 'operator delete'; there is no 'operator delete' symbol for which we are
4191 // permitted to invent a call, even if we're passing in a null pointer.
4192 if (MinimizeSize) {
4193 LibFunc Func;
4194 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
4196 return I;
4197 }
4198
4199 return nullptr;
4200}
4201
4203 Value *RetVal = RI.getReturnValue();
4204 if (!RetVal)
4205 return nullptr;
4206
4207 Function *F = RI.getFunction();
4208 Type *RetTy = RetVal->getType();
4209 if (RetTy->isPointerTy()) {
4210 bool HasDereferenceable =
4211 F->getAttributes().getRetDereferenceableBytes() > 0;
4212 if (F->hasRetAttribute(Attribute::NonNull) ||
4213 (HasDereferenceable &&
4215 if (Value *V = simplifyNonNullOperand(RetVal, HasDereferenceable))
4216 return replaceOperand(RI, 0, V);
4217 }
4218 }
4219
4220 if (!AttributeFuncs::isNoFPClassCompatibleType(RetTy))
4221 return nullptr;
4222
4223 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4224 if (ReturnClass == fcNone)
4225 return nullptr;
4226
4227 KnownFPClass KnownClass;
4228 if (SimplifyDemandedFPClass(&RI, 0, ~ReturnClass, KnownClass,
4229 SQ.getWithInstruction(&RI)))
4230 return &RI;
4231
4232 return nullptr;
4233}
4234
4235// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4237 // Try to remove the previous instruction if it must lead to unreachable.
4238 // This includes instructions like stores and "llvm.assume" that may not get
4239 // removed by simple dead code elimination.
4240 bool Changed = false;
4241 while (Instruction *Prev = I.getPrevNode()) {
4242 // While we theoretically can erase EH, that would result in a block that
4243 // used to start with an EH no longer starting with EH, which is invalid.
4244 // To make it valid, we'd need to fixup predecessors to no longer refer to
4245 // this block, but that changes CFG, which is not allowed in InstCombine.
4246 if (Prev->isEHPad())
4247 break; // Can not drop any more instructions. We're done here.
4248
4250 break; // Can not drop any more instructions. We're done here.
4251 // Otherwise, this instruction can be freely erased,
4252 // even if it is not side-effect free.
4253
4254 // A value may still have uses before we process it here (for example, in
4255 // another unreachable block), so convert those to poison.
4256 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
4257 eraseInstFromFunction(*Prev);
4258 Changed = true;
4259 }
4260 return Changed;
4261}
4262
4267
4269 // If this store is the second-to-last instruction in the basic block
4270 // (excluding debug info) and if the block ends with
4271 // an unconditional branch, try to move the store to the successor block.
4272
4273 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4274 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4275 do {
4276 if (BBI != FirstInstr)
4277 --BBI;
4278 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4279
4280 return dyn_cast<StoreInst>(BBI);
4281 };
4282
4283 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4285 return &BI;
4286
4287 return nullptr;
4288}
4289
4292 if (!DeadEdges.insert({From, To}).second)
4293 return;
4294
4295 // Replace phi node operands in successor with poison.
4296 for (PHINode &PN : To->phis())
4297 for (Use &U : PN.incoming_values())
4298 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
4299 replaceUse(U, PoisonValue::get(PN.getType()));
4300 addToWorklist(&PN);
4301 MadeIRChange = true;
4302 }
4303
4304 Worklist.push_back(To);
4305}
4306
4307// Under the assumption that I is unreachable, remove it and following
4308// instructions. Changes are reported directly to MadeIRChange.
4311 BasicBlock *BB = I->getParent();
4312 for (Instruction &Inst : make_early_inc_range(
4313 make_range(std::next(BB->getTerminator()->getReverseIterator()),
4314 std::next(I->getReverseIterator())))) {
4315 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4316 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
4317 MadeIRChange = true;
4318 }
4319 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4320 continue;
4321 // RemoveDIs: erase debug-info on this instruction manually.
4322 Inst.dropDbgRecords();
4324 MadeIRChange = true;
4325 }
4326
4329 MadeIRChange = true;
4330 for (Value *V : Changed)
4332 }
4333
4334 // Handle potentially dead successors.
4335 for (BasicBlock *Succ : successors(BB))
4336 addDeadEdge(BB, Succ, Worklist);
4337}
4338
4341 while (!Worklist.empty()) {
4342 BasicBlock *BB = Worklist.pop_back_val();
4343 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
4344 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4345 }))
4346 continue;
4347
4349 }
4350}
4351
4353 BasicBlock *LiveSucc) {
4355 for (BasicBlock *Succ : successors(BB)) {
4356 // The live successor isn't dead.
4357 if (Succ == LiveSucc)
4358 continue;
4359
4360 addDeadEdge(BB, Succ, Worklist);
4361 }
4362
4364}
4365
4367 // Change br (not X), label True, label False to: br X, label False, True
4368 Value *Cond = BI.getCondition();
4369 Value *X;
4370 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
4371 // Swap Destinations and condition...
4372 BI.swapSuccessors();
4373 if (BPI)
4374 BPI->swapSuccEdgesProbabilities(BI.getParent());
4375 return replaceOperand(BI, 0, X);
4376 }
4377
4378 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4379 // This is done by inverting the condition and swapping successors:
4380 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4381 Value *Y;
4382 if (isa<SelectInst>(Cond) &&
4383 match(Cond,
4385 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
4386 Value *Or = Builder.CreateLogicalOr(NotX, Y);
4387
4388 // Set weights for the new OR select instruction too.
4390 if (auto *OrInst = dyn_cast<Instruction>(Or)) {
4391 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
4392 SmallVector<uint32_t> Weights;
4393 if (extractBranchWeights(*CondInst, Weights)) {
4394 assert(Weights.size() == 2 &&
4395 "Unexpected number of branch weights!");
4396 std::swap(Weights[0], Weights[1]);
4397 setBranchWeights(*OrInst, Weights, /*IsExpected=*/false);
4398 }
4399 }
4400 }
4401 }
4402 BI.swapSuccessors();
4403 if (BPI)
4404 BPI->swapSuccEdgesProbabilities(BI.getParent());
4405 return replaceOperand(BI, 0, Or);
4406 }
4407
4408 // If the condition is irrelevant, remove the use so that other
4409 // transforms on the condition become more effective.
4410 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
4411 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
4412
4413 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4414 CmpPredicate Pred;
4415 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
4416 !isCanonicalPredicate(Pred)) {
4417 // Swap destinations and condition.
4418 auto *Cmp = cast<CmpInst>(Cond);
4419 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
4420 BI.swapSuccessors();
4421 if (BPI)
4422 BPI->swapSuccEdgesProbabilities(BI.getParent());
4423 Worklist.push(Cmp);
4424 return &BI;
4425 }
4426
4427 if (isa<UndefValue>(Cond)) {
4428 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
4429 return nullptr;
4430 }
4431 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4432 handlePotentiallyDeadSuccessors(BI.getParent(),
4433 BI.getSuccessor(!CI->getZExtValue()));
4434 return nullptr;
4435 }
4436
4437 // Replace all dominated uses of the condition with true/false
4438 // Ignore constant expressions to avoid iterating over uses in other
4439 // functions.
4440 if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
4441 for (auto &U : make_early_inc_range(Cond->uses())) {
4442 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
4443 if (DT.dominates(Edge0, U)) {
4444 replaceUse(U, ConstantInt::getTrue(Cond->getType()));
4445 addToWorklist(cast<Instruction>(U.getUser()));
4446 continue;
4447 }
4448 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(1));
4449 if (DT.dominates(Edge1, U)) {
4450 replaceUse(U, ConstantInt::getFalse(Cond->getType()));
4451 addToWorklist(cast<Instruction>(U.getUser()));
4452 }
4453 }
4454 }
4455
4456 DC.registerBranch(&BI);
4457 return nullptr;
4458}
4459
4460// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4461// we can prove that both (switch C) and (switch X) go to the default when cond
4462// is false/true.
4465 bool IsTrueArm) {
4466 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4467 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
4468 if (!C)
4469 return nullptr;
4470
4471 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4472 if (CstBB != SI.getDefaultDest())
4473 return nullptr;
4474 Value *X = Select->getOperand(3 - CstOpIdx);
4475 CmpPredicate Pred;
4476 const APInt *RHSC;
4477 if (!match(Select->getCondition(),
4478 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
4479 return nullptr;
4480 if (IsTrueArm)
4481 Pred = ICmpInst::getInversePredicate(Pred);
4482
4483 // See whether we can replace the select with X
4485 for (auto Case : SI.cases())
4486 if (!CR.contains(Case.getCaseValue()->getValue()))
4487 return nullptr;
4488
4489 return X;
4490}
4491
4493 Value *Cond = SI.getCondition();
4494 Value *Op0;
4495 const APInt *CondOpC;
4496 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4497
4498 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4499 if (match(Cond, m_Add(m_Value(Op0), m_APInt(CondOpC))))
4500 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4501 return [](const APInt &Case, const APInt &C) { return Case - C; };
4502
4503 if (match(Cond, m_Sub(m_APInt(CondOpC), m_Value(Op0))))
4504 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4505 return [](const APInt &Case, const APInt &C) { return C - Case; };
4506
4507 if (match(Cond, m_Xor(m_Value(Op0), m_APInt(CondOpC))) &&
4508 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4509 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4510 // Prevent creation of large case values by excluding extremes.
4511 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4512
4513 return nullptr;
4514 };
4515
4516 // Attempt to invert and simplify the switch condition, as long as the
4517 // condition is not used further, as it may not be profitable otherwise.
4518 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4519 for (auto &Case : SI.cases()) {
4520 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4521 Case.setValue(ConstantInt::get(SI.getContext(), New));
4522 }
4523 return replaceOperand(SI, 0, Op0);
4524 }
4525
4526 uint64_t ShiftAmt;
4527 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
4528 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4529 all_of(SI.cases(), [&](const auto &Case) {
4530 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4531 })) {
4532 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4534 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4535 Shl->hasOneUse()) {
4536 Value *NewCond = Op0;
4537 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4538 // If the shift may wrap, we need to mask off the shifted bits.
4539 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4540 NewCond = Builder.CreateAnd(
4541 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
4542 }
4543 for (auto Case : SI.cases()) {
4544 const APInt &CaseVal = Case.getCaseValue()->getValue();
4545 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4546 : CaseVal.lshr(ShiftAmt);
4547 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
4548 }
4549 return replaceOperand(SI, 0, NewCond);
4550 }
4551 }
4552
4553 // Fold switch(zext/sext(X)) into switch(X) if possible.
4554 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
4555 bool IsZExt = isa<ZExtInst>(Cond);
4556 Type *SrcTy = Op0->getType();
4557 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4558
4559 if (all_of(SI.cases(), [&](const auto &Case) {
4560 const APInt &CaseVal = Case.getCaseValue()->getValue();
4561 return IsZExt ? CaseVal.isIntN(NewWidth)
4562 : CaseVal.isSignedIntN(NewWidth);
4563 })) {
4564 for (auto &Case : SI.cases()) {
4565 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4566 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4567 }
4568 return replaceOperand(SI, 0, Op0);
4569 }
4570 }
4571
4572 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4573 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
4574 if (Value *V =
4575 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4576 return replaceOperand(SI, 0, V);
4577 if (Value *V =
4578 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4579 return replaceOperand(SI, 0, V);
4580 }
4581
4582 KnownBits Known = computeKnownBits(Cond, &SI);
4583 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4584 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4585
4586 // Compute the number of leading bits we can ignore.
4587 // TODO: A better way to determine this would use ComputeNumSignBits().
4588 for (const auto &C : SI.cases()) {
4589 LeadingKnownZeros =
4590 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
4591 LeadingKnownOnes =
4592 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
4593 }
4594
4595 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
4596
4597 // Shrink the condition operand if the new type is smaller than the old type.
4598 // But do not shrink to a non-standard type, because backend can't generate
4599 // good code for that yet.
4600 // TODO: We can make it aggressive again after fixing PR39569.
4601 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4602 shouldChangeType(Known.getBitWidth(), NewWidth)) {
4603 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
4604 Builder.SetInsertPoint(&SI);
4605 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
4606
4607 for (auto Case : SI.cases()) {
4608 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4609 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4610 }
4611 return replaceOperand(SI, 0, NewCond);
4612 }
4613
4614 if (isa<UndefValue>(Cond)) {
4615 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
4616 return nullptr;
4617 }
4618 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4620 SI.findCaseValue(CI)->getCaseSuccessor());
4621 return nullptr;
4622 }
4623
4624 return nullptr;
4625}
4626
4628InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4630 if (!WO)
4631 return nullptr;
4632
4633 Intrinsic::ID OvID = WO->getIntrinsicID();
4634 const APInt *C = nullptr;
4635 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
4636 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4637 OvID == Intrinsic::umul_with_overflow)) {
4638 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4639 if (C->isAllOnes())
4640 return BinaryOperator::CreateNeg(WO->getLHS());
4641 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4642 if (C->isPowerOf2()) {
4643 return BinaryOperator::CreateShl(
4644 WO->getLHS(),
4645 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
4646 }
4647 }
4648 }
4649
4650 // We're extracting from an overflow intrinsic. See if we're the only user.
4651 // That allows us to simplify multiple result intrinsics to simpler things
4652 // that just get one value.
4653 if (!WO->hasOneUse())
4654 return nullptr;
4655
4656 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4657 // and replace it with a traditional binary instruction.
4658 if (*EV.idx_begin() == 0) {
4659 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4660 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4661 // Replace the old instruction's uses with poison.
4662 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
4664 return BinaryOperator::Create(BinOp, LHS, RHS);
4665 }
4666
4667 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4668
4669 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4670 if (OvID == Intrinsic::usub_with_overflow)
4671 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4672
4673 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4674 // +1 is not possible because we assume signed values.
4675 if (OvID == Intrinsic::smul_with_overflow &&
4676 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
4677 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
4678
4679 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4680 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4681 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4682 // Only handle even bitwidths for performance reasons.
4683 if (BitWidth % 2 == 0)
4684 return new ICmpInst(
4685 ICmpInst::ICMP_UGT, WO->getLHS(),
4686 ConstantInt::get(WO->getLHS()->getType(),
4688 }
4689
4690 // If only the overflow result is used, and the right hand side is a
4691 // constant (or constant splat), we can remove the intrinsic by directly
4692 // checking for overflow.
4693 if (C) {
4694 // Compute the no-wrap range for LHS given RHS=C, then construct an
4695 // equivalent icmp, potentially using an offset.
4696 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4697 WO->getBinaryOp(), *C, WO->getNoWrapKind());
4698
4699 CmpInst::Predicate Pred;
4700 APInt NewRHSC, Offset;
4701 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
4702 auto *OpTy = WO->getRHS()->getType();
4703 auto *NewLHS = WO->getLHS();
4704 if (Offset != 0)
4705 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
4706 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
4707 ConstantInt::get(OpTy, NewRHSC));
4708 }
4709
4710 return nullptr;
4711}
4712
4715 InstCombiner::BuilderTy &Builder) {
4716 // Helper to fold frexp of select to select of frexp.
4717
4718 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4719 return nullptr;
4721 Value *TrueVal = SelectInst->getTrueValue();
4722 Value *FalseVal = SelectInst->getFalseValue();
4723
4724 const APFloat *ConstVal = nullptr;
4725 Value *VarOp = nullptr;
4726 bool ConstIsTrue = false;
4727
4728 if (match(TrueVal, m_APFloat(ConstVal))) {
4729 VarOp = FalseVal;
4730 ConstIsTrue = true;
4731 } else if (match(FalseVal, m_APFloat(ConstVal))) {
4732 VarOp = TrueVal;
4733 ConstIsTrue = false;
4734 } else {
4735 return nullptr;
4736 }
4737
4738 Builder.SetInsertPoint(&EV);
4739
4740 CallInst *NewFrexp =
4741 Builder.CreateCall(FrexpCall->getCalledFunction(), {VarOp}, "frexp");
4742 NewFrexp->copyIRFlags(FrexpCall);
4743
4744 Value *NewEV = Builder.CreateExtractValue(NewFrexp, 0, "mantissa");
4745
4746 int Exp;
4747 APFloat Mantissa = frexp(*ConstVal, Exp, APFloat::rmNearestTiesToEven);
4748
4749 Constant *ConstantMantissa = ConstantFP::get(TrueVal->getType(), Mantissa);
4750
4751 Value *NewSel = Builder.CreateSelectFMF(
4752 Cond, ConstIsTrue ? ConstantMantissa : NewEV,
4753 ConstIsTrue ? NewEV : ConstantMantissa, SelectInst, "select.frexp");
4754 return NewSel;
4755}
4757 Value *Agg = EV.getAggregateOperand();
4758
4759 if (!EV.hasIndices())
4760 return replaceInstUsesWith(EV, Agg);
4761
4762 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
4763 SQ.getWithInstruction(&EV)))
4764 return replaceInstUsesWith(EV, V);
4765
4766 Value *Cond, *TrueVal, *FalseVal;
4768 m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal)))))) {
4769 auto *SelInst =
4770 cast<SelectInst>(cast<IntrinsicInst>(Agg)->getArgOperand(0));
4771 if (Value *Result =
4772 foldFrexpOfSelect(EV, cast<IntrinsicInst>(Agg), SelInst, Builder))
4773 return replaceInstUsesWith(EV, Result);
4774 }
4776 // We're extracting from an insertvalue instruction, compare the indices
4777 const unsigned *exti, *exte, *insi, *inse;
4778 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4779 exte = EV.idx_end(), inse = IV->idx_end();
4780 exti != exte && insi != inse;
4781 ++exti, ++insi) {
4782 if (*insi != *exti)
4783 // The insert and extract both reference distinctly different elements.
4784 // This means the extract is not influenced by the insert, and we can
4785 // replace the aggregate operand of the extract with the aggregate
4786 // operand of the insert. i.e., replace
4787 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4788 // %E = extractvalue { i32, { i32 } } %I, 0
4789 // with
4790 // %E = extractvalue { i32, { i32 } } %A, 0
4791 return ExtractValueInst::Create(IV->getAggregateOperand(),
4792 EV.getIndices());
4793 }
4794 if (exti == exte && insi == inse)
4795 // Both iterators are at the end: Index lists are identical. Replace
4796 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4797 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4798 // with "i32 42"
4799 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
4800 if (exti == exte) {
4801 // The extract list is a prefix of the insert list. i.e. replace
4802 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4803 // %E = extractvalue { i32, { i32 } } %I, 1
4804 // with
4805 // %X = extractvalue { i32, { i32 } } %A, 1
4806 // %E = insertvalue { i32 } %X, i32 42, 0
4807 // by switching the order of the insert and extract (though the
4808 // insertvalue should be left in, since it may have other uses).
4809 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4810 EV.getIndices());
4811 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4812 ArrayRef(insi, inse));
4813 }
4814 if (insi == inse)
4815 // The insert list is a prefix of the extract list
4816 // We can simply remove the common indices from the extract and make it
4817 // operate on the inserted value instead of the insertvalue result.
4818 // i.e., replace
4819 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4820 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4821 // with
4822 // %E extractvalue { i32 } { i32 42 }, 0
4823 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4824 ArrayRef(exti, exte));
4825 }
4826
4827 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4828 return R;
4829
4830 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4831 // Bail out if the aggregate contains scalable vector type
4832 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4833 STy && STy->isScalableTy())
4834 return nullptr;
4835
4836 // If the (non-volatile) load only has one use, we can rewrite this to a
4837 // load from a GEP. This reduces the size of the load. If a load is used
4838 // only by extractvalue instructions then this either must have been
4839 // optimized before, or it is a struct with padding, in which case we
4840 // don't want to do the transformation as it loses padding knowledge.
4841 if (L->isSimple() && L->hasOneUse()) {
4842 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4843 SmallVector<Value*, 4> Indices;
4844 // Prefix an i32 0 since we need the first element.
4845 Indices.push_back(Builder.getInt32(0));
4846 for (unsigned Idx : EV.indices())
4847 Indices.push_back(Builder.getInt32(Idx));
4848
4849 // We need to insert these at the location of the old load, not at that of
4850 // the extractvalue.
4851 Builder.SetInsertPoint(L);
4852 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4853 L->getPointerOperand(), Indices);
4854 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
4855 // Whatever aliasing information we had for the orignal load must also
4856 // hold for the smaller load, so propagate the annotations.
4857 NL->setAAMetadata(L->getAAMetadata());
4858 // Returning the load directly will cause the main loop to insert it in
4859 // the wrong spot, so use replaceInstUsesWith().
4860 return replaceInstUsesWith(EV, NL);
4861 }
4862 }
4863
4864 if (auto *PN = dyn_cast<PHINode>(Agg))
4865 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4866 return Res;
4867
4868 // Canonicalize extract (select Cond, TV, FV)
4869 // -> select cond, (extract TV), (extract FV)
4870 if (auto *SI = dyn_cast<SelectInst>(Agg))
4871 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4872 return R;
4873
4874 // We could simplify extracts from other values. Note that nested extracts may
4875 // already be simplified implicitly by the above: extract (extract (insert) )
4876 // will be translated into extract ( insert ( extract ) ) first and then just
4877 // the value inserted, if appropriate. Similarly for extracts from single-use
4878 // loads: extract (extract (load)) will be translated to extract (load (gep))
4879 // and if again single-use then via load (gep (gep)) to load (gep).
4880 // However, double extracts from e.g. function arguments or return values
4881 // aren't handled yet.
4882 return nullptr;
4883}
4884
4885/// Return 'true' if the given typeinfo will match anything.
4886static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4887 switch (Personality) {
4891 // The GCC C EH and Rust personality only exists to support cleanups, so
4892 // it's not clear what the semantics of catch clauses are.
4893 return false;
4895 return false;
4897 // While __gnat_all_others_value will match any Ada exception, it doesn't
4898 // match foreign exceptions (or didn't, before gcc-4.7).
4899 return false;
4910 return isa<ConstantPointerNull>(TypeInfo);
4911 }
4912 llvm_unreachable("invalid enum");
4913}
4914
4915static bool shorter_filter(const Value *LHS, const Value *RHS) {
4916 return
4917 cast<ArrayType>(LHS->getType())->getNumElements()
4918 <
4919 cast<ArrayType>(RHS->getType())->getNumElements();
4920}
4921
4923 // The logic here should be correct for any real-world personality function.
4924 // However if that turns out not to be true, the offending logic can always
4925 // be conditioned on the personality function, like the catch-all logic is.
4926 EHPersonality Personality =
4927 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4928
4929 // Simplify the list of clauses, eg by removing repeated catch clauses
4930 // (these are often created by inlining).
4931 bool MakeNewInstruction = false; // If true, recreate using the following:
4932 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4933 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4934
4935 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4936 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4937 bool isLastClause = i + 1 == e;
4938 if (LI.isCatch(i)) {
4939 // A catch clause.
4940 Constant *CatchClause = LI.getClause(i);
4941 Constant *TypeInfo = CatchClause->stripPointerCasts();
4942
4943 // If we already saw this clause, there is no point in having a second
4944 // copy of it.
4945 if (AlreadyCaught.insert(TypeInfo).second) {
4946 // This catch clause was not already seen.
4947 NewClauses.push_back(CatchClause);
4948 } else {
4949 // Repeated catch clause - drop the redundant copy.
4950 MakeNewInstruction = true;
4951 }
4952
4953 // If this is a catch-all then there is no point in keeping any following
4954 // clauses or marking the landingpad as having a cleanup.
4955 if (isCatchAll(Personality, TypeInfo)) {
4956 if (!isLastClause)
4957 MakeNewInstruction = true;
4958 CleanupFlag = false;
4959 break;
4960 }
4961 } else {
4962 // A filter clause. If any of the filter elements were already caught
4963 // then they can be dropped from the filter. It is tempting to try to
4964 // exploit the filter further by saying that any typeinfo that does not
4965 // occur in the filter can't be caught later (and thus can be dropped).
4966 // However this would be wrong, since typeinfos can match without being
4967 // equal (for example if one represents a C++ class, and the other some
4968 // class derived from it).
4969 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4970 Constant *FilterClause = LI.getClause(i);
4971 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4972 unsigned NumTypeInfos = FilterType->getNumElements();
4973
4974 // An empty filter catches everything, so there is no point in keeping any
4975 // following clauses or marking the landingpad as having a cleanup. By
4976 // dealing with this case here the following code is made a bit simpler.
4977 if (!NumTypeInfos) {
4978 NewClauses.push_back(FilterClause);
4979 if (!isLastClause)
4980 MakeNewInstruction = true;
4981 CleanupFlag = false;
4982 break;
4983 }
4984
4985 bool MakeNewFilter = false; // If true, make a new filter.
4986 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4987 if (isa<ConstantAggregateZero>(FilterClause)) {
4988 // Not an empty filter - it contains at least one null typeinfo.
4989 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4990 Constant *TypeInfo =
4992 // If this typeinfo is a catch-all then the filter can never match.
4993 if (isCatchAll(Personality, TypeInfo)) {
4994 // Throw the filter away.
4995 MakeNewInstruction = true;
4996 continue;
4997 }
4998
4999 // There is no point in having multiple copies of this typeinfo, so
5000 // discard all but the first copy if there is more than one.
5001 NewFilterElts.push_back(TypeInfo);
5002 if (NumTypeInfos > 1)
5003 MakeNewFilter = true;
5004 } else {
5005 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
5006 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
5007 NewFilterElts.reserve(NumTypeInfos);
5008
5009 // Remove any filter elements that were already caught or that already
5010 // occurred in the filter. While there, see if any of the elements are
5011 // catch-alls. If so, the filter can be discarded.
5012 bool SawCatchAll = false;
5013 for (unsigned j = 0; j != NumTypeInfos; ++j) {
5014 Constant *Elt = Filter->getOperand(j);
5015 Constant *TypeInfo = Elt->stripPointerCasts();
5016 if (isCatchAll(Personality, TypeInfo)) {
5017 // This element is a catch-all. Bail out, noting this fact.
5018 SawCatchAll = true;
5019 break;
5020 }
5021
5022 // Even if we've seen a type in a catch clause, we don't want to
5023 // remove it from the filter. An unexpected type handler may be
5024 // set up for a call site which throws an exception of the same
5025 // type caught. In order for the exception thrown by the unexpected
5026 // handler to propagate correctly, the filter must be correctly
5027 // described for the call site.
5028 //
5029 // Example:
5030 //
5031 // void unexpected() { throw 1;}
5032 // void foo() throw (int) {
5033 // std::set_unexpected(unexpected);
5034 // try {
5035 // throw 2.0;
5036 // } catch (int i) {}
5037 // }
5038
5039 // There is no point in having multiple copies of the same typeinfo in
5040 // a filter, so only add it if we didn't already.
5041 if (SeenInFilter.insert(TypeInfo).second)
5042 NewFilterElts.push_back(cast<Constant>(Elt));
5043 }
5044 // A filter containing a catch-all cannot match anything by definition.
5045 if (SawCatchAll) {
5046 // Throw the filter away.
5047 MakeNewInstruction = true;
5048 continue;
5049 }
5050
5051 // If we dropped something from the filter, make a new one.
5052 if (NewFilterElts.size() < NumTypeInfos)
5053 MakeNewFilter = true;
5054 }
5055 if (MakeNewFilter) {
5056 FilterType = ArrayType::get(FilterType->getElementType(),
5057 NewFilterElts.size());
5058 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
5059 MakeNewInstruction = true;
5060 }
5061
5062 NewClauses.push_back(FilterClause);
5063
5064 // If the new filter is empty then it will catch everything so there is
5065 // no point in keeping any following clauses or marking the landingpad
5066 // as having a cleanup. The case of the original filter being empty was
5067 // already handled above.
5068 if (MakeNewFilter && !NewFilterElts.size()) {
5069 assert(MakeNewInstruction && "New filter but not a new instruction!");
5070 CleanupFlag = false;
5071 break;
5072 }
5073 }
5074 }
5075
5076 // If several filters occur in a row then reorder them so that the shortest
5077 // filters come first (those with the smallest number of elements). This is
5078 // advantageous because shorter filters are more likely to match, speeding up
5079 // unwinding, but mostly because it increases the effectiveness of the other
5080 // filter optimizations below.
5081 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5082 unsigned j;
5083 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5084 for (j = i; j != e; ++j)
5085 if (!isa<ArrayType>(NewClauses[j]->getType()))
5086 break;
5087
5088 // Check whether the filters are already sorted by length. We need to know
5089 // if sorting them is actually going to do anything so that we only make a
5090 // new landingpad instruction if it does.
5091 for (unsigned k = i; k + 1 < j; ++k)
5092 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
5093 // Not sorted, so sort the filters now. Doing an unstable sort would be
5094 // correct too but reordering filters pointlessly might confuse users.
5095 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
5097 MakeNewInstruction = true;
5098 break;
5099 }
5100
5101 // Look for the next batch of filters.
5102 i = j + 1;
5103 }
5104
5105 // If typeinfos matched if and only if equal, then the elements of a filter L
5106 // that occurs later than a filter F could be replaced by the intersection of
5107 // the elements of F and L. In reality two typeinfos can match without being
5108 // equal (for example if one represents a C++ class, and the other some class
5109 // derived from it) so it would be wrong to perform this transform in general.
5110 // However the transform is correct and useful if F is a subset of L. In that
5111 // case L can be replaced by F, and thus removed altogether since repeating a
5112 // filter is pointless. So here we look at all pairs of filters F and L where
5113 // L follows F in the list of clauses, and remove L if every element of F is
5114 // an element of L. This can occur when inlining C++ functions with exception
5115 // specifications.
5116 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5117 // Examine each filter in turn.
5118 Value *Filter = NewClauses[i];
5119 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
5120 if (!FTy)
5121 // Not a filter - skip it.
5122 continue;
5123 unsigned FElts = FTy->getNumElements();
5124 // Examine each filter following this one. Doing this backwards means that
5125 // we don't have to worry about filters disappearing under us when removed.
5126 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5127 Value *LFilter = NewClauses[j];
5128 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
5129 if (!LTy)
5130 // Not a filter - skip it.
5131 continue;
5132 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5133 // an element of LFilter, then discard LFilter.
5134 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5135 // If Filter is empty then it is a subset of LFilter.
5136 if (!FElts) {
5137 // Discard LFilter.
5138 NewClauses.erase(J);
5139 MakeNewInstruction = true;
5140 // Move on to the next filter.
5141 continue;
5142 }
5143 unsigned LElts = LTy->getNumElements();
5144 // If Filter is longer than LFilter then it cannot be a subset of it.
5145 if (FElts > LElts)
5146 // Move on to the next filter.
5147 continue;
5148 // At this point we know that LFilter has at least one element.
5149 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
5150 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5151 // already know that Filter is not longer than LFilter).
5153 assert(FElts <= LElts && "Should have handled this case earlier!");
5154 // Discard LFilter.
5155 NewClauses.erase(J);
5156 MakeNewInstruction = true;
5157 }
5158 // Move on to the next filter.
5159 continue;
5160 }
5161 ConstantArray *LArray = cast<ConstantArray>(LFilter);
5162 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
5163 // Since Filter is non-empty and contains only zeros, it is a subset of
5164 // LFilter iff LFilter contains a zero.
5165 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5166 for (unsigned l = 0; l != LElts; ++l)
5167 if (isa<ConstantPointerNull>(LArray->getOperand(l))) {
5168 // LFilter contains a zero - discard it.
5169 NewClauses.erase(J);
5170 MakeNewInstruction = true;
5171 break;
5172 }
5173 // Move on to the next filter.
5174 continue;
5175 }
5176 // At this point we know that both filters are ConstantArrays. Loop over
5177 // operands to see whether every element of Filter is also an element of
5178 // LFilter. Since filters tend to be short this is probably faster than
5179 // using a method that scales nicely.
5181 bool AllFound = true;
5182 for (unsigned f = 0; f != FElts; ++f) {
5183 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
5184 AllFound = false;
5185 for (unsigned l = 0; l != LElts; ++l) {
5186 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
5187 if (LTypeInfo == FTypeInfo) {
5188 AllFound = true;
5189 break;
5190 }
5191 }
5192 if (!AllFound)
5193 break;
5194 }
5195 if (AllFound) {
5196 // Discard LFilter.
5197 NewClauses.erase(J);
5198 MakeNewInstruction = true;
5199 }
5200 // Move on to the next filter.
5201 }
5202 }
5203
5204 // If we changed any of the clauses, replace the old landingpad instruction
5205 // with a new one.
5206 if (MakeNewInstruction) {
5208 NewClauses.size());
5209 for (Constant *C : NewClauses)
5210 NLI->addClause(C);
5211 // A landing pad with no clauses must have the cleanup flag set. It is
5212 // theoretically possible, though highly unlikely, that we eliminated all
5213 // clauses. If so, force the cleanup flag to true.
5214 if (NewClauses.empty())
5215 CleanupFlag = true;
5216 NLI->setCleanup(CleanupFlag);
5217 return NLI;
5218 }
5219
5220 // Even if none of the clauses changed, we may nonetheless have understood
5221 // that the cleanup flag is pointless. Clear it if so.
5222 if (LI.isCleanup() != CleanupFlag) {
5223 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5224 LI.setCleanup(CleanupFlag);
5225 return &LI;
5226 }
5227
5228 return nullptr;
5229}
5230
5231Value *
5233 // Try to push freeze through instructions that propagate but don't produce
5234 // poison as far as possible. If an operand of freeze follows three
5235 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5236 // guaranteed-non-poison operands then push the freeze through to the one
5237 // operand that is not guaranteed non-poison. The actual transform is as
5238 // follows.
5239 // Op1 = ... ; Op1 can be posion
5240 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5241 // ; single guaranteed-non-poison operands
5242 // ... = Freeze(Op0)
5243 // =>
5244 // Op1 = ...
5245 // Op1.fr = Freeze(Op1)
5246 // ... = Inst(Op1.fr, NonPoisonOps...)
5247 auto *OrigOp = OrigFI.getOperand(0);
5248 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
5249
5250 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5251 // potentially reduces their optimization potential, so let's only do this iff
5252 // the OrigOp is only used by the freeze.
5253 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
5254 return nullptr;
5255
5256 // We can't push the freeze through an instruction which can itself create
5257 // poison. If the only source of new poison is flags, we can simply
5258 // strip them (since we know the only use is the freeze and nothing can
5259 // benefit from them.)
5261 /*ConsiderFlagsAndMetadata*/ false))
5262 return nullptr;
5263
5264 // If operand is guaranteed not to be poison, there is no need to add freeze
5265 // to the operand. So we first find the operand that is not guaranteed to be
5266 // poison.
5267 Value *MaybePoisonOperand = nullptr;
5268 for (Value *V : OrigOpInst->operands()) {
5270 // Treat identical operands as a single operand.
5271 (MaybePoisonOperand && MaybePoisonOperand == V))
5272 continue;
5273 if (!MaybePoisonOperand)
5274 MaybePoisonOperand = V;
5275 else
5276 return nullptr;
5277 }
5278
5279 OrigOpInst->dropPoisonGeneratingAnnotations();
5280
5281 // If all operands are guaranteed to be non-poison, we can drop freeze.
5282 if (!MaybePoisonOperand)
5283 return OrigOp;
5284
5285 Builder.SetInsertPoint(OrigOpInst);
5286 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5287 MaybePoisonOperand, MaybePoisonOperand->getName() + ".fr");
5288
5289 OrigOpInst->replaceUsesOfWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
5290 return OrigOp;
5291}
5292
5294 PHINode *PN) {
5295 // Detect whether this is a recurrence with a start value and some number of
5296 // backedge values. We'll check whether we can push the freeze through the
5297 // backedge values (possibly dropping poison flags along the way) until we
5298 // reach the phi again. In that case, we can move the freeze to the start
5299 // value.
5300 Use *StartU = nullptr;
5302 for (Use &U : PN->incoming_values()) {
5303 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
5304 // Add backedge value to worklist.
5305 Worklist.push_back(U.get());
5306 continue;
5307 }
5308
5309 // Don't bother handling multiple start values.
5310 if (StartU)
5311 return nullptr;
5312 StartU = &U;
5313 }
5314
5315 if (!StartU || Worklist.empty())
5316 return nullptr; // Not a recurrence.
5317
5318 Value *StartV = StartU->get();
5319 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
5320 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
5321 // We can't insert freeze if the start value is the result of the
5322 // terminator (e.g. an invoke).
5323 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5324 return nullptr;
5325
5328 while (!Worklist.empty()) {
5329 Value *V = Worklist.pop_back_val();
5330 if (!Visited.insert(V).second)
5331 continue;
5332
5333 if (Visited.size() > 32)
5334 return nullptr; // Limit the total number of values we inspect.
5335
5336 // Assume that PN is non-poison, because it will be after the transform.
5337 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5338 continue;
5339
5342 /*ConsiderFlagsAndMetadata*/ false))
5343 return nullptr;
5344
5345 DropFlags.push_back(I);
5346 append_range(Worklist, I->operands());
5347 }
5348
5349 for (Instruction *I : DropFlags)
5350 I->dropPoisonGeneratingAnnotations();
5351
5352 if (StartNeedsFreeze) {
5353 Builder.SetInsertPoint(StartBB->getTerminator());
5354 Value *FrozenStartV = Builder.CreateFreeze(StartV,
5355 StartV->getName() + ".fr");
5356 replaceUse(*StartU, FrozenStartV);
5357 }
5358 return replaceInstUsesWith(FI, PN);
5359}
5360
5362 Value *Op = FI.getOperand(0);
5363
5364 if (isa<Constant>(Op) || Op->hasOneUse())
5365 return false;
5366
5367 // Move the freeze directly after the definition of its operand, so that
5368 // it dominates the maximum number of uses. Note that it may not dominate
5369 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5370 // the normal/default destination. This is why the domination check in the
5371 // replacement below is still necessary.
5372 BasicBlock::iterator MoveBefore;
5373 if (isa<Argument>(Op)) {
5374 MoveBefore =
5376 } else {
5377 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
5378 if (!MoveBeforeOpt)
5379 return false;
5380 MoveBefore = *MoveBeforeOpt;
5381 }
5382
5383 // Re-point iterator to come after any debug-info records.
5384 MoveBefore.setHeadBit(false);
5385
5386 bool Changed = false;
5387 if (&FI != &*MoveBefore) {
5388 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
5389 Changed = true;
5390 }
5391
5393 Changed |= Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
5394 if (!DT.dominates(&FI, U))
5395 return false;
5396
5397 Users.push_back(U.getUser());
5398 return true;
5399 });
5400
5401 for (auto *U : Users) {
5402 // Re-queue U and its users: freezing U's operand can expose a fold on a
5403 // user of U (e.g. a freeze of U can now be pushed through it) that would
5404 // otherwise only fire on a later iteration, tripping the fixpoint verifier.
5405 auto *UI = cast<Instruction>(U);
5406 Worklist.pushUsersToWorkList(*UI);
5407 Worklist.push(UI);
5408 }
5409
5410 return Changed;
5411}
5412
5413// Check if any direct or bitcast user of this value is a shuffle instruction.
5415 for (auto *U : V->users()) {
5417 return true;
5418 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
5419 return true;
5420 }
5421 return false;
5422}
5423
5425 Value *Op0 = I.getOperand(0);
5426
5427 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
5428 return replaceInstUsesWith(I, V);
5429
5430 // freeze (phi const, x) --> phi const, (freeze x)
5431 if (auto *PN = dyn_cast<PHINode>(Op0)) {
5432 if (Instruction *NV = foldOpIntoPhi(I, PN))
5433 return NV;
5434 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
5435 return NV;
5436 }
5437
5439 return replaceInstUsesWith(I, NI);
5440
5441 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5442 // - or: pick -1
5443 // - select's condition: if the true value is constant, choose it by making
5444 // the condition true.
5445 // - phi: pick the common constant across operands
5446 // - default: pick 0
5447 //
5448 // Note that this transform is intentionally done here rather than
5449 // via an analysis in InstSimplify or at individual user sites. That is
5450 // because we must produce the same value for all uses of the freeze -
5451 // it's the reason "freeze" exists!
5452 //
5453 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5454 // duplicating logic for binops at least.
5455 auto getUndefReplacement = [&](Type *Ty) {
5456 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5457 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5458 // removed.
5459 Constant *BestValue = nullptr;
5460 for (Value *V : PN.incoming_values()) {
5461 if (match(V, m_Freeze(m_Undef())))
5462 continue;
5463
5465 if (!C)
5466 return nullptr;
5467
5469 return nullptr;
5470
5471 if (BestValue && BestValue != C)
5472 return nullptr;
5473
5474 BestValue = C;
5475 }
5476 return BestValue;
5477 };
5478
5479 Value *NullValue = Constant::getNullValue(Ty);
5480 Value *BestValue = nullptr;
5481 for (auto *U : I.users()) {
5482 Value *V = NullValue;
5483 if (match(U, m_Or(m_Value(), m_Value())))
5485 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
5486 V = ConstantInt::getTrue(Ty);
5487 else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
5488 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
5489 V = NullValue;
5490 } else if (auto *PHI = dyn_cast<PHINode>(U)) {
5491 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5492 V = MaybeV;
5493 }
5494
5495 if (!BestValue)
5496 BestValue = V;
5497 else if (BestValue != V)
5498 BestValue = NullValue;
5499 }
5500 assert(BestValue && "Must have at least one use");
5501 assert(BestValue != &I && "Cannot replace with itself");
5502 return BestValue;
5503 };
5504
5505 if (match(Op0, m_Undef())) {
5506 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5507 // a shuffle. This may improve codegen for shuffles that allow
5508 // unspecified inputs.
5510 return nullptr;
5511 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
5512 }
5513
5514 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5515 Type *Ty = C->getType();
5516 auto *VTy = dyn_cast<FixedVectorType>(Ty);
5517 if (!VTy)
5518 return nullptr;
5519 Constant *BestValue;
5521 m_Unless(m_Undef()), m_Constant(BestValue)))))
5522 BestValue = Constant::getNullValue(VTy->getScalarType());
5523 return Constant::replaceUndefsWith(C, BestValue);
5524 };
5525
5526 Constant *C;
5527 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5528 !C->containsConstantExpression()) {
5529 if (Constant *Repl = getFreezeVectorReplacement(C))
5530 return replaceInstUsesWith(I, Repl);
5531 }
5532
5533 // Replace uses of Op with freeze(Op).
5534 if (freezeOtherUses(I))
5535 return &I;
5536
5537 return nullptr;
5538}
5539
5540/// Check for case where the call writes to an otherwise dead alloca. This
5541/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5542/// helper *only* analyzes the write; doesn't check any other legality aspect.
5544 auto *CB = dyn_cast<CallBase>(I);
5545 if (!CB)
5546 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5547 // to allow reload along used path as described below. Otherwise, this
5548 // is simply a store to a dead allocation which will be removed.
5549 return false;
5550 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
5551 if (!Dest)
5552 return false;
5553 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
5554 if (!AI)
5555 // TODO: allow malloc?
5556 return false;
5557 // TODO: allow memory access dominated by move point? Note that since AI
5558 // could have a reference to itself captured by the call, we would need to
5559 // account for cycles in doing so.
5560 SmallVector<const User *> AllocaUsers;
5562 auto pushUsers = [&](const Instruction &I) {
5563 for (const User *U : I.users()) {
5564 if (Visited.insert(U).second)
5565 AllocaUsers.push_back(U);
5566 }
5567 };
5568 pushUsers(*AI);
5569 while (!AllocaUsers.empty()) {
5570 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
5571 if (isa<GetElementPtrInst>(UserI) || isa<AddrSpaceCastInst>(UserI)) {
5572 pushUsers(*UserI);
5573 continue;
5574 }
5575 if (UserI == CB)
5576 continue;
5577 // TODO: support lifetime.start/end here
5578 return false;
5579 }
5580 return true;
5581}
5582
5583/// Try to move the specified instruction from its current block into the
5584/// beginning of DestBlock, which can only happen if it's safe to move the
5585/// instruction past all of the instructions between it and the end of its
5586/// block.
5588 BasicBlock *DestBlock) {
5589 BasicBlock *SrcBlock = I->getParent();
5590
5591 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5592 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5593 I->isTerminator())
5594 return false;
5595
5596 // Do not sink static or dynamic alloca instructions. Static allocas must
5597 // remain in the entry block, and dynamic allocas must not be sunk in between
5598 // a stacksave / stackrestore pair, which would incorrectly shorten its
5599 // lifetime.
5600 if (isa<AllocaInst>(I))
5601 return false;
5602
5603 // Do not sink into catchswitch blocks.
5604 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
5605 return false;
5606
5607 // Do not sink convergent call instructions.
5608 if (auto *CI = dyn_cast<CallInst>(I)) {
5609 if (CI->isConvergent())
5610 return false;
5611 }
5612
5613 // Unless we can prove that the memory write isn't visible except on the
5614 // path we're sinking to, we must bail.
5615 if (I->mayWriteToMemory()) {
5616 if (!SoleWriteToDeadLocal(I, TLI))
5617 return false;
5618 }
5619
5620 // We can only sink load instructions if there is nothing between the load and
5621 // the end of block that could change the value.
5622 if (I->mayReadFromMemory() &&
5623 !I->hasMetadata(LLVMContext::MD_invariant_load)) {
5624 // We don't want to do any sophisticated alias analysis, so we only check
5625 // the instructions after I in I's parent block if we try to sink to its
5626 // successor block.
5627 if (DestBlock->getUniquePredecessor() != I->getParent())
5628 return false;
5629 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
5630 E = I->getParent()->end();
5631 Scan != E; ++Scan)
5632 if (Scan->mayWriteToMemory())
5633 return false;
5634 }
5635
5636 I->dropDroppableUses([&](const Use *U) {
5637 auto *I = dyn_cast<Instruction>(U->getUser());
5638 if (I && I->getParent() != DestBlock) {
5639 Worklist.add(I);
5640 return true;
5641 }
5642 return false;
5643 });
5644 /// FIXME: We could remove droppable uses that are not dominated by
5645 /// the new position.
5646
5647 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5648 I->moveBefore(*DestBlock, InsertPos);
5649 ++NumSunkInst;
5650
5651 // Also sink all related debug uses from the source basic block. Otherwise we
5652 // get debug use before the def. Attempt to salvage debug uses first, to
5653 // maximise the range variables have location for. If we cannot salvage, then
5654 // mark the location undef: we know it was supposed to receive a new location
5655 // here, but that computation has been sunk.
5656 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5657 findDbgUsers(I, DbgVariableRecords);
5658 if (!DbgVariableRecords.empty())
5659 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5660 DbgVariableRecords);
5661
5662 // PS: there are numerous flaws with this behaviour, not least that right now
5663 // assignments can be re-ordered past other assignments to the same variable
5664 // if they use different Values. Creating more undef assignments can never be
5665 // undone. And salvaging all users outside of this block can unnecessarily
5666 // alter the lifetime of the live-value that the variable refers to.
5667 // Some of these things can be resolved by tolerating debug use-before-defs in
5668 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5669 // being used for more architectures.
5670
5671 return true;
5672}
5673
5675 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5676 BasicBlock *DestBlock,
5677 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5678 // For all debug values in the destination block, the sunk instruction
5679 // will still be available, so they do not need to be dropped.
5680
5681 // Fetch all DbgVariableRecords not already in the destination.
5682 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5683 for (auto &DVR : DbgVariableRecords)
5684 if (DVR->getParent() != DestBlock)
5685 DbgVariableRecordsToSalvage.push_back(DVR);
5686
5687 // Fetch a second collection, of DbgVariableRecords in the source block that
5688 // we're going to sink.
5689 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5690 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5691 if (DVR->getParent() == SrcBlock)
5692 DbgVariableRecordsToSink.push_back(DVR);
5693
5694 // Sort DbgVariableRecords according to their position in the block. This is a
5695 // partial order: DbgVariableRecords attached to different instructions will
5696 // be ordered by the instruction order, but DbgVariableRecords attached to the
5697 // same instruction won't have an order.
5698 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5699 return B->getInstruction()->comesBefore(A->getInstruction());
5700 };
5701 llvm::stable_sort(DbgVariableRecordsToSink, Order);
5702
5703 // If there are two assignments to the same variable attached to the same
5704 // instruction, the ordering between the two assignments is important. Scan
5705 // for this (rare) case and establish which is the last assignment.
5706 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5708 if (DbgVariableRecordsToSink.size() > 1) {
5710 // Count how many assignments to each variable there are per instruction.
5711 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5712 DebugVariable DbgUserVariable =
5713 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5714 DVR->getDebugLoc()->getInlinedAt());
5715 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
5716 }
5717
5718 // If there are any instructions with two assignments, add them to the
5719 // FilterOutMap to record that they need extra filtering.
5721 for (auto It : CountMap) {
5722 if (It.second > 1) {
5723 FilterOutMap[It.first] = nullptr;
5724 DupSet.insert(It.first.first);
5725 }
5726 }
5727
5728 // For all instruction/variable pairs needing extra filtering, find the
5729 // latest assignment.
5730 for (const Instruction *Inst : DupSet) {
5731 for (DbgVariableRecord &DVR :
5732 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
5733 DebugVariable DbgUserVariable =
5734 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5735 DVR.getDebugLoc()->getInlinedAt());
5736 auto FilterIt =
5737 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
5738 if (FilterIt == FilterOutMap.end())
5739 continue;
5740 if (FilterIt->second != nullptr)
5741 continue;
5742 FilterIt->second = &DVR;
5743 }
5744 }
5745 }
5746
5747 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5748 // out any duplicate assignments identified above.
5750 SmallSet<DebugVariable, 4> SunkVariables;
5751 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5753 continue;
5754
5755 DebugVariable DbgUserVariable =
5756 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5757 DVR->getDebugLoc()->getInlinedAt());
5758
5759 // For any variable where there were multiple assignments in the same place,
5760 // ignore all but the last assignment.
5761 if (!FilterOutMap.empty()) {
5762 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
5763 auto It = FilterOutMap.find(IVP);
5764
5765 // Filter out.
5766 if (It != FilterOutMap.end() && It->second != DVR)
5767 continue;
5768 }
5769
5770 if (!SunkVariables.insert(DbgUserVariable).second)
5771 continue;
5772
5773 if (DVR->isDbgAssign())
5774 continue;
5775
5776 DVRClones.emplace_back(DVR->clone());
5777 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5778 }
5779
5780 // Perform salvaging without the clones, then sink the clones.
5781 if (DVRClones.empty())
5782 return;
5783
5784 salvageDebugInfoForDbgValues(*I, DbgVariableRecordsToSalvage);
5785
5786 // The clones are in reverse order of original appearance. Assert that the
5787 // head bit is set on the iterator as we _should_ have received it via
5788 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5789 // we'll repeatedly insert at the head, such as:
5790 // DVR-3 (third insertion goes here)
5791 // DVR-2 (second insertion goes here)
5792 // DVR-1 (first insertion goes here)
5793 // Any-Prior-DVRs
5794 // InsertPtInst
5795 assert(InsertPos.getHeadBit());
5796 for (DbgVariableRecord *DVRClone : DVRClones) {
5797 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
5798 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5799 }
5800}
5801
5803 while (!Worklist.isEmpty()) {
5804 // Walk deferred instructions in reverse order, and push them to the
5805 // worklist, which means they'll end up popped from the worklist in-order.
5806 while (Instruction *I = Worklist.popDeferred()) {
5807 // Check to see if we can DCE the instruction. We do this already here to
5808 // reduce the number of uses and thus allow other folds to trigger.
5809 // Note that eraseInstFromFunction() may push additional instructions on
5810 // the deferred worklist, so this will DCE whole instruction chains.
5813 ++NumDeadInst;
5814 continue;
5815 }
5816
5817 Worklist.push(I);
5818 }
5819
5820 Instruction *I = Worklist.removeOne();
5821 if (I == nullptr) continue; // skip null values.
5822
5823 // Check to see if we can DCE the instruction.
5826 ++NumDeadInst;
5827 continue;
5828 }
5829
5830 if (!DebugCounter::shouldExecute(VisitCounter))
5831 continue;
5832
5833 // See if we can trivially sink this instruction to its user if we can
5834 // prove that the successor is not executed more frequently than our block.
5835 // Return the UserBlock if successful.
5836 auto getOptionalSinkBlockForInst =
5837 [this](Instruction *I) -> std::optional<BasicBlock *> {
5838 if (!EnableCodeSinking)
5839 return std::nullopt;
5840
5841 BasicBlock *BB = I->getParent();
5842 BasicBlock *UserParent = nullptr;
5843 unsigned NumUsers = 0;
5844
5845 for (Use &U : I->uses()) {
5846 User *User = U.getUser();
5847 if (User->isDroppable()) {
5848 // Do not sink if there are dereferenceable assumes that would be
5849 // removed.
5851 if (II->getIntrinsicID() != Intrinsic::assume ||
5852 !II->getOperandBundle("dereferenceable"))
5853 continue;
5854 }
5855
5856 if (NumUsers > MaxSinkNumUsers)
5857 return std::nullopt;
5858
5859 Instruction *UserInst = cast<Instruction>(User);
5860 // Special handling for Phi nodes - get the block the use occurs in.
5861 BasicBlock *UserBB = UserInst->getParent();
5862 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5863 UserBB = PN->getIncomingBlock(U);
5864 // Bail out if we have uses in different blocks. We don't do any
5865 // sophisticated analysis (i.e. finding NearestCommonDominator of these
5866 // use blocks).
5867 if (UserParent && UserParent != UserBB)
5868 return std::nullopt;
5869 UserParent = UserBB;
5870
5871 // Make sure these checks are done only once. Naturally, we do the checks
5872 // the first time we get the userparent; this will save compile time.
5873 if (NumUsers == 0) {
5874 // Try sinking to another block. If that block is unreachable, then do
5875 // not bother. SimplifyCFG should handle it.
5876 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5877 return std::nullopt;
5878
5879 auto *Term = UserParent->getTerminator();
5880 // See if the user is one of our successors that has only one
5881 // predecessor, so that we don't have to split the critical edge.
5882 // Another option where we can sink is a block that ends with a
5883 // terminator that does not pass control to another block (such as
5884 // return or unreachable or resume). In this case:
5885 // - I dominates the User (by SSA form);
5886 // - the User will be executed at most once.
5887 // So sinking I down to User is always profitable or neutral.
5888 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5889 return std::nullopt;
5890
5891 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5892 }
5893
5894 NumUsers++;
5895 }
5896
5897 // No user or only has droppable users.
5898 if (!UserParent)
5899 return std::nullopt;
5900
5901 return UserParent;
5902 };
5903
5904 auto OptBB = getOptionalSinkBlockForInst(I);
5905 if (OptBB) {
5906 auto *UserParent = *OptBB;
5907 // Okay, the CFG is simple enough, try to sink this instruction.
5908 if (tryToSinkInstruction(I, UserParent)) {
5909 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5910 MadeIRChange = true;
5911 // We'll add uses of the sunk instruction below, but since
5912 // sinking can expose opportunities for its *operands*, add
5913 // them to the worklist
5914 for (Use &U : I->operands())
5915 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5916 Worklist.push(OpI);
5917 }
5918 }
5919
5920 // Now that we have an instruction, try combining it to simplify it.
5921 Builder.SetInsertPoint(I);
5922 Builder.SetCurrentDebugLocation(I->getDebugLoc());
5923 // Used by our IRBuilder inserter to copy annotation metadata.
5925
5926#ifndef NDEBUG
5927 std::string OrigI;
5928#endif
5929 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5930 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5931
5932 if (Instruction *Result = visit(*I)) {
5933 ++NumCombined;
5934 // Should we replace the old instruction with a new one?
5935 if (Result != I) {
5936 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5937 << " New = " << *Result << '\n');
5938
5939 // We copy the old instruction's DebugLoc to the new instruction, unless
5940 // InstCombine already assigned a DebugLoc to it, in which case we
5941 // should trust the more specifically selected DebugLoc.
5942 Result->setDebugLoc(Result->getDebugLoc().orElse(I->getDebugLoc()));
5943 // We also copy annotation metadata to the new instruction.
5944 Result->copyMetadata(*I, LLVMContext::MD_annotation);
5945 // Everything uses the new instruction now.
5946 I->replaceAllUsesWith(Result);
5947
5948 // Move the name to the new instruction first.
5949 Result->takeName(I);
5950
5951 // Insert the new instruction into the basic block...
5952 BasicBlock *InstParent = I->getParent();
5953 BasicBlock::iterator InsertPos = I->getIterator();
5954
5955 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5956 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5957 // We need to fix up the insertion point.
5958 if (isa<PHINode>(I)) // PHI -> Non-PHI
5959 InsertPos = InstParent->getFirstInsertionPt();
5960 else // Non-PHI -> PHI
5961 InsertPos = InstParent->getFirstNonPHIIt();
5962 }
5963
5964 Result->insertInto(InstParent, InsertPos);
5965
5966 // Register newly created assumptions.
5967 if (auto *Assume = dyn_cast<AssumeInst>(Result))
5968 AC.registerAssumption(Assume);
5969
5970 // Push the new instruction and any users onto the worklist.
5971 Worklist.pushUsersToWorkList(*Result);
5972 Worklist.push(Result);
5973
5975 } else {
5976 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5977 << " New = " << *I << '\n');
5978
5979 // If the instruction was modified, it's possible that it is now dead.
5980 // If so, remove it.
5983 } else {
5984 Worklist.pushUsersToWorkList(*I);
5985 Worklist.push(I);
5986 }
5987 }
5988 MadeIRChange = true;
5989 }
5990 }
5991
5992 Worklist.zap();
5993 return MadeIRChange;
5994}
5995
5996// Track the scopes used by !alias.scope and !noalias. In a function, a
5997// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5998// by both sets. If not, the declaration of the scope can be safely omitted.
5999// The MDNode of the scope can be omitted as well for the instructions that are
6000// part of this function. We do not do that at this point, as this might become
6001// too time consuming to do.
6003 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
6004 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
6005
6006public:
6008 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
6009 if (!I->hasMetadataOtherThanDebugLoc())
6010 return;
6011
6012 auto Track = [](Metadata *ScopeList, auto &Container) {
6013 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
6014 if (!MDScopeList || !Container.insert(MDScopeList).second)
6015 return;
6016 for (const auto &MDOperand : MDScopeList->operands())
6017 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
6018 Container.insert(MDScope);
6019 };
6020
6021 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
6022 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
6023 }
6024
6027 if (!Decl)
6028 return false;
6029
6030 assert(Decl->use_empty() &&
6031 "llvm.experimental.noalias.scope.decl in use ?");
6032 const MDNode *MDSL = Decl->getScopeList();
6033 assert(MDSL->getNumOperands() == 1 &&
6034 "llvm.experimental.noalias.scope should refer to a single scope");
6035 auto &MDOperand = MDSL->getOperand(0);
6036 if (auto *MD = dyn_cast<MDNode>(MDOperand))
6037 return !UsedAliasScopesAndLists.contains(MD) ||
6038 !UsedNoAliasScopesAndLists.contains(MD);
6039
6040 // Not an MDNode ? throw away.
6041 return true;
6042 }
6043};
6044
6045/// Populate the IC worklist from a function, by walking it in reverse
6046/// post-order and adding all reachable code to the worklist.
6047///
6048/// This has a couple of tricks to make the code faster and more powerful. In
6049/// particular, we constant fold and DCE instructions as we go, to avoid adding
6050/// them to the worklist (this significantly speeds up instcombine on code where
6051/// many instructions are dead or constant). Additionally, if we find a branch
6052/// whose condition is a known constant, we only visit the reachable successors.
6054 bool MadeIRChange = false;
6056 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6057 DenseMap<Constant *, Constant *> FoldedConstants;
6058 AliasScopeTracker SeenAliasScopes;
6059
6060 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6061 for (BasicBlock *Succ : successors(BB))
6062 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
6063 for (PHINode &PN : Succ->phis())
6064 for (Use &U : PN.incoming_values())
6065 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
6066 U.set(PoisonValue::get(PN.getType()));
6067 MadeIRChange = true;
6068 }
6069 };
6070
6071 for (BasicBlock *BB : RPOT) {
6072 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
6073 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
6074 })) {
6075 HandleOnlyLiveSuccessor(BB, nullptr);
6076 continue;
6077 }
6078 LiveBlocks.insert(BB);
6079
6080 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
6081 // ConstantProp instruction if trivially constant.
6082 if (!Inst.use_empty() &&
6083 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
6084 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
6085 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6086 << '\n');
6087 Inst.replaceAllUsesWith(C);
6088 ++NumConstProp;
6089 if (isInstructionTriviallyDead(&Inst, &TLI))
6090 Inst.eraseFromParent();
6091 MadeIRChange = true;
6092 continue;
6093 }
6094
6095 // See if we can constant fold its operands.
6096 for (Use &U : Inst.operands()) {
6098 continue;
6099
6100 auto *C = cast<Constant>(U);
6101 Constant *&FoldRes = FoldedConstants[C];
6102 if (!FoldRes)
6103 FoldRes = ConstantFoldConstant(C, DL, &TLI);
6104
6105 if (FoldRes != C) {
6106 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6107 << "\n Old = " << *C
6108 << "\n New = " << *FoldRes << '\n');
6109 U = FoldRes;
6110 MadeIRChange = true;
6111 }
6112 }
6113
6114 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6115 // these call instructions consumes a non-trivial amount of time and
6116 // provides no value for the optimization.
6117 if (!Inst.isDebugOrPseudoInst()) {
6118 InstrsForInstructionWorklist.push_back(&Inst);
6119 SeenAliasScopes.analyse(&Inst);
6120 }
6121 }
6122
6123 // If this is a branch or switch on a constant, mark only the single
6124 // live successor. Otherwise assume all successors are live.
6125 Instruction *TI = BB->getTerminator();
6126 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
6127 if (isa<UndefValue>(BI->getCondition())) {
6128 // Branch on undef is UB.
6129 HandleOnlyLiveSuccessor(BB, nullptr);
6130 continue;
6131 }
6132 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
6133 bool CondVal = Cond->getZExtValue();
6134 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
6135 continue;
6136 }
6137 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
6138 if (isa<UndefValue>(SI->getCondition())) {
6139 // Switch on undef is UB.
6140 HandleOnlyLiveSuccessor(BB, nullptr);
6141 continue;
6142 }
6143 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
6144 HandleOnlyLiveSuccessor(BB,
6145 SI->findCaseValue(Cond)->getCaseSuccessor());
6146 continue;
6147 }
6148 }
6149 }
6150
6151 // Remove instructions inside unreachable blocks. This prevents the
6152 // instcombine code from having to deal with some bad special cases, and
6153 // reduces use counts of instructions.
6154 for (BasicBlock &BB : F) {
6155 if (LiveBlocks.count(&BB))
6156 continue;
6157
6158 unsigned NumDeadInstInBB;
6159 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
6160
6161 MadeIRChange |= NumDeadInstInBB != 0;
6162 NumDeadInst += NumDeadInstInBB;
6163 }
6164
6165 // Once we've found all of the instructions to add to instcombine's worklist,
6166 // add them in reverse order. This way instcombine will visit from the top
6167 // of the function down. This jibes well with the way that it adds all uses
6168 // of instructions to the worklist after doing a transformation, thus avoiding
6169 // some N^2 behavior in pathological cases.
6170 Worklist.reserve(InstrsForInstructionWorklist.size());
6171 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
6172 // DCE instruction if trivially dead. As we iterate in reverse program
6173 // order here, we will clean up whole chains of dead instructions.
6174 if (isInstructionTriviallyDead(Inst, &TLI) ||
6175 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6176 ++NumDeadInst;
6177 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6178 salvageDebugInfo(*Inst);
6179 Inst->eraseFromParent();
6180 MadeIRChange = true;
6181 continue;
6182 }
6183
6184 Worklist.push(Inst);
6185 }
6186
6187 return MadeIRChange;
6188}
6189
6191 // Collect backedges.
6192 SmallVector<bool> Visited(F.getMaxBlockNumber());
6193 for (BasicBlock *BB : RPOT) {
6194 Visited[BB->getNumber()] = true;
6195 for (BasicBlock *Succ : successors(BB))
6196 if (Visited[Succ->getNumber()])
6197 BackEdges.insert({BB, Succ});
6198 }
6199 ComputedBackEdges = true;
6200}
6201
6207 const InstCombineOptions &Opts) {
6208 auto &DL = F.getDataLayout();
6209 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6210 !F.hasFnAttribute("instcombine-no-verify-fixpoint");
6211
6213
6214 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6215 // by instcombiner.
6216 bool MadeIRChange = false;
6218 MadeIRChange = LowerDbgDeclare(F);
6219
6220 // Iterate while there is work to do.
6221 unsigned Iteration = 0;
6222 while (true) {
6223 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6224 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6225 << " on " << F.getName()
6226 << " reached; stopping without verifying fixpoint\n");
6227 break;
6228 }
6229
6230 ++Iteration;
6231 ++NumWorklistIterations;
6232 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6233 << F.getName() << "\n");
6234
6235 InstCombinerImpl IC(Worklist, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, PSI,
6236 DL, RPOT);
6238 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6239 MadeChangeInThisIteration |= IC.run();
6240 if (!MadeChangeInThisIteration)
6241 break;
6242
6243 MadeIRChange = true;
6244 if (Iteration > Opts.MaxIterations) {
6246 "Instruction Combining on " + Twine(F.getName()) +
6247 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6248 " iterations. " +
6249 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6250 "'instcombine-no-verify-fixpoint' to suppress this error.");
6251 }
6252 }
6253
6254 if (Iteration == 1)
6255 ++NumOneIteration;
6256 else if (Iteration == 2)
6257 ++NumTwoIterations;
6258 else if (Iteration == 3)
6259 ++NumThreeIterations;
6260 else
6261 ++NumFourOrMoreIterations;
6262
6263 return MadeIRChange;
6264}
6265
6267
6269 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6270 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6271 OS, MapClassName2PassName);
6272 OS << '<';
6273 OS << "max-iterations=" << Options.MaxIterations << ";";
6274 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6275 OS << '>';
6276}
6277
6278char InstCombinePass::ID = 0;
6279
6282 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(F);
6283 // No changes since last InstCombine pass, exit early.
6284 if (LRT.shouldSkip(&ID))
6285 return PreservedAnalyses::all();
6286
6287 auto &AC = AM.getResult<AssumptionAnalysis>(F);
6288 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
6289 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
6291 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
6292
6293 auto *AA = &AM.getResult<AAManager>(F);
6294 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
6295 ProfileSummaryInfo *PSI =
6296 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
6297 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6298 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
6300
6301 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6302 BFI, BPI, PSI, Options)) {
6303 // No changes, all analyses are preserved.
6304 LRT.update(&ID, /*Changed=*/false);
6305 return PreservedAnalyses::all();
6306 }
6307
6308 // Mark all the analyses that instcombine updates as preserved.
6310 LRT.update(&ID, /*Changed=*/true);
6313 return PA;
6314}
6315
6331
6333 if (skipFunction(F))
6334 return false;
6335
6336 // Required analyses.
6337 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6338 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6339 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6341 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6343
6344 // Optional analyses.
6345 ProfileSummaryInfo *PSI =
6347 BlockFrequencyInfo *BFI =
6348 (PSI && PSI->hasProfileSummary()) ?
6350 nullptr;
6351 BranchProbabilityInfo *BPI = nullptr;
6352 if (auto *WrapperPass =
6354 BPI = &WrapperPass->getBPI();
6355
6356 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6357 BFI, BPI, PSI, InstCombineOptions());
6358}
6359
6361
6363
6365 "Combine redundant instructions", false, false)
6376 "Combine redundant instructions", false, false)
6377
6378// Initialization Routines.
6382
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
DXIL Resource Access
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static Constant * constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, Constant *Splat, bool SplatLHS, const DataLayout &DL)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * combineConstantOffsets(GetElementPtrInst &GEP, InstCombinerImpl &IC)
Combine constant offsets separated by variable offsets.
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static Instruction * foldSpliceBinOp(BinaryOperator &Inst, InstCombiner::BuilderTy &Builder)
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts)
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP)
Return true if we should canonicalize the gep to an i8 ptradd.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static Value * foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall, SelectInst *SelectInst, InstCombiner::BuilderTy &Builder)
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static std::optional< ModRefInfo > isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< Instruction * > &Users, const TargetLibraryInfo &TLI, bool KnowInit)
static cl::opt< unsigned > MaxAllocSiteRemovableUsers("instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048), cl::desc("Maximum number of users to visit in alloc-site " "removability analysis"))
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static Value * simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Instruction * foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, IRBuilderBase &Builder)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, GEPOperator &GEP2)
Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) transform.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
unsigned OpIndex
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
The Input class is used to parse a yaml document into in-memory structs and vectors.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:229
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1793
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1925
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1963
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1995
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:275
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:218
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
uint64_t getNumElements() const
Type * getElementType() const
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
ConstantArray - Constant Array Declarations.
Definition Constants.h:590
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:951
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNot(Constant *C)
static LLVM_ABI Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition Constants.h:674
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
const Constant * stripPointerCasts() const
Definition Constant.h:233
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool empty() const
Definition DenseMap.h:173
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:306
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:193
const BasicBlock & getEntryBlock() const
Definition Function.h:783
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags all()
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep (gep p, y), x).
bool hasNoUnsignedWrap() const
bool isInBounds() const
GEPNoWrapFlags intersectForOffsetAdd(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep p, x+y).
static GEPNoWrapFlags none()
GEPNoWrapFlags getNoWrapFlags() const
Definition Operator.h:385
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2081
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition IRBuilder.h:492
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
Definition IRBuilder.h:65
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI InstCombinePass(InstCombineOptions Opts={})
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * visitCondBrInst(CondBrInst &BI)
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
Instruction * foldBinOpSelectBinOp(BinaryOperator &Op)
In some cases it is beneficial to fold a select into a binary operator.
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
bool prepareWorklist(Function &F)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * foldBinopWithRecurrence(BinaryOperator &BO)
Try to fold binary operators whose operands are simple interleaved recurrences to a single recurrence...
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, const SimplifyQuery &Q, unsigned Depth=0)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
Instruction * visitUncondBrInst(UncondBrInst &BI)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, poison, ShMask) = C for lanes that select NewC.
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
TargetLibraryInfo & TLI
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
static bool isCanonicalPredicate(CmpPredicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Instruction * AnnotationMetadataSource
Source for annotation metadata, used by the IRBuilder inserter.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
BranchProbabilityInfo * BPI
ReversePostOrderTraversal< BasicBlock * > & RPOT
const DataLayout & DL
DomConditionCache DC
const bool MinimizeSize
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
void addToWorklist(Instruction *I)
LLVM_ABI Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
SmallDenseSet< std::pair< const BasicBlock *, const BasicBlock * >, 8 > BackEdges
Backedges, used to avoid pushing instructions across backedges in cases where this may result in infi...
LLVM_ABI std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
LLVM_ABI std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
bool isBackEdge(const BasicBlock *From, const BasicBlock *To)
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition InstCombine.h:68
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
bool isIntDivRem() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
static LLVM_ABI LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
LLVM_ABI void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
bool isCatch(unsigned Idx) const
Return 'true' if the clause at index Idx is a catch clause.
bool isFilter(unsigned Idx) const
Return 'true' if the clause at index Idx is a filter clause.
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
A function/module analysis which provides an empty LastRunTrackingInfo.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1426
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1432
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
This is the common base class for memset/memcpy/memmove.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition Metadata.h:64
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1679
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:116
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
const Value * getTrueValue() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
Unconditional Branch instruction.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Use * op_iterator
Definition User.h:254
op_range operands()
Definition User.h:267
op_iterator op_begin()
Definition User.h:259
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
op_iterator op_end()
Definition User.h:261
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:727
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:163
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:147
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool *CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:909
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, FCmpInst > m_FCmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
OneOps_match< OpTy, Instruction::Freeze > m_Freeze(const OpTy &Op)
Matches FreezeInst.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
auto m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
ContainsMatchingVectorElement_match< SPTy > m_ContainsMatchingVectorElement(const SPTy &SubPattern)
Match a vector constant where at least one of its elements matches the subpattern.
NNegZExt_match< OpTy > m_NNegZExt(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Splat_match< T > m_ConstantSplat(const T &SubPattern)
Match a constant splat. TODO: Extend this to non-constant splats.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
m_Intrinsic_Ty< Opnd0 >::Ty m_Ctpop(const Opnd0 &Op0)
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_MaxOrMin(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_unless< Ty > m_Unless(const Ty &M)
Match if the inner matcher does NOT match.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition DWP.cpp:573
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
void stable_sort(R &&Range)
Definition STLExtras.h:2116
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &)
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition Local.cpp:2511
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool succ_empty(const Instruction *I)
Definition CFG.h:141
LLVM_ABI Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
LLVM_ABI FunctionPass * createInstructionCombiningPass()
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1690
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1674
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
LLVM_ABI bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition Local.cpp:2494
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI bool LowerDbgDeclare(Function &F)
Lowers dbg.declare records into appropriate set of dbg.value records.
Definition Local.cpp:1818
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI void ConvertDebugDeclareToDebugValue(DbgVariableRecord *DVR, StoreInst *SI, DIBuilder &Builder)
Inserts a dbg.value record before a store to an alloca'd value that has an associated dbg....
Definition Local.cpp:1662
LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition Local.cpp:2063
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2440
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
TargetTransformInfo TTI
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
LLVM_ABI void findDbgUsers(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the debug info records describing a value.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
SimplifyQuery getWithInstruction(const Instruction *I) const