InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
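// For illustration, canonicalizations 1 and 6 together turn:
//    %Y = mul i32 8, %X
// into:
//    %Y = shl i32 %X, 3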
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
42#include "llvm/ADT/Statistic.h"
47#include "llvm/Analysis/CFG.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
70#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
93#include "llvm/Support/Debug.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
135namespace llvm {
136
137static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
138 cl::desc("Enable code sinking"),
139 cl::init(true));
140
141static cl::opt<unsigned> MaxSinkNumUsers(
142 "instcombine-max-sink-users", cl::init(32),
143 cl::desc("Maximum number of undroppable users for instruction sinking"));
144
145static cl::opt<unsigned>
146MaxArraySize("instcombine-maxarray-size", cl::init(1024),
147 cl::desc("Maximum array size considered when doing a combine"));
148
150
151// FIXME: Remove this flag when it is no longer necessary to convert
152// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
153// increases variable availability at the cost of accuracy. Variables that
154// cannot be promoted by mem2reg or SROA will be described as living in memory
155// for their entire lifetime. However, passes like DSE and instcombine can
156// delete stores to the alloca, leading to misleading and inaccurate debug
157// information. This flag can be removed when those passes are fixed.
158static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
159 cl::Hidden, cl::init(true));
160
161} // end namespace llvm
162
163std::optional<Instruction *>
164InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
165 // Handle target specific intrinsics
166 if (II.getCalledFunction()->isTargetIntrinsic()) {
167 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(*this, II);
168 }
169 return std::nullopt;
170}
171
172std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
173 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
174 bool &KnownBitsComputed) {
175 // Handle target specific intrinsics
176 if (II.getCalledFunction()->isTargetIntrinsic()) {
177 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
178 *this, II, DemandedMask, Known, KnownBitsComputed);
179 }
180 return std::nullopt;
181}
182
183std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
184 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
185 APInt &PoisonElts2, APInt &PoisonElts3,
186 std::function<void(Instruction *, unsigned, APInt, APInt &)>
187 SimplifyAndSetOp) {
188 // Handle target specific intrinsics
189 if (II.getCalledFunction()->isTargetIntrinsic()) {
190 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
191 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
192 SimplifyAndSetOp);
193 }
194 return std::nullopt;
195}
196
197bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
198 // Approved exception for TTI use: This queries a legality property of the
199 // target, not an profitability heuristic. Ideally this should be part of
200 // DataLayout instead.
201 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
202}
203
204Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
205 if (!RewriteGEP)
206 return llvm::emitGEPOffset(&Builder, DL, GEP);
207
208 IRBuilderBase::InsertPointGuard Guard(Builder);
209 auto *Inst = dyn_cast<Instruction>(GEP);
210 if (Inst)
211 Builder.SetInsertPoint(Inst);
212
213 Value *Offset = EmitGEPOffset(GEP);
214 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
215 if (Inst && !GEP->hasAllConstantIndices() &&
216 !GEP->getSourceElementType()->isIntegerTy(8)) {
217 replaceInstUsesWith(
218 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
219 Offset, "", GEP->getNoWrapFlags()));
220 eraseInstFromFunction(*Inst);
221 }
222 return Offset;
223}
224
225Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
226 GEPNoWrapFlags NW, Type *IdxTy,
227 bool RewriteGEPs) {
228 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
229 if (Sum)
230 return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
231 NW.isInBounds());
232 else
233 return Offset;
234 };
235
236 Value *Sum = nullptr;
237 Value *OneUseSum = nullptr;
238 Value *OneUseBase = nullptr;
239 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
240 for (GEPOperator *GEP : reverse(GEPs)) {
241 Value *Offset;
242 {
243 // Expand the offset at the point of the previous GEP to enable rewriting.
244 // However, use the original insertion point for calculating Sum.
245 IRBuilderBase::InsertPointGuard Guard(Builder);
246 auto *Inst = dyn_cast<Instruction>(GEP);
247 if (RewriteGEPs && Inst)
248 Builder.SetInsertPoint(Inst);
249
250 Offset = EmitGEPOffset(GEP);
251 if (Offset->getType() != IdxTy)
252 Offset = Builder.CreateVectorSplat(
253 cast<VectorType>(IdxTy)->getElementCount(), Offset);
254 if (GEP->hasOneUse()) {
255 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
256 OneUseSum = Add(OneUseSum, Offset);
257 OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
258 if (!OneUseBase)
259 OneUseBase = GEP->getPointerOperand();
260 continue;
261 }
262
263 if (OneUseSum)
264 Offset = Add(OneUseSum, Offset);
265
266 // Rewrite the GEP to reuse the computed offset. This also includes
267 // offsets from preceding one-use GEPs.
268 if (RewriteGEPs && Inst &&
269 !(GEP->getSourceElementType()->isIntegerTy(8) &&
270 GEP->getOperand(1) == Offset)) {
271 replaceInstUsesWith(
272 *Inst,
273 Builder.CreatePtrAdd(
274 OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
275 OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
276 eraseInstFromFunction(*Inst);
277 }
278 }
279
280 Sum = Add(Sum, Offset);
281 OneUseSum = OneUseBase = nullptr;
282 OneUseFlags = GEPNoWrapFlags::all();
283 }
284 if (OneUseSum)
285 Sum = Add(Sum, OneUseSum);
286 if (!Sum)
287 return Constant::getNullValue(IdxTy);
288 return Sum;
289}
290
291/// Legal integers and common types are considered desirable. This is used to
292/// avoid creating instructions with types that may not be supported well by
293/// the backend.
294/// NOTE: This treats i8, i16 and i32 specially because they are common
295/// types in frontend languages.
296bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
297 switch (BitWidth) {
298 case 8:
299 case 16:
300 case 32:
301 return true;
302 default:
303 return DL.isLegalInteger(BitWidth);
304 }
305}
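// Illustrative example: with a data layout that declares only i64 as a legal
// integer type, isDesirableIntType(16) is true (i16 is a common frontend
// type) while isDesirableIntType(24) is false.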
306
307/// Return true if it is desirable to convert an integer computation from a
308/// given bit width to a new bit width.
309/// We don't want to convert from a legal or desirable type (like i8) to an
310/// illegal type or from a smaller to a larger illegal type. A width of '1'
311/// is always treated as a desirable type because i1 is a fundamental type in
312/// IR, and there are many specialized optimizations for i1 types.
313/// Common/desirable widths are equally treated as legal to convert to, in
314/// order to open up more combining opportunities.
315bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
316 unsigned ToWidth) const {
317 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
318 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
319
320 // Convert to desirable widths even if they are not legal types.
321 // Only shrink types, to prevent infinite loops.
322 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
323 return true;
324
325 // If this is a legal or desirable integer 'from' type, and the result would be
326 // an illegal type, don't do the transformation.
327 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
328 return false;
329
330 // Otherwise, if both are illegal, do not increase the size of the result. We
331 // do allow things like i160 -> i64, but not i64 -> i160.
332 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
333 return false;
334
335 return true;
336}
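// Illustrative example with only i64 legal in the data layout: i57 -> i32 is
// allowed (shrinking to a desirable width), i32 -> i57 is rejected (desirable
// to illegal), and i57 -> i64 is allowed (illegal to legal).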
337
338/// Return true if it is desirable to convert a computation from 'From' to 'To'.
339/// We don't want to convert from a legal to an illegal type or from a smaller
340/// to a larger illegal type. i1 is always treated as a legal type because it is
341/// a fundamental type in IR, and there are many specialized optimizations for
342/// i1 types.
343bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
344 // TODO: This could be extended to allow vectors. Datalayout changes might be
345 // needed to properly support that.
346 if (!From->isIntegerTy() || !To->isIntegerTy())
347 return false;
348
349 unsigned FromWidth = From->getPrimitiveSizeInBits();
350 unsigned ToWidth = To->getPrimitiveSizeInBits();
351 return shouldChangeType(FromWidth, ToWidth);
352}
353
354// Return true if No Signed Wrap should be maintained for I.
355// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
356// where both B and C should be ConstantInts, results in a constant that does
357// not overflow. This function only handles the Add/Sub/Mul opcodes. For
358// all other opcodes, the function conservatively returns false.
359static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
360 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
361 if (!OBO || !OBO->hasNoSignedWrap())
362 return false;
363
364 const APInt *BVal, *CVal;
365 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
366 return false;
367
368 // We reason about Add/Sub/Mul Only.
369 bool Overflow = false;
370 switch (I.getOpcode()) {
371 case Instruction::Add:
372 (void)BVal->sadd_ov(*CVal, Overflow);
373 break;
374 case Instruction::Sub:
375 (void)BVal->ssub_ov(*CVal, Overflow);
376 break;
377 case Instruction::Mul:
378 (void)BVal->smul_ov(*CVal, Overflow);
379 break;
380 default:
381 // Conservatively return false for other opcodes.
382 return false;
383 }
384 return !Overflow;
385}
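// For example, when reassociating (X +nsw 3) +nsw 5 the folded constant 3 + 5
// does not overflow, so nsw can be kept; if the second constant were INT_MAX
// instead, nsw would have to be dropped.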
386
387static bool hasNoUnsignedWrap(BinaryOperator &I) {
388 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
389 return OBO && OBO->hasNoUnsignedWrap();
390}
391
392static bool hasNoSignedWrap(BinaryOperator &I) {
393 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
394 return OBO && OBO->hasNoSignedWrap();
395}
396
397/// Conservatively clears subclassOptionalData after a reassociation or
398/// commutation. Fast-math flags are the exception: they are preserved when
399/// applicable.
400static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
401 auto *FPMO = dyn_cast<FPMathOperator>(&I);
402 if (!FPMO) {
403 I.clearSubclassOptionalData();
404 return;
405 }
406
407 FastMathFlags FMF = I.getFastMathFlags();
408 I.clearSubclassOptionalData();
409 I.setFastMathFlags(FMF);
410}
411
412/// Combine constant operands of associative operations either before or after a
413/// cast to eliminate one of the associative operations:
414/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
415/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
416static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
417 InstCombinerImpl &IC) {
418 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
419 if (!Cast || !Cast->hasOneUse())
420 return false;
421
422 // TODO: Enhance logic for other casts and remove this check.
423 auto CastOpcode = Cast->getOpcode();
424 if (CastOpcode != Instruction::ZExt)
425 return false;
426
427 // TODO: Enhance logic for other BinOps and remove this check.
428 if (!BinOp1->isBitwiseLogicOp())
429 return false;
430
431 auto AssocOpcode = BinOp1->getOpcode();
432 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
433 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
434 return false;
435
436 Constant *C1, *C2;
437 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
438 !match(BinOp2->getOperand(1), m_Constant(C2)))
439 return false;
440
441 // TODO: This assumes a zext cast.
442 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
443 // to the destination type might lose bits.
444
445 // Fold the constants together in the destination type:
446 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
447 const DataLayout &DL = IC.getDataLayout();
448 Type *DestTy = C1->getType();
449 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
450 if (!CastC2)
451 return false;
452 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
453 if (!FoldedC)
454 return false;
455
456 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
457 IC.replaceOperand(*BinOp1, 1, FoldedC);
458 BinOp1->dropPoisonGeneratingFlags();
459 Cast->dropPoisonGeneratingFlags();
460 return true;
461}
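// Illustrative example (zext of i8 to i32):
//   %a = and i8 %x, 12
//   %z = zext i8 %a to i32
//   %r = and i32 %z, 10
// is rewritten so that %z operates on %x directly and %r uses the folded
// constant 10 & 12 = 8.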
462
463// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
464// inttoptr ( ptrtoint (x) ) --> x
465Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
466 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
467 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
468 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
469 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
470 Type *CastTy = IntToPtr->getDestTy();
471 if (PtrToInt &&
472 CastTy->getPointerAddressSpace() ==
473 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
474 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
475 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
476 return PtrToInt->getOperand(0);
477 }
478 return nullptr;
479}
480
481/// This performs a few simplifications for operators that are associative or
482/// commutative:
483///
484/// Commutative operators:
485///
486/// 1. Order operands such that they are listed from right (least complex) to
487/// left (most complex). This puts constants before unary operators before
488/// binary operators.
489///
490/// Associative operators:
491///
492/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
493/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
494///
495/// Associative and commutative operators:
496///
497/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
498/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
499/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
500/// if C1 and C2 are constants.
501bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
502 Instruction::BinaryOps Opcode = I.getOpcode();
503 bool Changed = false;
504
505 do {
506 // Order operands such that they are listed from right (least complex) to
507 // left (most complex). This puts constants before unary operators before
508 // binary operators.
509 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
510 getComplexity(I.getOperand(1)))
511 Changed = !I.swapOperands();
512
513 if (I.isCommutative()) {
514 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
515 replaceOperand(I, 0, Pair->first);
516 replaceOperand(I, 1, Pair->second);
517 Changed = true;
518 }
519 }
520
521 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
522 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
523
524 if (I.isAssociative()) {
525 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
526 if (Op0 && Op0->getOpcode() == Opcode) {
527 Value *A = Op0->getOperand(0);
528 Value *B = Op0->getOperand(1);
529 Value *C = I.getOperand(1);
530
531 // Does "B op C" simplify?
532 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
533 // It simplifies to V. Form "A op V".
534 replaceOperand(I, 0, A);
535 replaceOperand(I, 1, V);
536 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
537 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
538
539 // Conservatively clear all optional flags since they may not be
540 // preserved by the reassociation. Reset nsw/nuw based on the above
541 // analysis.
542 ClearSubclassDataAfterReassociation(I);
543
544 // Note: this is only valid because SimplifyBinOp doesn't look at
545 // the operands to Op0.
546 if (IsNUW)
547 I.setHasNoUnsignedWrap(true);
548
549 if (IsNSW)
550 I.setHasNoSignedWrap(true);
551
552 Changed = true;
553 ++NumReassoc;
554 continue;
555 }
556 }
557
558 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
559 if (Op1 && Op1->getOpcode() == Opcode) {
560 Value *A = I.getOperand(0);
561 Value *B = Op1->getOperand(0);
562 Value *C = Op1->getOperand(1);
563
564 // Does "A op B" simplify?
565 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
566 // It simplifies to V. Form "V op C".
567 replaceOperand(I, 0, V);
568 replaceOperand(I, 1, C);
569 // Conservatively clear the optional flags, since they may not be
570 // preserved by the reassociation.
571 ClearSubclassDataAfterReassociation(I);
572 Changed = true;
573 ++NumReassoc;
574 continue;
575 }
576 }
577 }
578
579 if (I.isAssociative() && I.isCommutative()) {
580 if (simplifyAssocCastAssoc(&I, *this)) {
581 Changed = true;
582 ++NumReassoc;
583 continue;
584 }
585
586 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
587 if (Op0 && Op0->getOpcode() == Opcode) {
588 Value *A = Op0->getOperand(0);
589 Value *B = Op0->getOperand(1);
590 Value *C = I.getOperand(1);
591
592 // Does "C op A" simplify?
593 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
594 // It simplifies to V. Form "V op B".
595 replaceOperand(I, 0, V);
596 replaceOperand(I, 1, B);
597 // Conservatively clear the optional flags, since they may not be
598 // preserved by the reassociation.
599 ClearSubclassDataAfterReassociation(I);
600 Changed = true;
601 ++NumReassoc;
602 continue;
603 }
604 }
605
606 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
607 if (Op1 && Op1->getOpcode() == Opcode) {
608 Value *A = I.getOperand(0);
609 Value *B = Op1->getOperand(0);
610 Value *C = Op1->getOperand(1);
611
612 // Does "C op A" simplify?
613 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
614 // It simplifies to V. Form "B op V".
615 replaceOperand(I, 0, B);
616 replaceOperand(I, 1, V);
617 // Conservatively clear the optional flags, since they may not be
618 // preserved by the reassociation.
619 ClearSubclassDataAfterReassociation(I);
620 Changed = true;
621 ++NumReassoc;
622 continue;
623 }
624 }
625
626 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
627 // if C1 and C2 are constants.
628 Value *A, *B;
629 Constant *C1, *C2, *CRes;
630 if (Op0 && Op1 &&
631 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
632 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
633 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
634 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
635 bool IsNUW = hasNoUnsignedWrap(I) &&
636 hasNoUnsignedWrap(*Op0) &&
637 hasNoUnsignedWrap(*Op1);
638 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
639 BinaryOperator::CreateNUW(Opcode, A, B) :
640 BinaryOperator::Create(Opcode, A, B);
641
642 if (isa<FPMathOperator>(NewBO)) {
643 FastMathFlags Flags = I.getFastMathFlags() &
644 Op0->getFastMathFlags() &
645 Op1->getFastMathFlags();
646 NewBO->setFastMathFlags(Flags);
647 }
648 InsertNewInstWith(NewBO, I.getIterator());
649 NewBO->takeName(Op1);
650 replaceOperand(I, 0, NewBO);
651 replaceOperand(I, 1, CRes);
652 // Conservatively clear the optional flags, since they may not be
653 // preserved by the reassociation.
654 ClearSubclassDataAfterReassociation(I);
655 if (IsNUW)
656 I.setHasNoUnsignedWrap(true);
657
658 Changed = true;
659 continue;
660 }
661 }
662
663 // No further simplifications.
664 return Changed;
665 } while (true);
666}
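// For example, transform 2 rewrites ((X | 7) | 7) to (X | 7) because the inner
// "7 | 7" simplifies, and transform 6 rewrites ((A + 1) + (B + 2)) to
// ((A + B) + 3) when both inner adds have a single use.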
667
668/// Return whether "X LOp (Y ROp Z)" is always equal to
669/// "(X LOp Y) ROp (X LOp Z)".
670static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
671 Instruction::BinaryOps ROp) {
672 // X & (Y | Z) <--> (X & Y) | (X & Z)
673 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
674 if (LOp == Instruction::And)
675 return ROp == Instruction::Or || ROp == Instruction::Xor;
676
677 // X | (Y & Z) <--> (X | Y) & (X | Z)
678 if (LOp == Instruction::Or)
679 return ROp == Instruction::And;
680
681 // X * (Y + Z) <--> (X * Y) + (X * Z)
682 // X * (Y - Z) <--> (X * Y) - (X * Z)
683 if (LOp == Instruction::Mul)
684 return ROp == Instruction::Add || ROp == Instruction::Sub;
685
686 return false;
687}
688
689/// Return whether "(X LOp Y) ROp Z" is always equal to
690/// "(X ROp Z) LOp (Y ROp Z)".
691static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
692 Instruction::BinaryOps ROp) {
693 if (Instruction::isCommutative(ROp))
694 return leftDistributesOverRight(ROp, LOp);
695
696 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
697 return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
698
699 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
700 // but this requires knowing that the addition does not overflow and other
701 // such subtleties.
702}
703
704/// This function returns the identity value for the given opcode, which can be used to
705/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
706static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
707 if (isa<Constant>(V))
708 return nullptr;
709
710 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
711}
712
713/// This function predicates factorization using distributive laws. By default,
714/// it just returns the 'Op' inputs. But for special-cases like
715/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
716/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
717/// allow more factorization opportunities.
718static Instruction::BinaryOps
719getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
720 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
721 assert(Op && "Expected a binary operator");
722 LHS = Op->getOperand(0);
723 RHS = Op->getOperand(1);
724 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
725 Constant *C;
726 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
727 // X << C --> X * (1 << C)
728 RHS = ConstantFoldBinaryInstruction(
729 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
730 assert(RHS && "Constant folding of immediate constants failed");
731 return Instruction::Mul;
732 }
733 // TODO: We can add other conversions e.g. shr => div etc.
734 }
735 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
736 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
737 match(Op, m_LShr(m_NonNegative(), m_Value()))) {
738 // lshr nneg C, X --> ashr nneg C, X
739 return Instruction::AShr;
740 }
741 }
742 return Op->getOpcode();
743}
744
745/// This tries to simplify binary operations by factorizing out common terms
746/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
747static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
748 InstCombiner::BuilderTy &Builder,
749 Instruction::BinaryOps InnerOpcode, Value *A,
750 Value *B, Value *C, Value *D) {
751 assert(A && B && C && D && "All values must be provided");
752
753 Value *V = nullptr;
754 Value *RetVal = nullptr;
755 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
756 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
757
758 // Does "X op' Y" always equal "Y op' X"?
759 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
760
761 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
762 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
763 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
764 // commutative case, "(A op' B) op (C op' A)"?
765 if (A == C || (InnerCommutative && A == D)) {
766 if (A != C)
767 std::swap(C, D);
768 // Consider forming "A op' (B op D)".
769 // If "B op D" simplifies then it can be formed with no cost.
770 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
771
772 // If "B op D" doesn't simplify then only go on if one of the existing
773 // operations "A op' B" and "C op' D" will be zapped as no longer used.
774 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
775 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
776 if (V)
777 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
778 }
779 }
780
781 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
782 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
783 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
784 // commutative case, "(A op' B) op (B op' D)"?
785 if (B == D || (InnerCommutative && B == C)) {
786 if (B != D)
787 std::swap(C, D);
788 // Consider forming "(A op C) op' B".
789 // If "A op C" simplifies then it can be formed with no cost.
790 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
791
792 // If "A op C" doesn't simplify then only go on if one of the existing
793 // operations "A op' B" and "C op' D" will be zapped as no longer used.
794 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
795 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
796 if (V)
797 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
798 }
799 }
800
801 if (!RetVal)
802 return nullptr;
803
804 ++NumFactor;
805 RetVal->takeName(&I);
806
807 // Try to add no-overflow flags to the final value.
808 if (isa<BinaryOperator>(RetVal)) {
809 bool HasNSW = false;
810 bool HasNUW = false;
811 if (isa<OverflowingBinaryOperator>(&I)) {
812 HasNSW = I.hasNoSignedWrap();
813 HasNUW = I.hasNoUnsignedWrap();
814 }
815 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
816 HasNSW &= LOBO->hasNoSignedWrap();
817 HasNUW &= LOBO->hasNoUnsignedWrap();
818 }
819
820 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
821 HasNSW &= ROBO->hasNoSignedWrap();
822 HasNUW &= ROBO->hasNoUnsignedWrap();
823 }
824
825 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
826 // We can propagate 'nsw' if we know that
827 // %Y = mul nsw i16 %X, C
828 // %Z = add nsw i16 %Y, %X
829 // =>
830 // %Z = mul nsw i16 %X, C+1
831 //
832 // iff C+1 isn't INT_MIN
833 const APInt *CInt;
834 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
835 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
836
837 // nuw can be propagated with any constant or nuw value.
838 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
839 }
840 }
841 return RetVal;
842}
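// For example, "(%x * 6) + %x" can be factorized to "%x * 7" (using the mul
// identity for the bare %x term); if both the original mul and the add are
// nsw and the folded constant is not INT_MIN, the new mul keeps nsw.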
843
844// If `I` has one Const operand and the other matches `(ctpop (not x))`,
845// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
846// This is only useful if the new subtract can fold, so we only handle the
847// following cases:
848// 1) (add/sub/disjoint_or C, (ctpop (not x))
849// -> (add/sub/disjoint_or C', (ctpop x))
850// 2) (cmp pred C, (ctpop (not x))
851// -> (cmp pred C', (ctpop x))
852Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
853 unsigned Opc = I->getOpcode();
854 unsigned ConstIdx = 1;
855 switch (Opc) {
856 default:
857 return nullptr;
858 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
859 // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
860 // is constant.
861 case Instruction::Sub:
862 ConstIdx = 0;
863 break;
864 case Instruction::ICmp:
865 // Signed predicates aren't correct in some edge cases (e.g. for i2 types).
866 // Also, since (ctpop x) is known to be in [0, BitWidth(x)], almost all signed
867 // comparisons against it are simplified to unsigned ones anyway.
868 if (cast<ICmpInst>(I)->isSigned())
869 return nullptr;
870 break;
871 case Instruction::Or:
872 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
873 return nullptr;
874 [[fallthrough]];
875 case Instruction::Add:
876 break;
877 }
878
879 Value *Op;
880 // Find ctpop.
881 if (!match(I->getOperand(1 - ConstIdx),
882 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
883 return nullptr;
884
885 Constant *C;
886 // Check other operand is ImmConstant.
887 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
888 return nullptr;
889
890 Type *Ty = Op->getType();
891 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
892 // Need extra check for icmp. Note if this check is true, it generally means
893 // the icmp will simplify to true/false.
894 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
895 Constant *Cmp =
896 ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, C, BitWidthC, DL);
897 if (!Cmp || !Cmp->isZeroValue())
898 return nullptr;
899 }
900
901 // Check we can invert `(not x)` for free.
902 bool Consumes = false;
903 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
904 return nullptr;
905 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
906 assert(NotOp != nullptr &&
907 "Desync between isFreeToInvert and getFreelyInverted");
908
909 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
910
911 Value *R = nullptr;
912
913 // Do the transformation here to avoid potentially introducing an infinite
914 // loop.
915 switch (Opc) {
916 case Instruction::Sub:
917 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
918 break;
919 case Instruction::Or:
920 case Instruction::Add:
921 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
922 break;
923 case Instruction::ICmp:
924 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
925 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
926 break;
927 default:
928 llvm_unreachable("Unhandled Opcode");
929 }
930 assert(R != nullptr);
931 return replaceInstUsesWith(*I, R);
932}
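// Illustrative example for i32 (bit width 32):
//   %r = add i32 (ctpop (xor %x, -1)), 10
// becomes
//   %r = sub i32 42, (ctpop %x)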
933
934// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
935// IFF
936// 1) the logic_shifts match
937// 2) either both binops are binops and one is `and` or
938// BinOp1 is `and`
939// (logic_shift (inv_logic_shift C1, C), C) == C1 or
940//
941// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
942//
943// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
944// IFF
945// 1) the logic_shifts match
946// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
947//
948// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
949//
950// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
951// IFF
952// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
953// 2) Binop2 is `not`
954//
955// -> (arithmetic_shift Binop1((not X), Y), Amt)
956
957Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
958 const DataLayout &DL = I.getDataLayout();
959 auto IsValidBinOpc = [](unsigned Opc) {
960 switch (Opc) {
961 default:
962 return false;
963 case Instruction::And:
964 case Instruction::Or:
965 case Instruction::Xor:
966 case Instruction::Add:
967 // Skip Sub as we only match constant masks which will canonicalize to use
968 // add.
969 return true;
970 }
971 };
972
973 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
974 // constraints.
975 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
976 unsigned ShOpc) {
977 assert(ShOpc != Instruction::AShr);
978 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
979 ShOpc == Instruction::Shl;
980 };
981
982 auto GetInvShift = [](unsigned ShOpc) {
983 assert(ShOpc != Instruction::AShr);
984 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
985 };
986
987 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
988 unsigned ShOpc, Constant *CMask,
989 Constant *CShift) {
990 // If the BinOp1 is `and` we don't need to check the mask.
991 if (BinOpc1 == Instruction::And)
992 return true;
993
994 // For all other possible transforms we need a completely distributable
995 // binop/shift (anything but `add` + `lshr`).
996 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
997 return false;
998
999 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
1000 // vecs, otherwise the mask will be simplified and the following check will
1001 // handle it).
1002 if (BinOpc2 == Instruction::And)
1003 return true;
1004
1005 // Otherwise, need mask that meets the below requirement.
1006 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1007 Constant *MaskInvShift =
1008 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1009 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
1010 CMask;
1011 };
1012
1013 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1014 Constant *CMask, *CShift;
1015 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1016 if (!match(I.getOperand(ShOpnum),
1017 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
1018 return nullptr;
1019 if (!match(I.getOperand(1 - ShOpnum),
1021 m_OneUse(m_Shift(m_Value(X), m_Specific(Shift))),
1022 m_Value(ShiftedX)),
1023 m_Value(Mask))))
1024 return nullptr;
1025 // Make sure we are matching instruction shifts and not ConstantExpr
1026 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
1027 auto *IX = dyn_cast<Instruction>(ShiftedX);
1028 if (!IY || !IX)
1029 return nullptr;
1030
1031 // LHS and RHS need same shift opcode
1032 unsigned ShOpc = IY->getOpcode();
1033 if (ShOpc != IX->getOpcode())
1034 return nullptr;
1035
1036 // Make sure binop is real instruction and not ConstantExpr
1037 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
1038 if (!BO2)
1039 return nullptr;
1040
1041 unsigned BinOpc = BO2->getOpcode();
1042 // Make sure we have valid binops.
1043 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1044 return nullptr;
1045
1046 if (ShOpc == Instruction::AShr) {
1047 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
1048 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
1049 Value *NotX = Builder.CreateNot(X);
1050 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
1051 return BinaryOperator::Create(
1052 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
1053 }
1054
1055 return nullptr;
1056 }
1057
1058 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1059 // distribute to drop the shift regardless of the constants.
1060 if (BinOpc == I.getOpcode() &&
1061 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1062 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
1063 Value *NewBinOp1 = Builder.CreateBinOp(
1064 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
1065 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
1066 }
1067
1068 // Otherwise we can only distribute by constant shifting the mask, so
1069 // ensure we have constants.
1070 if (!match(Shift, m_ImmConstant(CShift)))
1071 return nullptr;
1072 if (!match(Mask, m_ImmConstant(CMask)))
1073 return nullptr;
1074
1075 // Check if we can distribute the binops.
1076 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1077 return nullptr;
1078
1079 Constant *NewCMask =
1080 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1081 Value *NewBinOp2 = Builder.CreateBinOp(
1082 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1083 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1084 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1085 NewBinOp1, CShift);
1086 };
1087
1088 if (Instruction *R = MatchBinOp(0))
1089 return R;
1090 return MatchBinOp(1);
1091}
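// Illustrative example of the first pattern with i8 operands, BinOp1 = or,
// BinOp2 = and, and a matching lshr by 4:
//   or (and (lshr %x, 4), 15), (lshr %y, 4)
// can become
//   lshr (or (and %x, 240), %y), 4
// since shifting the mask 15 back left by 4 (giving 0xF0) loses no bits.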
1092
1093// (Binop (zext C), (select C, T, F))
1094// -> (select C, (binop 1, T), (binop 0, F))
1095//
1096// (Binop (sext C), (select C, T, F))
1097// -> (select C, (binop -1, T), (binop 0, F))
1098//
1099// Attempt to simplify binary operations into a select with folded args, when
1100// one operand of the binop is a select instruction and the other operand is a
1101// zext/sext whose operand is the select condition.
1102static Instruction *foldBinOpOfSelectAndCastOfSelectCondition(
1103 BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
1104 // TODO: this simplification may be extended to any speculatable instruction,
1105 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1106 Instruction::BinaryOps Opc = I.getOpcode();
1107 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1108 Value *A, *CondVal, *TrueVal, *FalseVal;
1109 Value *CastOp;
1110
1111 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1112 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1113 A->getType()->getScalarSizeInBits() == 1 &&
1114 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1115 m_Value(FalseVal)));
1116 };
1117
1118 // Make sure one side of the binop is a select instruction, and the other is a
1119 // zero/sign extension operating on an i1.
1120 if (MatchSelectAndCast(LHS, RHS))
1121 CastOp = LHS;
1122 else if (MatchSelectAndCast(RHS, LHS))
1123 CastOp = RHS;
1124 else
1125 return nullptr;
1126
1127 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1128 bool IsCastOpRHS = (CastOp == RHS);
1129 bool IsZExt = isa<ZExtInst>(CastOp);
1130 Constant *C;
1131
1132 if (IsTrueArm) {
1133 C = Constant::getNullValue(V->getType());
1134 } else if (IsZExt) {
1135 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1136 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1137 } else {
1138 C = Constant::getAllOnesValue(V->getType());
1139 }
1140
1141 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1142 : Builder.CreateBinOp(Opc, C, V);
1143 };
1144
1145 // If the value used in the zext/sext is the select condition, or the negation
1146 // of the select condition, the binop can be simplified.
1147 if (CondVal == A) {
1148 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1149 return SelectInst::Create(CondVal, NewTrueVal,
1150 NewFoldedConst(true, FalseVal));
1151 }
1152
1153 if (match(A, m_Not(m_Specific(CondVal)))) {
1154 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1155 return SelectInst::Create(CondVal, NewTrueVal,
1156 NewFoldedConst(false, FalseVal));
1157 }
1158
1159 return nullptr;
1160}
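// Illustrative example:
//   %e = zext i1 %c to i32
//   %s = select i1 %c, i32 %t, i32 %f
//   %r = add i32 %e, %s
// becomes (select %c, (add 1, %t), (add 0, %f)).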
1161
1162Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1163 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1164 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1165 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1166 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1167 Value *A, *B, *C, *D;
1168 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1169
1170 if (Op0)
1171 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1172 if (Op1)
1173 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1174
1175 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1176 // a common term.
1177 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1178 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1179 return V;
1180
1181 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1182 // term.
1183 if (Op0)
1184 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1185 if (Value *V =
1186 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1187 return V;
1188
1189 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1190 // term.
1191 if (Op1)
1192 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1193 if (Value *V =
1194 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1195 return V;
1196
1197 return nullptr;
1198}
1199
1200/// This tries to simplify binary operations which some other binary operation
1201/// distributes over either by factorizing out common terms
1202/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1203/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1204/// Returns the simplified value, or null if it didn't simplify.
1205Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1206 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1207 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1208 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1209 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1210
1211 // Factorization.
1212 if (Value *R = tryFactorizationFolds(I))
1213 return R;
1214
1215 // Expansion.
1216 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1217 // The instruction has the form "(A op' B) op C". See if expanding it out
1218 // to "(A op C) op' (B op C)" results in simplifications.
1219 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1220 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1221
1222 // Disable the use of undef because it's not safe to distribute undef.
1223 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1224 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1225 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1226
1227 // Do "A op C" and "B op C" both simplify?
1228 if (L && R) {
1229 // They do! Return "L op' R".
1230 ++NumExpand;
1231 C = Builder.CreateBinOp(InnerOpcode, L, R);
1232 C->takeName(&I);
1233 return C;
1234 }
1235
1236 // Does "A op C" simplify to the identity value for the inner opcode?
1237 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1238 // They do! Return "B op C".
1239 ++NumExpand;
1240 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1241 C->takeName(&I);
1242 return C;
1243 }
1244
1245 // Does "B op C" simplify to the identity value for the inner opcode?
1246 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1247 // They do! Return "A op C".
1248 ++NumExpand;
1249 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1250 C->takeName(&I);
1251 return C;
1252 }
1253 }
1254
1255 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1256 // The instruction has the form "A op (B op' C)". See if expanding it out
1257 // to "(A op B) op' (A op C)" results in simplifications.
1258 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1259 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1260
1261 // Disable the use of undef because it's not safe to distribute undef.
1262 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1263 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1264 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1265
1266 // Do "A op B" and "A op C" both simplify?
1267 if (L && R) {
1268 // They do! Return "L op' R".
1269 ++NumExpand;
1270 A = Builder.CreateBinOp(InnerOpcode, L, R);
1271 A->takeName(&I);
1272 return A;
1273 }
1274
1275 // Does "A op B" simplify to the identity value for the inner opcode?
1276 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1277 // They do! Return "A op C".
1278 ++NumExpand;
1279 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1280 A->takeName(&I);
1281 return A;
1282 }
1283
1284 // Does "A op C" simplify to the identity value for the inner opcode?
1285 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1286 // They do! Return "A op B".
1287 ++NumExpand;
1288 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1289 A->takeName(&I);
1290 return A;
1291 }
1292 }
1293
1294 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1295}
1296
1297static std::optional<std::pair<Value *, Value *>>
1298matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1299 if (LHS->getParent() != RHS->getParent())
1300 return std::nullopt;
1301
1302 if (LHS->getNumIncomingValues() < 2)
1303 return std::nullopt;
1304
1305 if (!equal(LHS->blocks(), RHS->blocks()))
1306 return std::nullopt;
1307
1308 Value *L0 = LHS->getIncomingValue(0);
1309 Value *R0 = RHS->getIncomingValue(0);
1310
1311 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1312 Value *L1 = LHS->getIncomingValue(I);
1313 Value *R1 = RHS->getIncomingValue(I);
1314
1315 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1316 continue;
1317
1318 return std::nullopt;
1319 }
1320
1321 return std::optional(std::pair(L0, R0));
1322}
1323
1324std::optional<std::pair<Value *, Value *>>
1325InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1326 auto *LHSInst = dyn_cast<Instruction>(LHS);
1327 auto *RHSInst = dyn_cast<Instruction>(RHS);
1328 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1329 return std::nullopt;
1330 switch (LHSInst->getOpcode()) {
1331 case Instruction::PHI:
1333 case Instruction::Select: {
1334 Value *Cond = LHSInst->getOperand(0);
1335 Value *TrueVal = LHSInst->getOperand(1);
1336 Value *FalseVal = LHSInst->getOperand(2);
1337 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1338 FalseVal == RHSInst->getOperand(1))
1339 return std::pair(TrueVal, FalseVal);
1340 return std::nullopt;
1341 }
1342 case Instruction::Call: {
1343 // Match min(a, b) and max(a, b)
1344 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1345 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1346 if (LHSMinMax && RHSMinMax &&
1347 LHSMinMax->getPredicate() ==
1349 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1350 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1351 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1353 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1354 return std::nullopt;
1355 }
1356 default:
1357 return std::nullopt;
1358 }
1359}
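// For example, given LHS = umin(%a, %b) and RHS = umax(%a, %b) this returns
// the pair {%a, %b}, which lets a commutative caller rewrite
// "add (umin %a, %b), (umax %a, %b)" as "add %a, %b".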
1360
1361Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1362 Value *LHS,
1363 Value *RHS) {
1364 Value *A, *B, *C, *D, *E, *F;
1365 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1366 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1367 if (!LHSIsSelect && !RHSIsSelect)
1368 return nullptr;
1369
1370 SelectInst *SI = (LHSIsSelect && RHSIsSelect)
1371 ? nullptr
1372 : cast<SelectInst>(LHSIsSelect ? LHS : RHS);
1373
1374 FastMathFlags FMF;
1375 BuilderTy::FastMathFlagGuard Guard(Builder);
1376 if (isa<FPMathOperator>(&I)) {
1377 FMF = I.getFastMathFlags();
1378 Builder.setFastMathFlags(FMF);
1379 }
1380
1381 Instruction::BinaryOps Opcode = I.getOpcode();
1382 SimplifyQuery Q = SQ.getWithInstruction(&I);
1383
1384 Value *Cond, *True = nullptr, *False = nullptr;
1385
1386 // Special-case for add/negate combination. Replace the zero in the negation
1387 // with the trailing add operand:
1388 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1389 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1390 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1391 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1392 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1393 return nullptr;
1394 Value *N;
1395 if (True && match(FVal, m_Neg(m_Value(N)))) {
1396 Value *Sub = Builder.CreateSub(Z, N);
1397 return Builder.CreateSelect(Cond, True, Sub, I.getName(), SI);
1398 }
1399 if (False && match(TVal, m_Neg(m_Value(N)))) {
1400 Value *Sub = Builder.CreateSub(Z, N);
1401 return Builder.CreateSelect(Cond, Sub, False, I.getName(), SI);
1402 }
1403 return nullptr;
1404 };
1405
1406 if (LHSIsSelect && RHSIsSelect && A == D) {
1407 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1408 Cond = A;
1409 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1410 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1411
1412 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1413 if (False && !True)
1414 True = Builder.CreateBinOp(Opcode, B, E);
1415 else if (True && !False)
1416 False = Builder.CreateBinOp(Opcode, C, F);
1417 }
1418 } else if (LHSIsSelect && LHS->hasOneUse()) {
1419 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1420 Cond = A;
1421 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1422 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(B, C, RHS))
1424 return NewSel;
1425 } else if (RHSIsSelect && RHS->hasOneUse()) {
1426 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1427 Cond = D;
1428 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1429 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1430 if (Value *NewSel = foldAddNegate(E, F, LHS))
1431 return NewSel;
1432 }
1433
1434 if (!True || !False)
1435 return nullptr;
1436
1437 Value *NewSI = Builder.CreateSelect(Cond, True, False, I.getName(), SI);
1438 NewSI->takeName(&I);
1439 return NewSI;
1440}
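// For example, "mul (select i1 %c, i32 2, i32 3), (select i1 %c, i32 4, i32 5)"
// folds to "select i1 %c, i32 8, i32 15" because both arms simplify to
// constants.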
1441
1442/// Freely adapt every user of V as if V were changed to !V.
1443/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1444void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1445 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1446 for (User *U : make_early_inc_range(I->users())) {
1447 if (U == IgnoredUser)
1448 continue; // Don't consider this user.
1449 switch (cast<Instruction>(U)->getOpcode()) {
1450 case Instruction::Select: {
1451 auto *SI = cast<SelectInst>(U);
1452 SI->swapValues();
1453 SI->swapProfMetadata();
1454 break;
1455 }
1456 case Instruction::Br: {
1457 auto *BI = cast<BranchInst>(U);
1458 BI->swapSuccessors(); // swaps prof metadata too
1459 if (BPI)
1460 BPI->swapSuccEdgesProbabilities(BI->getParent());
1461 break;
1462 }
1463 case Instruction::Xor:
1464 replaceInstUsesWith(cast<Instruction>(*U), I);
1465 // Add to worklist for DCE.
1466 addToWorklist(cast<Instruction>(U));
1467 break;
1468 default:
1469 llvm_unreachable("Got unexpected user - out of sync with "
1470 "canFreelyInvertAllUsersOf() ?");
1471 }
1472 }
1473
1474 // Update pre-existing debug value uses.
1475 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1476 llvm::findDbgValues(I, DbgVariableRecords);
1477
1478 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1479 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1480 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1481 Idx != End; ++Idx)
1482 if (DbgVal->getVariableLocationOp(Idx) == I)
1483 DbgVal->setExpression(
1484 DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
1485 }
1486}
1487
1488/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1489/// constant zero (which is the 'negate' form).
1490Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1491 Value *NegV;
1492 if (match(V, m_Neg(m_Value(NegV))))
1493 return NegV;
1494
1495 // Constants can be considered to be negated values if they can be folded.
1496 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1497 return ConstantExpr::getNeg(C);
1498
1499 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1500 if (C->getType()->getElementType()->isIntegerTy())
1501 return ConstantExpr::getNeg(C);
1502
1503 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1504 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1505 Constant *Elt = CV->getAggregateElement(i);
1506 if (!Elt)
1507 return nullptr;
1508
1509 if (isa<UndefValue>(Elt))
1510 continue;
1511
1512 if (!isa<ConstantInt>(Elt))
1513 return nullptr;
1514 }
1515 return ConstantExpr::getNeg(CV);
1516 }
1517
1518 // Negate integer vector splats.
1519 if (auto *CV = dyn_cast<Constant>(V))
1520 if (CV->getType()->isVectorTy() &&
1521 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1522 return ConstantExpr::getNeg(CV);
1523
1524 return nullptr;
1525}
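// For example, this returns %x for "sub i32 0, %x" and the constant -5 for an
// i32 constant 5.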
1526
1527// Try to fold:
1528// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1529// -> ({s|u}itofp (int_binop x, y))
1530// 2) (fp_binop ({s|u}itofp x), FpC)
1531// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1532//
1533// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1534Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1535 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1536 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1537
1538 Type *FPTy = BO.getType();
1539 Type *IntTy = IntOps[0]->getType();
1540
1541 unsigned IntSz = IntTy->getScalarSizeInBits();
1542 // This is the maximum number of bits the integer may use for the int -> fp
1543 // casts to be exact.
1544 unsigned MaxRepresentableBits =
1545 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1546
1547 // Preserve the known number of leading bits. This can allow us to trivially
1548 // satisfy the nsw/nuw checks later on.
1549 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1550
1551 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1552 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1553 auto IsNonZero = [&](unsigned OpNo) -> bool {
1554 if (OpsKnown[OpNo].hasKnownBits() &&
1555 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1556 return true;
1557 return isKnownNonZero(IntOps[OpNo], SQ);
1558 };
1559
1560 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1561 // NB: This matches the impl in ValueTracking, we just try to use cached
1562 // knownbits here. If we ever start supporting WithCache for
1563 // `isKnownNonNegative`, change this to an explicit call.
1564 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1565 };
1566
1567 // Check if we know for certain that ({s|u}itofp op) is exact.
1568 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1569 // Can we treat this operand as the desired sign?
1570 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1571 !IsNonNeg(OpNo))
1572 return false;
1573
1574 // If fp precision >= bitwidth(op) then it's exact.
1575 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1576 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1577 // handled specially. We can't, however, increase the bound arbitrarily for
1578 // `sitofp` as for larger sizes, it won't sign extend.
1579 if (MaxRepresentableBits < IntSz) {
1580 // Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
1581 // numSignBits(op).
1582 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1583 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1584 if (OpsFromSigned)
1585 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1586 // Finally for unsigned check that fp precision >= bitwidth(op) -
1587 // numLeadingZeros(op).
1588 else {
1589 NumUsedLeadingBits[OpNo] =
1590 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1591 }
1592 }
1593 // NB: We could also check if op is known to be a power of 2 or zero (which
1594 // will always be representable). It's unlikely, however, that if we are
1595 // unable to bound op in any way, we will be able to pass the overflow checks
1596 // later on.
1597
1598 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1599 return false;
1600 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1601 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1602 IsNonZero(OpNo);
1603 };
1604
1605 // If we have a constant rhs, see if we can losslessly convert it to an int.
1606 if (Op1FpC != nullptr) {
1607 // Signed + Mul requires a non-zero operand.
1608 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1609 !match(Op1FpC, m_NonZeroFP()))
1610 return nullptr;
1611
1612 Constant *Op1IntC = ConstantFoldCastOperand(
1613 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1614 IntTy, DL);
1615 if (Op1IntC == nullptr)
1616 return nullptr;
1617 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1618 : Instruction::UIToFP,
1619 Op1IntC, FPTy, DL) != Op1FpC)
1620 return nullptr;
1621
1622 // First try to keep sign of cast the same.
1623 IntOps[1] = Op1IntC;
1624 }
1625
1626 // Ensure lhs/rhs integer types match.
1627 if (IntTy != IntOps[1]->getType())
1628 return nullptr;
1629
1630 if (Op1FpC == nullptr) {
1631 if (!IsValidPromotion(1))
1632 return nullptr;
1633 }
1634 if (!IsValidPromotion(0))
1635 return nullptr;
1636
1637 // Finally, we check that the integer version of the binop will not overflow.
1638 Instruction::BinaryOps IntOpc;
1639 // Because of the precision check, we can often rule out overflows.
1640 bool NeedsOverflowCheck = true;
1641 // Try to conservatively rule out overflow based on the already done precision
1642 // checks.
1643 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1644 unsigned OverflowMaxCurBits =
1645 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1646 bool OutputSigned = OpsFromSigned;
1647 switch (BO.getOpcode()) {
1648 case Instruction::FAdd:
1649 IntOpc = Instruction::Add;
1650 OverflowMaxOutputBits += OverflowMaxCurBits;
1651 break;
1652 case Instruction::FSub:
1653 IntOpc = Instruction::Sub;
1654 OverflowMaxOutputBits += OverflowMaxCurBits;
1655 break;
1656 case Instruction::FMul:
1657 IntOpc = Instruction::Mul;
1658 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1659 break;
1660 default:
1661 llvm_unreachable("Unsupported binop");
1662 }
1663 // The precision check may have already ruled out overflow.
1664 if (OverflowMaxOutputBits < IntSz) {
1665 NeedsOverflowCheck = false;
1666 // The bound above keeps an unsigned sub within the signed range, so treat the
1667 // result as signed (this is what allows us to avoid the overflow check for sub).
1668 if (IntOpc == Instruction::Sub)
1669 OutputSigned = true;
1670 }
1671
1672 // Precision check did not rule out overflow, so need to check.
1673 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1674 // `IntOps[...]` arguments to `KnownOps[...]`.
1675 if (NeedsOverflowCheck &&
1676 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1677 return nullptr;
1678
1679 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1680 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1681 IntBO->setHasNoSignedWrap(OutputSigned);
1682 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1683 }
1684 if (OutputSigned)
1685 return new SIToFPInst(IntBinOp, FPTy);
1686 return new UIToFPInst(IntBinOp, FPTy);
1687}
1688
1689// Try to fold:
1690// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1691// -> ({s|u}itofp (int_binop x, y))
1692// 2) (fp_binop ({s|u}itofp x), FpC)
1693// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1694Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1695 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1696 Constant *Op1FpC = nullptr;
1697 // Check for:
1698 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1699 // 2) (binop ({s|u}itofp x), FpC)
1700 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1701 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1702 return nullptr;
1703
1704 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1705 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1706 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1707 return nullptr;
1708
1709 // Cache KnownBits a bit to potentially save some analysis.
1710 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1711
1712 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1713 // different constraints depending on the sign of the cast.
1714 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
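// (With the sign bit known clear, the unsigned and signed interpretations of X
// are the same value, so both casts produce the same FP result.)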
1715 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1716 IntOps, Op1FpC, OpsKnown))
1717 return R;
1718 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1719 Op1FpC, OpsKnown);
1720}
1721
1722/// A binop with a constant operand and a sign-extended boolean operand may be
1723/// converted into a select of constants by applying the binary operation to
1724/// the constant with the two possible values of the extended boolean (0 or -1).
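/// For example: (and (sext i1 %b to i32), 12) --> (select i1 %b, i32 12, i32 0).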
1725Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1726 // TODO: Handle non-commutative binop (constant is operand 0).
1727 // TODO: Handle zext.
1728 // TODO: Peek through 'not' of cast.
1729 Value *BO0 = BO.getOperand(0);
1730 Value *BO1 = BO.getOperand(1);
1731 Value *X;
1732 Constant *C;
1733 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1734 !X->getType()->isIntOrIntVectorTy(1))
1735 return nullptr;
1736
1737 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1738 Constant *Ones = Constant::getAllOnesValue(BO.getType());
1739 Constant *Zero = Constant::getNullValue(BO.getType());
1740 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1741 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1742 return createSelectInst(X, TVal, FVal);
1743}
1744
1745 static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1746 bool IsTrueArm) {
1747 SmallVector<Value *> Ops;
1748 for (Value *Op : I.operands()) {
1749 Value *V = nullptr;
1750 if (Op == SI) {
1751 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1752 } else if (match(SI->getCondition(),
1755 m_Specific(Op), m_Value(V))) &&
1757 // Pass
1758 } else {
1759 V = Op;
1760 }
1761 Ops.push_back(V);
1762 }
1763
1764 return simplifyInstructionWithOperands(&I, Ops, I.getDataLayout());
1765}
1766
1767 static Instruction *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1768 Value *NewOp, InstCombiner &IC) {
1769 Instruction *Clone = I.clone();
1770 Clone->replaceUsesOfWith(SI, NewOp);
1772 IC.InsertNewInstBefore(Clone, I.getIterator());
1773 return Clone;
1774}
1775
1776 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1777 bool FoldWithMultiUse) {
1778 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1779 if (!SI->hasOneUse() && !FoldWithMultiUse)
1780 return nullptr;
1781
1782 Value *TV = SI->getTrueValue();
1783 Value *FV = SI->getFalseValue();
1784
1785 // Bool selects with constant operands can be folded to logical ops.
1786 if (SI->getType()->isIntOrIntVectorTy(1))
1787 return nullptr;
1788
1789 // Avoid breaking min/max reduction pattern,
1790 // which is necessary for vectorization later.
1791 if (isa<MinMaxIntrinsic>(&Op))
1792 for (Value *IntrinOp : Op.operands())
1793 if (auto *PN = dyn_cast<PHINode>(IntrinOp))
1794 for (Value *PhiOp : PN->operands())
1795 if (PhiOp == &Op)
1796 return nullptr;
1797
1798 // Test if a FCmpInst instruction is used exclusively by a select as
1799 // part of a minimum or maximum operation. If so, refrain from doing
1800 // any other folding. This helps out other analyses which understand
1801 // non-obfuscated minimum and maximum idioms. And in this case, at
1802 // least one of the comparison operands has at least one user besides
1803 // the compare (the select), which would often largely negate the
1804 // benefit of folding anyway.
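// E.g. %c = fcmp olt %x, %y; %m = select i1 %c, %x, %y is a recognizable
// fmin-like idiom that we prefer to leave intact here.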
1805 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1806 if (CI->hasOneUse()) {
1807 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1808 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1809 !CI->isCommutative())
1810 return nullptr;
1811 }
1812 }
1813
1814 // Make sure that one of the select arms folds successfully.
1815 Value *NewTV = simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/true);
1816 Value *NewFV =
1817 simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/false);
1818 if (!NewTV && !NewFV)
1819 return nullptr;
1820
1821 // Create an instruction for the arm that did not fold.
1822 if (!NewTV)
1823 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1824 if (!NewFV)
1825 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1826 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1827}
1828
1829 static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1830 Value *InValue, BasicBlock *InBB,
1831 const DataLayout &DL,
1832 const SimplifyQuery SQ) {
1833 // NB: It is a precondition of this transform that the operands be
1834 // phi translatable!
1835 SmallVector<Value *> Ops;
1836 for (Value *Op : I.operands()) {
1837 if (Op == PN)
1838 Ops.push_back(InValue);
1839 else
1840 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1841 }
1842
1843 // Don't consider the simplification successful if we get back a constant
1844 // expression. That's just an instruction in hiding.
1845 // Also reject the case where we simplify back to the phi node. We wouldn't
1846 // be able to remove it in that case.
1847 Value *NewVal = simplifyInstructionWithOperands(
1848 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1849 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1850 return NewVal;
1851
1852 // Check if incoming PHI value can be replaced with constant
1853 // based on implied condition.
1854 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1855 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1856 if (TerminatorBI && TerminatorBI->isConditional() &&
1857 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1858 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1859 std::optional<bool> ImpliedCond = isImpliedCondition(
1860 TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1],
1861 DL, LHSIsTrue);
1862 if (ImpliedCond)
1863 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1864 }
1865
1866 return nullptr;
1867}
1868
1869 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1870 bool AllowMultipleUses) {
1871 unsigned NumPHIValues = PN->getNumIncomingValues();
1872 if (NumPHIValues == 0)
1873 return nullptr;
1874
1875 // We normally only transform phis with a single use. However, if a PHI has
1876 // multiple uses and they are all the same operation, we can fold *all* of the
1877 // uses into the PHI.
1878 bool OneUse = PN->hasOneUse();
1879 bool IdenticalUsers = false;
1880 if (!AllowMultipleUses && !OneUse) {
1881 // Walk the use list for the instruction, comparing them to I.
1882 for (User *U : PN->users()) {
1883 auto *UI = cast<Instruction>(U);
1884 if (UI != &I && !I.isIdenticalTo(UI))
1885 return nullptr;
1886 }
1887 // Otherwise, we can replace *all* users with the new PHI we form.
1888 IdenticalUsers = true;
1889 }
1890
1891 // Check that all operands are phi-translatable.
1892 for (Value *Op : I.operands()) {
1893 if (Op == PN)
1894 continue;
1895
1896 // Non-instructions never require phi-translation.
1897 auto *I = dyn_cast<Instruction>(Op);
1898 if (!I)
1899 continue;
1900
1901 // Phi-translate can handle phi nodes in the same block.
1902 if (isa<PHINode>(I))
1903 if (I->getParent() == PN->getParent())
1904 continue;
1905
1906 // Operand dominates the block, no phi-translation necessary.
1907 if (DT.dominates(I, PN->getParent()))
1908 continue;
1909
1910 // Not phi-translatable, bail out.
1911 return nullptr;
1912 }
1913
1914 // Check to see whether the instruction can be folded into each phi operand.
1915 // If there is one operand that does not fold, remember the BB it is in.
1916 SmallVector<Value *> NewPhiValues;
1917 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1918 bool SeenNonSimplifiedInVal = false;
1919 for (unsigned i = 0; i != NumPHIValues; ++i) {
1920 Value *InVal = PN->getIncomingValue(i);
1921 BasicBlock *InBB = PN->getIncomingBlock(i);
1922
1923 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1924 NewPhiValues.push_back(NewVal);
1925 continue;
1926 }
1927
1928 // Handle some cases that can't be fully simplified, but where we know that
1929 // the two instructions will fold into one.
1930 auto WillFold = [&]() {
1931 if (!InVal->hasUseList() || !InVal->hasOneUser())
1932 return false;
1933
1934 // icmp of ucmp/scmp with constant will fold to icmp.
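// E.g. (icmp slt (scmp %a, %b), 1) folds to (icmp sle %a, %b).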
1935 const APInt *Ignored;
1936 if (isa<CmpIntrinsic>(InVal) &&
1937 match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
1938 return true;
1939
1940 // icmp eq zext(bool), 0 will fold to !bool.
1941 if (isa<ZExtInst>(InVal) &&
1942 cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
1943 match(&I,
1944 m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(PN), m_Zero())))
1945 return true;
1946
1947 return false;
1948 };
1949
1950 if (WillFold()) {
1951 OpsToMoveUseToIncomingBB.push_back(i);
1952 NewPhiValues.push_back(nullptr);
1953 continue;
1954 }
1955
1956 if (!OneUse && !IdenticalUsers)
1957 return nullptr;
1958
1959 if (SeenNonSimplifiedInVal)
1960 return nullptr; // More than one non-simplified value.
1961 SeenNonSimplifiedInVal = true;
1962
1963 // If there is exactly one non-simplified value, we can insert a copy of the
1964 // operation in that block. However, if this is a critical edge, we would
1965 // be inserting the computation on some other paths (e.g. inside a loop).
1966 // Only do this if the pred block is unconditionally branching into the phi
1967 // block. Also, make sure that the pred block is not dead code.
1968 auto *BI = dyn_cast<BranchInst>(InBB->getTerminator());
1969 if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB))
1970 return nullptr;
1971
1972 NewPhiValues.push_back(nullptr);
1973 OpsToMoveUseToIncomingBB.push_back(i);
1974
1975 // If the InVal is an invoke at the end of the pred block, then we can't
1976 // insert a computation after it without breaking the edge.
1977 if (isa<InvokeInst>(InVal))
1978 if (cast<Instruction>(InVal)->getParent() == InBB)
1979 return nullptr;
1980
1981 // Do not push the operation across a loop backedge. This could result in
1982 // an infinite combine loop, and is generally non-profitable (especially
1983 // if the operation was originally outside the loop).
1984 if (isBackEdge(InBB, PN->getParent()))
1985 return nullptr;
1986 }
1987
1988 // Clone the instruction that uses the phi node and move it into the incoming
1989 // BB because we know that the next iteration of InstCombine will simplify it.
1990 SmallDenseMap<BasicBlock *, Instruction *> Clones;
1991 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
1992 Value *Op = PN->getIncomingValue(OpIndex);
1993 BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
1994
1995 Instruction *Clone = Clones.lookup(OpBB);
1996 if (!Clone) {
1997 Clone = I.clone();
1998 for (Use &U : Clone->operands()) {
1999 if (U == PN)
2000 U = Op;
2001 else
2002 U = U->DoPHITranslation(PN->getParent(), OpBB);
2003 }
2004 Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
2005 Clones.insert({OpBB, Clone});
2006 // We may have speculated the instruction.
2007 Clone->dropUBImplyingAttrsAndMetadata();
2008 }
2009
2010 NewPhiValues[OpIndex] = Clone;
2011 }
2012
2013 // Okay, we can do the transformation: create the new PHI node.
2014 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
2015 InsertNewInstBefore(NewPN, PN->getIterator());
2016 NewPN->takeName(PN);
2017 NewPN->setDebugLoc(PN->getDebugLoc());
2018
2019 for (unsigned i = 0; i != NumPHIValues; ++i)
2020 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
2021
2022 if (IdenticalUsers) {
2023 // Collect and deduplicate users up-front to avoid iterator invalidation.
2024 SmallSetVector<Instruction *, 4> ToReplace;
2025 for (User *U : PN->users()) {
2026 auto *User = cast<Instruction>(U);
2027 if (User == &I)
2028 continue;
2029 ToReplace.insert(User);
2030 }
2031 for (Instruction *I : ToReplace) {
2032 replaceInstUsesWith(*I, NewPN);
2033 eraseInstFromFunction(*I);
2034 }
2035 OneUse = true;
2036 }
2037
2038 if (OneUse) {
2039 replaceAllDbgUsesWith(*PN, *NewPN, *PN, DT);
2040 }
2041 return replaceInstUsesWith(I, NewPN);
2042}
2043
2044 Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
2045 if (!BO.isAssociative())
2046 return nullptr;
2047
2048 // Find the interleaved binary ops.
2049 auto Opc = BO.getOpcode();
2050 auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
2051 auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
2052 if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
2053 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2054 !BO0->isAssociative() || !BO1->isAssociative() ||
2055 BO0->getParent() != BO1->getParent())
2056 return nullptr;
2057
2058 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2059 "Expected commutative instructions!");
2060
2061 // Find the matching phis, forming the recurrences.
2062 PHINode *PN0, *PN1;
2063 Value *Start0, *Step0, *Start1, *Step1;
2064 if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
2065 !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
2066 PN0->getParent() != PN1->getParent())
2067 return nullptr;
2068
2069 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2070 "Expected PHIs with two incoming values!");
2071
2072 // Convert the start and step values to constants.
2073 auto *Init0 = dyn_cast<Constant>(Start0);
2074 auto *Init1 = dyn_cast<Constant>(Start1);
2075 auto *C0 = dyn_cast<Constant>(Step0);
2076 auto *C1 = dyn_cast<Constant>(Step1);
2077 if (!Init0 || !Init1 || !C0 || !C1)
2078 return nullptr;
2079
2080 // Fold the recurrence constants.
2081 auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
2082 auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
2083 if (!Init || !C)
2084 return nullptr;
2085
2086 // Create the reduced PHI.
2087 auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
2088 "reduced.phi");
2089
2090 // Create the new binary op.
2091 auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
2092 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2093 // Intersect FMF flags for FADD and FMUL.
2094 FastMathFlags Intersect = BO0->getFastMathFlags() &
2095 BO1->getFastMathFlags() & BO.getFastMathFlags();
2096 NewBO->setFastMathFlags(Intersect);
2097 } else {
2098 OverflowTracking Flags;
2099 Flags.AllKnownNonNegative = false;
2100 Flags.AllKnownNonZero = false;
2101 Flags.mergeFlags(*BO0);
2102 Flags.mergeFlags(*BO1);
2103 Flags.mergeFlags(BO);
2104 Flags.applyFlags(*NewBO);
2105 }
2106 NewBO->takeName(&BO);
2107
2108 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2109 auto *V = PN0->getIncomingValue(I);
2110 auto *BB = PN0->getIncomingBlock(I);
2111 if (V == Init0) {
2112 assert(((PN1->getIncomingValue(0) == Init1 &&
2113 PN1->getIncomingBlock(0) == BB) ||
2114 (PN1->getIncomingValue(1) == Init1 &&
2115 PN1->getIncomingBlock(1) == BB)) &&
2116 "Invalid incoming block!");
2117 NewPN->addIncoming(Init, BB);
2118 } else if (V == BO0) {
2119 assert(((PN1->getIncomingValue(0) == BO1 &&
2120 PN1->getIncomingBlock(0) == BB) ||
2121 (PN1->getIncomingValue(1) == BO1 &&
2122 PN1->getIncomingBlock(1) == BB)) &&
2123 "Invalid incoming block!");
2124 NewPN->addIncoming(NewBO, BB);
2125 } else
2126 llvm_unreachable("Unexpected incoming value!");
2127 }
2128
2129 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2130 << "\n with " << *PN1 << "\n " << *BO1
2131 << '\n');
2132
2133 // Insert the new recurrence and remove the old (dead) ones.
2134 InsertNewInstWith(NewPN, PN0->getIterator());
2135 InsertNewInstWith(NewBO, BO0->getIterator());
2136
2143
2144 return replaceInstUsesWith(BO, NewBO);
2145}
2146
2147 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
2148 // Attempt to fold binary operators whose operands are simple recurrences.
2149 if (auto *NewBO = foldBinopWithRecurrence(BO))
2150 return NewBO;
2151
2152 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2153 // we are guarding against replicating the binop in >1 predecessor.
2154 // This could miss matching a phi with 2 constant incoming values.
2155 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
2156 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
2157 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2158 Phi0->getNumOperands() != Phi1->getNumOperands())
2159 return nullptr;
2160
2161 // TODO: Remove the restriction for binop being in the same block as the phis.
2162 if (BO.getParent() != Phi0->getParent() ||
2163 BO.getParent() != Phi1->getParent())
2164 return nullptr;
2165
2166 // Fold if phi0 or phi1 has an incoming constant value that comes from the
2167 // same block in both phis and that can be used to simplify this specific
2168 // binary operator.
2169 // For example:
2170 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2171 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2172 // %add = add i32 %phi0, %phi1
2173 // ==>
2174 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2175 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
2176 /*AllowRHSConstant*/ false);
2177 if (C) {
2178 SmallVector<Value *, 4> NewIncomingValues;
2179 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2180 auto &Phi0Use = std::get<0>(T);
2181 auto &Phi1Use = std::get<1>(T);
2182 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
2183 return false;
2184 Value *Phi0UseV = Phi0Use.get();
2185 Value *Phi1UseV = Phi1Use.get();
2186 if (Phi0UseV == C)
2187 NewIncomingValues.push_back(Phi1UseV);
2188 else if (Phi1UseV == C)
2189 NewIncomingValues.push_back(Phi0UseV);
2190 else
2191 return false;
2192 return true;
2193 };
2194
2195 if (all_of(zip(Phi0->operands(), Phi1->operands()),
2196 CanFoldIncomingValuePair)) {
2197 PHINode *NewPhi =
2198 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
2199 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2200 "The number of collected incoming values should equal the number "
2201 "of the original PHINode operands!");
2202 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2203 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
2204 return NewPhi;
2205 }
2206 }
2207
2208 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2209 return nullptr;
2210
2211 // Match a pair of incoming constants for one of the predecessor blocks.
2212 BasicBlock *ConstBB, *OtherBB;
2213 Constant *C0, *C1;
2214 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
2215 ConstBB = Phi0->getIncomingBlock(0);
2216 OtherBB = Phi0->getIncomingBlock(1);
2217 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
2218 ConstBB = Phi0->getIncomingBlock(1);
2219 OtherBB = Phi0->getIncomingBlock(0);
2220 } else {
2221 return nullptr;
2222 }
2223 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
2224 return nullptr;
2225
2226 // The block that we are hoisting to must reach here unconditionally.
2227 // Otherwise, we could be speculatively executing an expensive or
2228 // non-speculative op.
2229 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
2230 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
2231 !DT.isReachableFromEntry(OtherBB))
2232 return nullptr;
2233
2234 // TODO: This check could be tightened to only apply to binops (div/rem) that
2235 // are not safe to speculatively execute. But that could allow hoisting
2236 // potentially expensive instructions (fdiv for example).
2237 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2239 return nullptr;
2240
2241 // Fold constants for the predecessor block with constant incoming values.
2242 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
2243 if (!NewC)
2244 return nullptr;
2245
2246 // Make a new binop in the predecessor block with the non-constant incoming
2247 // values.
2248 Builder.SetInsertPoint(PredBlockBranch);
2249 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
2250 Phi0->getIncomingValueForBlock(OtherBB),
2251 Phi1->getIncomingValueForBlock(OtherBB));
2252 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
2253 NotFoldedNewBO->copyIRFlags(&BO);
2254
2255 // Replace the binop with a phi of the new values. The old phis are dead.
2256 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
2257 NewPhi->addIncoming(NewBO, OtherBB);
2258 NewPhi->addIncoming(NewC, ConstBB);
2259 return NewPhi;
2260}
2261
2262 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2263 if (!isa<Constant>(I.getOperand(1)))
2264 return nullptr;
2265
2266 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
2267 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
2268 return NewSel;
2269 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
2270 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
2271 return NewPhi;
2272 }
2273 return nullptr;
2274}
2275
2276 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2277 // If this GEP has only 0 indices, it is the same pointer as
2278 // Src. If Src is not a trivial GEP too, don't combine
2279 // the indices.
2280 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2281 !Src.hasOneUse())
2282 return false;
2283 return true;
2284}
2285
2286/// Find a constant NewC that has property:
2287/// shuffle(NewC, ShMask) = C
2288/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2289///
2290/// A 1-to-1 mapping is not required. Example:
2291/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2292 static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
2293 VectorType *NewCTy) {
2294 if (isa<ScalableVectorType>(NewCTy)) {
2295 Constant *Splat = C->getSplatValue();
2296 if (!Splat)
2297 return nullptr;
2298 return ConstantVector::getSplat(NewCTy->getElementCount(), Splat);
2299 }
2300
2301 if (cast<FixedVectorType>(NewCTy)->getNumElements() >
2302 cast<FixedVectorType>(C->getType())->getNumElements())
2303 return nullptr;
2304
2305 unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
2306 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2307 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2308 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
2309 for (unsigned I = 0; I < NumElts; ++I) {
2310 Constant *CElt = C->getAggregateElement(I);
2311 if (ShMask[I] >= 0) {
2312 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2313 Constant *NewCElt = NewVecC[ShMask[I]];
2314 // Bail out if:
2315 // 1. The constant vector contains a constant expression.
2316 // 2. The shuffle needs an element of the constant vector that can't
2317 // be mapped to a new constant vector.
2318 // 3. This is a widening shuffle that copies elements of V1 into the
2319 // extended elements (extending with poison is allowed).
2320 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2321 I >= NewCNumElts)
2322 return nullptr;
2323 NewVecC[ShMask[I]] = CElt;
2324 }
2325 }
2326 return ConstantVector::get(NewVecC);
2327}
2328
2329 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2330 if (!isa<VectorType>(Inst.getType()))
2331 return nullptr;
2332
2333 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2334 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2335 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2336 cast<VectorType>(Inst.getType())->getElementCount());
2337 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2338 cast<VectorType>(Inst.getType())->getElementCount());
2339
2340 // If both operands of the binop are vector concatenations, then perform the
2341 // narrow binop on each pair of the source operands followed by concatenation
2342 // of the results.
2343 Value *L0, *L1, *R0, *R1;
2344 ArrayRef<int> Mask;
2345 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2346 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2347 LHS->hasOneUse() && RHS->hasOneUse() &&
2348 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2349 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2350 // This transform does not have the speculative execution constraint as
2351 // below because the shuffle is a concatenation. The new binops are
2352 // operating on exactly the same elements as the existing binop.
2353 // TODO: We could ease the mask requirement to allow different undef lanes,
2354 // but that requires an analysis of the binop-with-undef output value.
2355 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2356 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2357 BO->copyIRFlags(&Inst);
2358 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2359 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2360 BO->copyIRFlags(&Inst);
2361 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2362 }
2363
2364 auto createBinOpReverse = [&](Value *X, Value *Y) {
2365 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2366 if (auto *BO = dyn_cast<BinaryOperator>(V))
2367 BO->copyIRFlags(&Inst);
2368 Module *M = Inst.getModule();
2369 Function *F = Intrinsic::getOrInsertDeclaration(
2370 M, Intrinsic::vector_reverse, V->getType());
2371 return CallInst::Create(F, V);
2372 };
2373
2374 // NOTE: Reverse shuffles don't require the speculative execution protection
2375 // below because they don't affect which lanes take part in the computation.
2376
2377 Value *V1, *V2;
2378 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2379 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2380 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2381 (LHS->hasOneUse() || RHS->hasOneUse() ||
2382 (LHS == RHS && LHS->hasNUses(2))))
2383 return createBinOpReverse(V1, V2);
2384
2385 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2386 if (LHS->hasOneUse() && isSplatValue(RHS))
2387 return createBinOpReverse(V1, RHS);
2388 }
2389 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2390 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2391 return createBinOpReverse(LHS, V2);
2392
2393 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2394 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2395 if (auto *BO = dyn_cast<BinaryOperator>(V))
2396 BO->copyIRFlags(&Inst);
2397
2398 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
2399 Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
2400 Module *M = Inst.getModule();
2401 Function *F = Intrinsic::getOrInsertDeclaration(
2402 M, Intrinsic::experimental_vp_reverse, V->getType());
2403 return CallInst::Create(F, {V, AllTrueMask, EVL});
2404 };
2405
2406 Value *EVL;
2408 m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
2409 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2411 m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
2412 (LHS->hasOneUse() || RHS->hasOneUse() ||
2413 (LHS == RHS && LHS->hasNUses(2))))
2414 return createBinOpVPReverse(V1, V2, EVL);
2415
2416 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2417 if (LHS->hasOneUse() && isSplatValue(RHS))
2418 return createBinOpVPReverse(V1, RHS, EVL);
2419 }
2420 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2421 else if (isSplatValue(LHS) &&
2423 m_Value(V2), m_AllOnes(), m_Value(EVL))))
2424 return createBinOpVPReverse(LHS, V2, EVL);
2425
2426 // It may not be safe to reorder shuffles and things like div, urem, etc.
2427 // because we may trap when executing those ops on unknown vector elements.
2428 // See PR20059.
2430 return nullptr;
2431
2432 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2433 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2434 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2435 BO->copyIRFlags(&Inst);
2436 return new ShuffleVectorInst(XY, M);
2437 };
2438
2439 // If both arguments of the binary operation are shuffles that use the same
2440 // mask and shuffle within a single vector, move the shuffle after the binop.
2441 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2442 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2443 V1->getType() == V2->getType() &&
2444 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2445 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2446 return createBinOpShuffle(V1, V2, Mask);
2447 }
2448
2449 // If both arguments of a commutative binop are select-shuffles that use the
2450 // same mask with commuted operands, the shuffles are unnecessary.
2451 if (Inst.isCommutative() &&
2452 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2453 match(RHS,
2454 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2455 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2456 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2457 // TODO: Allow shuffles that contain undefs in the mask?
2458 // That is legal, but it reduces undef knowledge.
2459 // TODO: Allow arbitrary shuffles by shuffling after binop?
2460 // That might be legal, but we have to deal with poison.
2461 if (LShuf->isSelect() &&
2462 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2463 RShuf->isSelect() &&
2464 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2465 // Example:
2466 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2467 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2468 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2469 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2470 NewBO->copyIRFlags(&Inst);
2471 return NewBO;
2472 }
2473 }
2474
2475 // If one argument is a shuffle within one vector and the other is a constant,
2476 // try moving the shuffle after the binary operation. This canonicalization
2477 // intends to move shuffles closer to other shuffles and binops closer to
2478 // other binops, so they can be folded. It may also enable demanded elements
2479 // transforms.
2480 Constant *C;
2482 m_Mask(Mask))),
2483 m_ImmConstant(C)))) {
2484 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2485 "Shuffle should not change scalar type");
2486
2487 bool ConstOp1 = isa<Constant>(RHS);
2488 if (Constant *NewC =
2490 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2491 // which will cause UB for div/rem. Mask them with a safe constant.
2492 if (isa<FixedVectorType>(V1->getType()) && Inst.isIntDivRem())
2493 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2494
2495 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2496 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2497 Value *NewLHS = ConstOp1 ? V1 : NewC;
2498 Value *NewRHS = ConstOp1 ? NewC : V1;
2499 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2500 }
2501 }
2502
2503 // Try to reassociate to sink a splat shuffle after a binary operation.
2504 if (Inst.isAssociative() && Inst.isCommutative()) {
2505 // Canonicalize shuffle operand as LHS.
2506 if (isa<ShuffleVectorInst>(RHS))
2507 std::swap(LHS, RHS);
2508
2509 Value *X;
2510 ArrayRef<int> MaskC;
2511 int SplatIndex;
2512 Value *Y, *OtherOp;
2513 if (!match(LHS,
2514 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2515 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2516 X->getType() != Inst.getType() ||
2517 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2518 return nullptr;
2519
2520 // FIXME: This may not be safe if the analysis allows undef elements. By
2521 // moving 'Y' before the splat shuffle, we are implicitly assuming
2522 // that it is not undef/poison at the splat index.
2523 if (isSplatValue(OtherOp, SplatIndex)) {
2524 std::swap(Y, OtherOp);
2525 } else if (!isSplatValue(Y, SplatIndex)) {
2526 return nullptr;
2527 }
2528
2529 // X and Y are splatted values, so perform the binary operation on those
2530 // values followed by a splat followed by the 2nd binary operation:
2531 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2532 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2533 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2534 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2535 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2536
2537 // Intersect FMF on both new binops. Other (poison-generating) flags are
2538 // dropped to be safe.
2539 if (isa<FPMathOperator>(R)) {
2540 R->copyFastMathFlags(&Inst);
2541 R->andIRFlags(RHS);
2542 }
2543 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2544 NewInstBO->copyIRFlags(R);
2545 return R;
2546 }
2547
2548 return nullptr;
2549}
2550
2551/// Try to narrow the width of a binop if at least 1 operand is an extend of
2552 /// a value. This requires a potentially expensive known bits check to make
2553/// sure the narrow op does not overflow.
2554Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2555 // We need at least one extended operand.
2556 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2557
2558 // If this is a sub, we swap the operands since we always want an extension
2559 // on the RHS. The LHS can be an extension or a constant.
2560 if (BO.getOpcode() == Instruction::Sub)
2561 std::swap(Op0, Op1);
2562
2563 Value *X;
2564 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2565 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2566 return nullptr;
2567
2568 // If both operands are the same extension from the same source type and we
2569 // can eliminate at least one (hasOneUse), this might work.
2570 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2571 Value *Y;
2572 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2573 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2574 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2575 // If that did not match, see if we have a suitable constant operand.
2576 // Truncating and extending must produce the same constant.
2577 Constant *WideC;
2578 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2579 return nullptr;
2580 Constant *NarrowC = getLosslessInvCast(WideC, X->getType(), CastOpc, DL);
2581 if (!NarrowC)
2582 return nullptr;
2583 Y = NarrowC;
2584 }
2585
2586 // Swap back now that we found our operands.
2587 if (BO.getOpcode() == Instruction::Sub)
2588 std::swap(X, Y);
2589
2590 // Both operands have narrow versions. Last step: the math must not overflow
2591 // in the narrow width.
2592 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2593 return nullptr;
2594
2595 // bo (ext X), (ext Y) --> ext (bo X, Y)
2596 // bo (ext X), C --> ext (bo X, C')
2597 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2598 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2599 if (IsSext)
2600 NewBinOp->setHasNoSignedWrap();
2601 else
2602 NewBinOp->setHasNoUnsignedWrap();
2603 }
2604 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2605}
2606
2607/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2608/// transform.
2613
2614/// Thread a GEP operation with constant indices through the constant true/false
2615/// arms of a select.
2616 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2617 InstCombiner::BuilderTy &Builder) {
2618 if (!GEP.hasAllConstantIndices())
2619 return nullptr;
2620
2621 Instruction *Sel;
2622 Value *Cond;
2623 Constant *TrueC, *FalseC;
2624 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2625 !match(Sel,
2626 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2627 return nullptr;
2628
2629 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2630 // Propagate 'inbounds' and metadata from existing instructions.
2631 // Note: using IRBuilder to create the constants for efficiency.
2632 SmallVector<Value *, 4> IndexC(GEP.indices());
2633 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2634 Type *Ty = GEP.getSourceElementType();
2635 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2636 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
2637 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2638}
2639
2640// Canonicalization:
2641// gep T, (gep i8, base, C1), (Index + C2) into
2642// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
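// E.g. with T = i32: gep i32, (gep i8, p, 4), (add %i, 2)
//   --> gep i32, (gep i8, p, 12), %i (new byte offset = 4 + 2 * 4).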
2643 static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2644 GEPOperator *Src,
2645 InstCombinerImpl &IC) {
2646 if (GEP.getNumIndices() != 1)
2647 return nullptr;
2648 auto &DL = IC.getDataLayout();
2649 Value *Base;
2650 const APInt *C1;
2651 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2652 return nullptr;
2653 Value *VarIndex;
2654 const APInt *C2;
2655 Type *PtrTy = Src->getType()->getScalarType();
2656 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2657 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2658 return nullptr;
2659 if (C1->getBitWidth() != IndexSizeInBits ||
2660 C2->getBitWidth() != IndexSizeInBits)
2661 return nullptr;
2662 Type *BaseType = GEP.getSourceElementType();
2664 return nullptr;
2665 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2666 APInt NewOffset = TypeSize * *C2 + *C1;
2667 if (NewOffset.isZero() ||
2668 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2669 GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
2670 if (GEP.hasNoUnsignedWrap() &&
2671 cast<GEPOperator>(Src)->hasNoUnsignedWrap() &&
2672 match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) {
2673 Flags |= GEPNoWrapFlags::noUnsignedWrap();
2674 if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds())
2675 Flags |= GEPNoWrapFlags::inBounds();
2676 }
2677
2678 Value *GEPConst =
2679 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags);
2680 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags);
2681 }
2682
2683 return nullptr;
2684}
2685
2686/// Combine constant offsets separated by variable offsets.
2687/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
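/// E.g. ptradd (ptradd (ptradd p, 8), %x), 16 -> ptradd (ptradd p, %x), 24.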
2688 static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2689 InstCombinerImpl &IC) {
2690 if (!GEP.hasAllConstantIndices())
2691 return nullptr;
2692
2693 GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2694 SmallVector<GetElementPtrInst *> Skipped;
2695 auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
2696 while (true) {
2697 if (!InnerGEP)
2698 return nullptr;
2699
2700 NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2701 if (InnerGEP->hasAllConstantIndices())
2702 break;
2703
2704 if (!InnerGEP->hasOneUse())
2705 return nullptr;
2706
2707 Skipped.push_back(InnerGEP);
2708 InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2709 }
2710
2711 // The two constant offset GEPs are directly adjacent: Let normal offset
2712 // merging handle it.
2713 if (Skipped.empty())
2714 return nullptr;
2715
2716 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2717 // if profitable.
2718 if (!InnerGEP->hasOneUse())
2719 return nullptr;
2720
2721 // Don't bother with vector splats.
2722 Type *Ty = GEP.getType();
2723 if (InnerGEP->getType() != Ty)
2724 return nullptr;
2725
2726 const DataLayout &DL = IC.getDataLayout();
2727 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2728 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2729 !InnerGEP->accumulateConstantOffset(DL, Offset))
2730 return nullptr;
2731
2732 IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2733 for (GetElementPtrInst *SkippedGEP : Skipped)
2734 SkippedGEP->setNoWrapFlags(NW);
2735
2736 return IC.replaceInstUsesWith(
2737 GEP,
2738 IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2739 NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2740}
2741
2742 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2743 GEPOperator *Src) {
2744 // Combine Indices - If the source pointer to this getelementptr instruction
2745 // is a getelementptr instruction with matching element type, combine the
2746 // indices of the two getelementptr instructions into a single instruction.
2747 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2748 return nullptr;
2749
2750 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2751 return I;
2752
2753 if (auto *I = combineConstantOffsets(GEP, *this))
2754 return I;
2755
2756 if (Src->getResultElementType() != GEP.getSourceElementType())
2757 return nullptr;
2758
2759 // Find out whether the last index in the source GEP is a sequential idx.
2760 bool EndsWithSequential = false;
2761 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2762 I != E; ++I)
2763 EndsWithSequential = I.isSequential();
2764 if (!EndsWithSequential)
2765 return nullptr;
2766
2767 // Replace: gep (gep %P, long B), long A, ...
2768 // With: T = long A+B; gep %P, T, ...
2769 Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2770 Value *GO1 = GEP.getOperand(1);
2771
2772 // If they aren't the same type, then the input hasn't been processed
2773 // by the loop above yet (which canonicalizes sequential index types to
2774 // intptr_t). Just avoid transforming this until the input has been
2775 // normalized.
2776 if (SO1->getType() != GO1->getType())
2777 return nullptr;
2778
2779 Value *Sum =
2780 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2781 // Only do the combine when we are sure the cost after the
2782 // merge is never more than that before the merge.
2783 if (Sum == nullptr)
2784 return nullptr;
2785
2786 SmallVector<Value *> Indices;
2787 Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2788 Indices.push_back(Sum);
2789 Indices.append(GEP.op_begin() + 2, GEP.op_end());
2790
2791 // Don't create GEPs with more than one non-zero index.
2792 unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2793 auto *C = dyn_cast<Constant>(Idx);
2794 return !C || !C->isNullValue();
2795 });
2796 if (NumNonZeroIndices > 1)
2797 return nullptr;
2798
2799 return replaceInstUsesWith(
2800 GEP, Builder.CreateGEP(
2801 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2803}
2804
2805 Value *InstCombinerImpl::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2806 BuilderTy *Builder,
2807 bool &DoesConsume, unsigned Depth) {
2808 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2809 // ~(~(X)) -> X.
2810 Value *A, *B;
2811 if (match(V, m_Not(m_Value(A)))) {
2812 DoesConsume = true;
2813 return A;
2814 }
2815
2816 Constant *C;
2817 // Constants can be considered to be not'ed values.
2818 if (match(V, m_ImmConstant(C)))
2819 return ConstantExpr::getNot(C);
2820
2821 if (Depth++ >= MaxAnalysisRecursionDepth)
2822 return nullptr;
2823
2824 // The rest of the cases require that we invert all uses so don't bother
2825 // doing the analysis if we know we can't use the result.
2826 if (!WillInvertAllUses)
2827 return nullptr;
2828
2829 // Compares can be inverted if all of their uses are being modified to use
2830 // the ~V.
2831 if (auto *I = dyn_cast<CmpInst>(V)) {
2832 if (Builder != nullptr)
2833 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2834 I->getOperand(1));
2835 return NonNull;
2836 }
2837
2838 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2839 // `(-1 - B) - A` if we are willing to invert all of the uses.
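// (This is just the identity -1 - (A + B) == (-1 - B) - A.)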
2840 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2841 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2842 DoesConsume, Depth))
2843 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2844 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2845 DoesConsume, Depth))
2846 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2847 return nullptr;
2848 }
2849
2850 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2851 // into `A ^ B` if we are willing to invert all of the uses.
2852 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2853 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2854 DoesConsume, Depth))
2855 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2856 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2857 DoesConsume, Depth))
2858 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2859 return nullptr;
2860 }
2861
2862 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2863 // `A + (-1 - B)` if we are willing to invert all of the uses.
2864 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2865 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2866 DoesConsume, Depth))
2867 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2868 return nullptr;
2869 }
2870
2871 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2872 // into `A s>> B` if we are willing to invert all of the uses.
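// (The bitwise-not of an arithmetic shift right equals the arithmetic shift
// right of the bitwise-not.)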
2873 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2874 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2875 DoesConsume, Depth))
2876 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2877 return nullptr;
2878 }
2879
2880 Value *Cond;
2881 // LogicOps are special in that we canonicalize them at the cost of an
2882 // instruction.
2883 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2885 // Selects/min/max with invertible operands are freely invertible
2886 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2887 bool LocalDoesConsume = DoesConsume;
2888 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2889 LocalDoesConsume, Depth))
2890 return nullptr;
2891 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2892 LocalDoesConsume, Depth)) {
2893 DoesConsume = LocalDoesConsume;
2894 if (Builder != nullptr) {
2895 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2896 DoesConsume, Depth);
2897 assert(NotB != nullptr &&
2898 "Unable to build inverted value for known freely invertable op");
2899 if (auto *II = dyn_cast<IntrinsicInst>(V))
2900 return Builder->CreateBinaryIntrinsic(
2901 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2902 return Builder->CreateSelect(Cond, NotA, NotB);
2903 }
2904 return NonNull;
2905 }
2906 }
2907
2908 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2909 bool LocalDoesConsume = DoesConsume;
2910 SmallVector<std::pair<Value *, BasicBlock *>> IncomingValues;
2911 for (Use &U : PN->operands()) {
2912 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2913 Value *NewIncomingVal = getFreelyInvertedImpl(
2914 U.get(), /*WillInvertAllUses=*/false,
2915 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2916 if (NewIncomingVal == nullptr)
2917 return nullptr;
2918 // Make sure that we can safely erase the original PHI node.
2919 if (NewIncomingVal == V)
2920 return nullptr;
2921 if (Builder != nullptr)
2922 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2923 }
2924
2925 DoesConsume = LocalDoesConsume;
2926 if (Builder != nullptr) {
2927 IRBuilderBase::InsertPointGuard Guard(*Builder);
2928 Builder->SetInsertPoint(PN);
2929 PHINode *NewPN =
2930 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2931 for (auto [Val, Pred] : IncomingValues)
2932 NewPN->addIncoming(Val, Pred);
2933 return NewPN;
2934 }
2935 return NonNull;
2936 }
2937
2938 if (match(V, m_SExtLike(m_Value(A)))) {
2939 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2940 DoesConsume, Depth))
2941 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2942 return nullptr;
2943 }
2944
2945 if (match(V, m_Trunc(m_Value(A)))) {
2946 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2947 DoesConsume, Depth))
2948 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2949 return nullptr;
2950 }
2951
2952 // De Morgan's Laws:
2953 // (~(A | B)) -> (~A & ~B)
2954 // (~(A & B)) -> (~A | ~B)
2955 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2956 bool IsLogical, Value *A,
2957 Value *B) -> Value * {
2958 bool LocalDoesConsume = DoesConsume;
2959 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2960 LocalDoesConsume, Depth))
2961 return nullptr;
2962 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2963 LocalDoesConsume, Depth)) {
2964 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2965 LocalDoesConsume, Depth);
2966 DoesConsume = LocalDoesConsume;
2967 if (IsLogical)
2968 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2969 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2970 }
2971
2972 return nullptr;
2973 };
2974
2975 if (match(V, m_Or(m_Value(A), m_Value(B))))
2976 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2977 B);
2978
2979 if (match(V, m_And(m_Value(A), m_Value(B))))
2980 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2981 B);
2982
2983 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2984 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2985 B);
2986
2987 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2988 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2989 B);
2990
2991 return nullptr;
2992}
2993
2994/// Return true if we should canonicalize the gep to an i8 ptradd.
2995 static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
2996 Value *PtrOp = GEP.getOperand(0);
2997 Type *GEPEltType = GEP.getSourceElementType();
2998 if (GEPEltType->isIntegerTy(8))
2999 return false;
3000
3001 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3002 // intrinsic. This has better support in BasicAA.
3003 if (GEPEltType->isScalableTy())
3004 return true;
3005
3006 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3007 // together.
3008 if (GEP.getNumIndices() == 1 &&
3009 match(GEP.getOperand(1),
3011 m_Shl(m_Value(), m_ConstantInt())))))
3012 return true;
3013
3014 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3015 // possibly be merged together.
3016 auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
3017 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3018 any_of(GEP.indices(), [](Value *V) {
3019 const APInt *C;
3020 return match(V, m_APInt(C)) && !C->isZero();
3021 });
3022}
3023
3024 static Value *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
3025 IRBuilderBase &Builder) {
3026 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
3027 if (!Op1)
3028 return nullptr;
3029
3030 // Don't fold a GEP into itself through a PHI node. This can only happen
3031 // through the back-edge of a loop. Folding a GEP into itself means that
3032 // the value of the previous iteration needs to be stored in the meantime,
3033 // thus requiring an additional register variable to be live, but not
3034 // actually achieving anything (the GEP still needs to be executed once per
3035 // loop iteration).
3036 if (Op1 == &GEP)
3037 return nullptr;
3038 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3039
3040 int DI = -1;
3041
3042 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3043 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
3044 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3045 Op1->getSourceElementType() != Op2->getSourceElementType())
3046 return nullptr;
3047
3048 // As for Op1 above, don't try to fold a GEP into itself.
3049 if (Op2 == &GEP)
3050 return nullptr;
3051
3052 // Keep track of the type as we walk the GEP.
3053 Type *CurTy = nullptr;
3054
3055 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3056 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
3057 return nullptr;
3058
3059 if (Op1->getOperand(J) != Op2->getOperand(J)) {
3060 if (DI == -1) {
3061 // We have not seen any differences in the GEPs feeding the
3062 // PHI yet, so we record this one if it is allowed to be a
3063 // variable.
3064
3065 // The first two arguments can vary for any GEP, the rest have to be
3066 // static for struct slots
3067 if (J > 1) {
3068 assert(CurTy && "No current type?");
3069 if (CurTy->isStructTy())
3070 return nullptr;
3071 }
3072
3073 DI = J;
3074 } else {
3075 // The GEP is different by more than one input. While this could be
3076 // extended to support GEPs that vary by more than one variable it
3077 // doesn't make sense since it greatly increases the complexity and
3078 // would result in an R+R+R addressing mode which no backend
3079 // directly supports and would need to be broken into several
3080 // simpler instructions anyway.
3081 return nullptr;
3082 }
3083 }
3084
3085 // Sink down a layer of the type for the next iteration.
3086 if (J > 0) {
3087 if (J == 1) {
3088 CurTy = Op1->getSourceElementType();
3089 } else {
3090 CurTy =
3091 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
3092 }
3093 }
3094 }
3095
3096 NW &= Op2->getNoWrapFlags();
3097 }
3098
3099 // If not all GEPs are identical we'll have to create a new PHI node.
3100 // Check that the old PHI node has only one use so that it will get
3101 // removed.
3102 if (DI != -1 && !PN->hasOneUse())
3103 return nullptr;
3104
3105 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
3106 NewGEP->setNoWrapFlags(NW);
3107
3108 if (DI == -1) {
3109 // All the GEPs feeding the PHI are identical. Clone one down into our
3110 // BB so that it can be merged with the current GEP.
3111 } else {
3112 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3113 // into the current block so it can be merged, and create a new PHI to
3114 // set that index.
3115 PHINode *NewPN;
3116 {
3117 IRBuilderBase::InsertPointGuard Guard(Builder);
3118 Builder.SetInsertPoint(PN);
3119 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
3120 PN->getNumOperands());
3121 }
3122
3123 for (auto &I : PN->operands())
3124 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
3125 PN->getIncomingBlock(I));
3126
3127 NewGEP->setOperand(DI, NewPN);
3128 }
3129
3130 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
3131 return NewGEP;
3132}
3133
3134 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
3135 Value *PtrOp = GEP.getOperand(0);
3136 SmallVector<Value *, 8> Indices(GEP.indices());
3137 Type *GEPType = GEP.getType();
3138 Type *GEPEltType = GEP.getSourceElementType();
3139 if (Value *V =
3140 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
3141 SQ.getWithInstruction(&GEP)))
3142 return replaceInstUsesWith(GEP, V);
3143
3144 // For vector geps, use the generic demanded vector support.
3145 // Skip if GEP return type is scalable. The number of elements is unknown at
3146 // compile-time.
3147 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
3148 auto VWidth = GEPFVTy->getNumElements();
3149 APInt PoisonElts(VWidth, 0);
3150 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3151 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
3152 PoisonElts)) {
3153 if (V != &GEP)
3154 return replaceInstUsesWith(GEP, V);
3155 return &GEP;
3156 }
3157 }
3158
3159 // Eliminate unneeded casts for indices, and replace indices which displace
3160 // by multiples of a zero size type with zero.
3161 bool MadeChange = false;
3162
3163 // Index width may not be the same width as pointer width.
3164 // Data layout chooses the right type based on supported integer types.
3165 Type *NewScalarIndexTy =
3166 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
3167
3168 gep_type_iterator GTI = gep_type_begin(GEP);
3169 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3170 ++I, ++GTI) {
3171 // Skip indices into struct types.
3172 if (GTI.isStruct())
3173 continue;
3174
3175 Type *IndexTy = (*I)->getType();
3176 Type *NewIndexType =
3177 IndexTy->isVectorTy()
3178 ? VectorType::get(NewScalarIndexTy,
3179 cast<VectorType>(IndexTy)->getElementCount())
3180 : NewScalarIndexTy;
3181
3182 // If the element type has zero size then any index over it is equivalent
3183 // to an index of zero, so replace it with zero if it is not zero already.
3184 Type *EltTy = GTI.getIndexedType();
3185 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
3186 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
3187 *I = Constant::getNullValue(NewIndexType);
3188 MadeChange = true;
3189 }
3190
3191 if (IndexTy != NewIndexType) {
3192 // If we are using a wider index than needed for this platform, shrink
3193 // it to what we need. If narrower, sign-extend it to what we need.
3194 // This explicit cast can make subsequent optimizations more obvious.
3195 if (IndexTy->getScalarSizeInBits() <
3196 NewIndexType->getScalarSizeInBits()) {
3197 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3198 *I = Builder.CreateZExt(*I, NewIndexType, "", /*IsNonNeg=*/true);
3199 else
3200 *I = Builder.CreateSExt(*I, NewIndexType);
3201 } else {
3202 *I = Builder.CreateTrunc(*I, NewIndexType, "", GEP.hasNoUnsignedWrap(),
3203 GEP.hasNoUnsignedSignedWrap());
3204 }
3205 MadeChange = true;
3206 }
3207 }
3208 if (MadeChange)
3209 return &GEP;
3210
3211 // Canonicalize constant GEPs to i8 type.
3212 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3213 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
3214 if (GEP.accumulateConstantOffset(DL, Offset))
3215 return replaceInstUsesWith(
3216 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
3217 GEP.getNoWrapFlags()));
3218 }
3219
3220 if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
3221 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
3222 Value *NewGEP =
3223 Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
3224 return replaceInstUsesWith(GEP, NewGEP);
3225 }
3226
3227 // Strip trailing zero indices.
3228 auto *LastIdx = dyn_cast<Constant>(Indices.back());
3229 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3230 return replaceInstUsesWith(
3231 GEP, Builder.CreateGEP(GEP.getSourceElementType(), PtrOp,
3232 drop_end(Indices), "", GEP.getNoWrapFlags()));
3233 }
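// Illustrative example (not part of the upstream source): a trailing zero index
// does not change the computed address, so
//   %q = getelementptr [4 x i32], ptr %p, i64 %i, i64 0
// can be rewritten as
//   %q = getelementptr [4 x i32], ptr %p, i64 %i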
3234
3235 // Strip leading zero indices.
3236 auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3237 if (FirstIdx && FirstIdx->isNullValue() &&
3238 !FirstIdx->getType()->isVectorTy()) {
3239 gep_type_iterator GTI = gep_type_begin(GEP);
3240 ++GTI;
3241 if (!GTI.isStruct())
3242 return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3243 GEP.getPointerOperand(),
3244 drop_begin(Indices), "",
3245 GEP.getNoWrapFlags()));
3246 }
3247
3248 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3249 // Note that this loses information about undef lanes; we run it after
3250 // demanded bits to partially mitigate that loss.
3251 if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3252 return Op->getType()->isVectorTy() && getSplatValue(Op);
3253 })) {
3254 SmallVector<Value *> NewOps;
3255 for (auto &Op : GEP.operands()) {
3256 if (Op->getType()->isVectorTy())
3257 if (Value *Scalar = getSplatValue(Op)) {
3258 NewOps.push_back(Scalar);
3259 continue;
3260 }
3261 NewOps.push_back(Op);
3262 }
3263
3264 Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3265 ArrayRef(NewOps).drop_front(), GEP.getName(),
3266 GEP.getNoWrapFlags());
3267 if (!Res->getType()->isVectorTy()) {
3268 ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3269 Res = Builder.CreateVectorSplat(EC, Res);
3270 }
3271 return replaceInstUsesWith(GEP, Res);
3272 }
3273
3274 bool SeenNonZeroIndex = false;
3275 for (auto [IdxNum, Idx] : enumerate(Indices)) {
3276 auto *C = dyn_cast<Constant>(Idx);
3277 if (C && C->isNullValue())
3278 continue;
3279
3280 if (!SeenNonZeroIndex) {
3281 SeenNonZeroIndex = true;
3282 continue;
3283 }
3284
3285 // GEP has multiple non-zero indices: Split it.
3286 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
3287 Value *FrontGEP =
3288 Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
3289 GEP.getName() + ".split", GEP.getNoWrapFlags());
3290
3291 SmallVector<Value *> BackIndices;
3292 BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
3293 append_range(BackIndices, drop_begin(Indices, IdxNum));
3294 return GetElementPtrInst::Create(
3295 GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
3296 BackIndices, GEP.getNoWrapFlags());
3297 }
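// Illustrative example (not part of the upstream source): with two non-zero
// indices the split above turns
//   %q = getelementptr [10 x i32], ptr %p, i64 %a, i64 %b
// into
//   %q.split = getelementptr [10 x i32], ptr %p, i64 %a
//   %q = getelementptr [10 x i32], ptr %q.split, i64 0, i64 %b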
3298
3299 // Check to see if the inputs to the PHI node are getelementptr instructions.
3300 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
3301 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3302 return replaceOperand(GEP, 0, NewPtrOp);
3303 }
3304
3305 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
3306 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3307 return I;
3308
3309 if (GEP.getNumIndices() == 1) {
3310 unsigned AS = GEP.getPointerAddressSpace();
3311 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
3312 DL.getIndexSizeInBits(AS)) {
3313 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
3314
3315 if (TyAllocSize == 1) {
3316 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3317 // but only if the result pointer is only used as if it were an integer,
3318 // or both point to the same underlying object (otherwise provenance is
3319 // not necessarily retained).
3320 Value *X = GEP.getPointerOperand();
3321 Value *Y;
3322 if (match(GEP.getOperand(1),
3323 m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
3324 GEPType == Y->getType()) {
3325 bool HasSameUnderlyingObject =
3326 getUnderlyingObject(X) == getUnderlyingObject(Y);
3327 bool Changed = false;
3328 GEP.replaceUsesWithIf(Y, [&](Use &U) {
3329 bool ShouldReplace = HasSameUnderlyingObject ||
3330 isa<ICmpInst>(U.getUser()) ||
3331 isa<PtrToIntInst>(U.getUser());
3332 Changed |= ShouldReplace;
3333 return ShouldReplace;
3334 });
3335 return Changed ? &GEP : nullptr;
3336 }
3337 } else if (auto *ExactIns =
3338 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
3339 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3340 Value *V;
3341 if (ExactIns->isExact()) {
3342 if ((has_single_bit(TyAllocSize) &&
3343 match(GEP.getOperand(1),
3344 m_Shr(m_Value(V),
3345 m_SpecificInt(countr_zero(TyAllocSize))))) ||
3346 match(GEP.getOperand(1),
3347 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
3348 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3349 GEP.getPointerOperand(), V,
3350 GEP.getNoWrapFlags());
3351 }
3352 }
3353 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3354 // Try to canonicalize non-i8 element type to i8 if the index is an
3355 // exact instruction. If the index is an exact instruction (div/shr)
3356 // with a constant RHS, we can fold the non-i8 element scale into the
3357 // div/shr (similar to the mul case, just inverted).
3358 const APInt *C;
3359 std::optional<APInt> NewC;
3360 if (has_single_bit(TyAllocSize) &&
3361 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
3362 C->uge(countr_zero(TyAllocSize)))
3363 NewC = *C - countr_zero(TyAllocSize);
3364 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
3365 APInt Quot;
3366 uint64_t Rem;
3367 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
3368 if (Rem == 0)
3369 NewC = Quot;
3370 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
3371 APInt Quot;
3372 int64_t Rem;
3373 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
3374 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3375 if (!Quot.isAllOnes() && Rem == 0)
3376 NewC = Quot;
3377 }
3378
3379 if (NewC.has_value()) {
3380 Value *NewOp = Builder.CreateBinOp(
3381 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
3382 ConstantInt::get(V->getType(), *NewC));
3383 cast<BinaryOperator>(NewOp)->setIsExact();
3384 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3385 GEP.getPointerOperand(), NewOp,
3386 GEP.getNoWrapFlags());
3387 }
3388 }
3389 }
3390 }
3391 }
3392 // We do not handle pointer-vector geps here.
3393 if (GEPType->isVectorTy())
3394 return nullptr;
3395
3396 if (!GEP.isInBounds()) {
3397 unsigned IdxWidth =
3398 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3399 APInt BasePtrOffset(IdxWidth, 0);
3400 Value *UnderlyingPtrOp =
3401 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
3402 bool CanBeNull, CanBeFreed;
3403 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3404 DL, CanBeNull, CanBeFreed);
3405 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3406 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3407 BasePtrOffset.isNonNegative()) {
3408 APInt AllocSize(IdxWidth, DerefBytes);
3409 if (BasePtrOffset.ule(AllocSize)) {
3410 return GetElementPtrInst::CreateInBounds(
3411 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3412 }
3413 }
3414 }
3415 }
3416
3417 // nusw + nneg -> nuw
3418 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3419 all_of(GEP.indices(), [&](Value *Idx) {
3420 return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
3421 })) {
3422 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3423 return &GEP;
3424 }
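// Illustrative example (not part of the upstream source): if %idx is known to
// be non-negative, the flag upgrade above turns
//   %q = getelementptr nusw i8, ptr %p, i64 %idx
// into
//   %q = getelementptr nusw nuw i8, ptr %p, i64 %idx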
3425
3426 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3427 // to do this after having tried to derive "nuw" above.
3428 if (GEP.getNumIndices() == 1) {
3429 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3430 // geps if transforming into (gep (gep p, x), y).
3431 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3432 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3433 // that x + y does not have unsigned wrap.
3434 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3435 return GEP.getNoWrapFlags();
3436 return GEPNoWrapFlags::none();
3437 };
3438
3439 // Try to replace ADD + GEP with GEP + GEP.
3440 Value *Idx1, *Idx2;
3441 if (match(GEP.getOperand(1),
3442 m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
3443 // %idx = add i64 %idx1, %idx2
3444 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3445 // as:
3446 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3447 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3448 bool NUW = match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()));
3449 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3450 auto *NewPtr =
3451 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3452 Idx1, "", NWFlags);
3453 return replaceInstUsesWith(GEP,
3454 Builder.CreateGEP(GEP.getSourceElementType(),
3455 NewPtr, Idx2, "", NWFlags));
3456 }
3457 ConstantInt *C;
3458 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAddLike(
3459 m_Value(Idx1), m_ConstantInt(C))))))) {
3460 // %add = add nsw i32 %idx1, idx2
3461 // %sidx = sext i32 %add to i64
3462 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3463 // as:
3464 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3465 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3466 bool NUW = match(GEP.getOperand(1),
3468 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3469 auto *NewPtr = Builder.CreateGEP(
3470 GEP.getSourceElementType(), GEP.getPointerOperand(),
3471 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "", NWFlags);
3472 return replaceInstUsesWith(
3473 GEP,
3474 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3475 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3476 "", NWFlags));
3477 }
3478 }
3479
3481 return R;
3482
3483 return nullptr;
3484}
3485
3487 Instruction *AI) {
3489 return true;
3490 if (auto *LI = dyn_cast<LoadInst>(V))
3491 return isa<GlobalVariable>(LI->getPointerOperand());
3492 // Two distinct allocations will never be equal.
3493 return isAllocLikeFn(V, &TLI) && V != AI;
3494}
3495
3496 /// Given a call CB which uses an address UsedV, return true if we can prove the
3497 /// call's only possible effect is storing to UsedV.
3498static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3499 const TargetLibraryInfo &TLI) {
3500 if (!CB.use_empty())
3501 // TODO: add recursion if returned attribute is present
3502 return false;
3503
3504 if (CB.isTerminator())
3505 // TODO: remove implementation restriction
3506 return false;
3507
3508 if (!CB.willReturn() || !CB.doesNotThrow())
3509 return false;
3510
3511 // If the only possible side effect of the call is writing to the alloca,
3512 // and the result isn't used, we can safely remove any reads implied by the
3513 // call including those which might read the alloca itself.
3514 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3515 return Dest && Dest->Ptr == UsedV;
3516}
3517
3518static std::optional<ModRefInfo>
3520 const TargetLibraryInfo &TLI, bool KnowInit) {
3522 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3523 Worklist.push_back(AI);
3525
3526 do {
3527 Instruction *PI = Worklist.pop_back_val();
3528 for (User *U : PI->users()) {
3529 Instruction *I = cast<Instruction>(U);
3530 switch (I->getOpcode()) {
3531 default:
3532 // Give up the moment we see something we can't handle.
3533 return std::nullopt;
3534
3535 case Instruction::AddrSpaceCast:
3536 case Instruction::BitCast:
3537 case Instruction::GetElementPtr:
3538 Users.emplace_back(I);
3539 Worklist.push_back(I);
3540 continue;
3541
3542 case Instruction::ICmp: {
3543 ICmpInst *ICI = cast<ICmpInst>(I);
3544 // We can fold eq/ne comparisons with null to false/true, respectively.
3545 // We also fold comparisons in some conditions provided the alloc has
3546 // not escaped (see isNeverEqualToUnescapedAlloc).
3547 if (!ICI->isEquality())
3548 return std::nullopt;
3549 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3550 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3551 return std::nullopt;
3552
3553 // Do not fold compares to aligned_alloc calls, as they may have to
3554 // return null in case the required alignment cannot be satisfied,
3555 // unless we can prove that both alignment and size are valid.
3556 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3557 // Check if alignment and size of a call to aligned_alloc is valid,
3558 // that is alignment is a power-of-2 and the size is a multiple of the
3559 // alignment.
3560 const APInt *Alignment;
3561 const APInt *Size;
3562 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3563 match(CB->getArgOperand(1), m_APInt(Size)) &&
3564 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3565 };
3566 auto *CB = dyn_cast<CallBase>(AI);
3567 LibFunc TheLibFunc;
3568 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3569 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3570 !AlignmentAndSizeKnownValid(CB))
3571 return std::nullopt;
3572 Users.emplace_back(I);
3573 continue;
3574 }
3575
3576 case Instruction::Call:
3577 // Ignore no-op and store intrinsics.
3578 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3579 switch (II->getIntrinsicID()) {
3580 default:
3581 return std::nullopt;
3582
3583 case Intrinsic::memmove:
3584 case Intrinsic::memcpy:
3585 case Intrinsic::memset: {
3586 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3587 if (MI->isVolatile())
3588 return std::nullopt;
3589 // Note: this could also be ModRef, but we can still interpret that
3590 // as just Mod in that case.
3591 ModRefInfo NewAccess =
3592 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
3593 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3594 return std::nullopt;
3595 Access |= NewAccess;
3596 [[fallthrough]];
3597 }
3598 case Intrinsic::assume:
3599 case Intrinsic::invariant_start:
3600 case Intrinsic::invariant_end:
3601 case Intrinsic::lifetime_start:
3602 case Intrinsic::lifetime_end:
3603 case Intrinsic::objectsize:
3604 Users.emplace_back(I);
3605 continue;
3606 case Intrinsic::launder_invariant_group:
3607 case Intrinsic::strip_invariant_group:
3608 Users.emplace_back(I);
3609 Worklist.push_back(I);
3610 continue;
3611 }
3612 }
3613
3614 if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3615 getAllocationFamily(I, &TLI) == Family) {
3616 Users.emplace_back(I);
3617 continue;
3618 }
3619
3620 if (Family && getReallocatedOperand(cast<CallBase>(I)) == PI &&
3621 getAllocationFamily(I, &TLI) == Family) {
3622 Users.emplace_back(I);
3623 Worklist.push_back(I);
3624 continue;
3625 }
3626
3627 if (!isRefSet(Access) &&
3628 isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3629 Access |= ModRefInfo::Mod;
3630 Users.emplace_back(I);
3631 continue;
3632 }
3633
3634 return std::nullopt;
3635
3636 case Instruction::Store: {
3637 StoreInst *SI = cast<StoreInst>(I);
3638 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3639 return std::nullopt;
3640 if (isRefSet(Access))
3641 return std::nullopt;
3642 Access |= ModRefInfo::Mod;
3643 Users.emplace_back(I);
3644 continue;
3645 }
3646
3647 case Instruction::Load: {
3648 LoadInst *LI = cast<LoadInst>(I);
3649 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3650 return std::nullopt;
3651 if (isModSet(Access))
3652 return std::nullopt;
3653 Access |= ModRefInfo::Ref;
3654 Users.emplace_back(I);
3655 continue;
3656 }
3657 }
3658 llvm_unreachable("missing a return?");
3659 }
3660 } while (!Worklist.empty());
3661
3663 return Access;
3664}
3665
3668
3669 // If we have a malloc call which is only used in any amount of comparisons to
3670 // null and free calls, delete the calls and replace the comparisons with true
3671 // or false as appropriate.
3672
3673 // This is based on the principle that we can substitute our own allocation
3674 // function (which will never return null) rather than knowledge of the
3675 // specific function being called. In some sense this can change the permitted
3676 // outputs of a program (when we convert a malloc to an alloca, the fact that
3677 // the allocation is now on the stack is potentially visible, for example),
3678 // but we believe it does so in a permissible manner.
3680
3681 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3682 // before each store.
3684 std::unique_ptr<DIBuilder> DIB;
3685 if (isa<AllocaInst>(MI)) {
3686 findDbgUsers(&MI, DVRs);
3687 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3688 }
3689
3690 // Determine what getInitialValueOfAllocation would return without actually
3691 // allocating the result.
3692 bool KnowInitUndef = false;
3693 bool KnowInitZero = false;
3694 Constant *Init =
3696 if (Init) {
3697 if (isa<UndefValue>(Init))
3698 KnowInitUndef = true;
3699 else if (Init->isNullValue())
3700 KnowInitZero = true;
3701 }
3702 // The various sanitizers don't actually return undef memory, but rather
3703 // memory initialized with special forms of runtime poison
3704 auto &F = *MI.getFunction();
3705 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
3706 F.hasFnAttribute(Attribute::SanitizeAddress))
3707 KnowInitUndef = false;
3708
3709 auto Removable =
3710 isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef);
3711 if (Removable) {
3712 for (WeakTrackingVH &User : Users) {
3713 // Lower all @llvm.objectsize and MTI calls first because they may use
3714 // a bitcast/GEP of the alloca we are removing.
3715 if (!User)
3716 continue;
3717
3718 Instruction *I = cast<Instruction>(&*User);
3719
3720 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3721 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3722 SmallVector<Instruction *> InsertedInstructions;
3723 Value *Result = lowerObjectSizeCall(
3724 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3725 for (Instruction *Inserted : InsertedInstructions)
3726 Worklist.add(Inserted);
3727 replaceInstUsesWith(*I, Result);
3728 eraseInstFromFunction(*I);
3729 User = nullptr; // Skip examining in the next loop.
3730 continue;
3731 }
3732 if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
3733 if (KnowInitZero && isRefSet(*Removable)) {
3735 Builder.SetInsertPoint(MTI);
3736 auto *M = Builder.CreateMemSet(
3737 MTI->getRawDest(),
3738 ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
3739 MTI->getLength(), MTI->getDestAlign());
3740 M->copyMetadata(*MTI);
3741 }
3742 }
3743 }
3744 }
3745 for (WeakTrackingVH &User : Users) {
3746 if (!User)
3747 continue;
3748
3749 Instruction *I = cast<Instruction>(&*User);
3750
3751 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3752 replaceInstUsesWith(*C,
3753 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3754 C->isFalseWhenEqual()));
3755 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3756 for (auto *DVR : DVRs)
3757 if (DVR->isAddressOfVariable())
3759 } else {
3760 // Casts, GEP, or anything else: we're about to delete this instruction,
3761 // so it cannot have any valid uses.
3762 Constant *Replace;
3763 if (isa<LoadInst>(I)) {
3764 assert(KnowInitZero || KnowInitUndef);
3765 Replace = KnowInitUndef ? UndefValue::get(I->getType())
3766 : Constant::getNullValue(I->getType());
3767 } else
3768 Replace = PoisonValue::get(I->getType());
3769 replaceInstUsesWith(*I, Replace);
3770 }
3771 eraseInstFromFunction(*I);
3772 }
3773
3774 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3775 // Replace invoke with a NOP intrinsic to maintain the original CFG
3776 Module *M = II->getModule();
3777 Function *F = Intrinsic::getOrInsertDeclaration(M, Intrinsic::donothing);
3778 auto *NewII = InvokeInst::Create(
3779 F, II->getNormalDest(), II->getUnwindDest(), {}, "", II->getParent());
3780 NewII->setDebugLoc(II->getDebugLoc());
3781 }
3782
3783 // Remove debug intrinsics which describe the value contained within the
3784 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3785 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3786 //
3787 // ```
3788 // define void @foo(i32 %0) {
3789 // %a = alloca i32 ; Deleted.
3790 // store i32 %0, i32* %a
3791 // dbg.value(i32 %0, "arg0") ; Not deleted.
3792 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3793 // call void @trivially_inlinable_no_op(i32* %a)
3794 // ret void
3795 // }
3796 // ```
3797 //
3798 // This may not be required if we stop describing the contents of allocas
3799 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3800 // the LowerDbgDeclare utility.
3801 //
3802 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3803 // "arg0" dbg.value may be stale after the call. However, failing to remove
3804 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3805 //
3806 // FIXME: the Assignment Tracking project has now likely made this
3807 // redundant (and it's sometimes harmful).
3808 for (auto *DVR : DVRs)
3809 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3810 DVR->eraseFromParent();
3811
3812 return eraseInstFromFunction(MI);
3813 }
3814 return nullptr;
3815}
3816
3817/// Move the call to free before a NULL test.
3818///
3819 /// Check if this free is accessed after its argument has been tested
3820 /// against NULL (property 0).
3821 /// If yes, it is legal to move this call into its predecessor block.
3822///
3823/// The move is performed only if the block containing the call to free
3824/// will be removed, i.e.:
3825/// 1. it has only one predecessor P, and P has two successors
3826/// 2. it contains the call, noops, and an unconditional branch
3827/// 3. its successor is the same as its predecessor's successor
3828///
3829 /// Profitability is not a concern here; this function should only
3830 /// be called if the caller knows this transformation would be
3831 /// profitable (e.g., for code size).
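/// Illustrative shape (not part of the upstream source):
///   pred:    %c = icmp eq ptr %p, null
///            br i1 %c, label %cont, label %free_bb
///   free_bb: call void @free(ptr %p)
///            br label %cont
/// After the transform the call to free is moved into %pred (dropping
/// attributes such as nonnull from its argument), and SimplifyCFG can later
/// remove the now-empty block and the branch.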
3832 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3833 const DataLayout &DL) {
3834 Value *Op = FI.getArgOperand(0);
3835 BasicBlock *FreeInstrBB = FI.getParent();
3836 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3837
3838 // Validate part of constraint #1: Only one predecessor
3839 // FIXME: We could allow more than one predecessor, but in that case, we
3840 // would duplicate the call to free in each predecessor and it may
3841 // not be profitable even for code size.
3842 if (!PredBB)
3843 return nullptr;
3844
3845 // Validate constraint #2: Does this block contain only the call to
3846 // free, noops, and an unconditional branch?
3847 BasicBlock *SuccBB;
3848 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3849 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3850 return nullptr;
3851
3852 // If there are only 2 instructions in the block, at this point,
3853 // they are the call to free and the unconditional branch.
3854 // If there are more than 2 instructions, check that the extra ones are
3855 // noops, i.e., they won't hurt the performance of the generated code.
3856 if (FreeInstrBB->size() != 2) {
3857 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3858 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3859 continue;
3860 auto *Cast = dyn_cast<CastInst>(&Inst);
3861 if (!Cast || !Cast->isNoopCast(DL))
3862 return nullptr;
3863 }
3864 }
3865 // Validate the rest of constraint #1 by matching on the pred branch.
3866 Instruction *TI = PredBB->getTerminator();
3867 BasicBlock *TrueBB, *FalseBB;
3868 CmpPredicate Pred;
3869 if (!match(TI, m_Br(m_ICmp(Pred,
3870 m_CombineOr(m_Specific(Op),
3871 m_Specific(Op->stripPointerCasts())),
3872 m_Zero()),
3873 TrueBB, FalseBB)))
3874 return nullptr;
3875 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3876 return nullptr;
3877
3878 // Validate constraint #3: Ensure the null case just falls through.
3879 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3880 return nullptr;
3881 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3882 "Broken CFG: missing edge from predecessor to successor");
3883
3884 // At this point, we know that everything in FreeInstrBB can be moved
3885 // before TI.
3886 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3887 if (&Instr == FreeInstrBBTerminator)
3888 break;
3889 Instr.moveBeforePreserving(TI->getIterator());
3890 }
3891 assert(FreeInstrBB->size() == 1 &&
3892 "Only the branch instruction should remain");
3893
3894 // Now that we've moved the call to free before the NULL check, we have to
3895 // remove any attributes on its parameter that imply it's non-null, because
3896 // those attributes might have only been valid because of the NULL check, and
3897 // we can get miscompiles if we keep them. This is conservative if non-null is
3898 // also implied by something other than the NULL check, but it's guaranteed to
3899 // be correct, and the conservativeness won't matter in practice, since the
3900 // attributes are irrelevant for the call to free itself and the pointer
3901 // shouldn't be used after the call.
3902 AttributeList Attrs = FI.getAttributes();
3903 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3904 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3905 if (Dereferenceable.isValid()) {
3906 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3907 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3908 Attribute::Dereferenceable);
3909 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3910 }
3911 FI.setAttributes(Attrs);
3912
3913 return &FI;
3914}
3915
3916 Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3917 // free undef -> unreachable.
3918 if (isa<UndefValue>(Op)) {
3919 // Leave a marker since we can't modify the CFG here.
3920 CreateNonTerminatorUnreachable(&FI);
3921 return eraseInstFromFunction(FI);
3922 }
3923
3924 // If we have 'free null', delete the instruction. This can happen in stl code
3925 // when lots of inlining happens.
3926 if (isa<ConstantPointerNull>(Op))
3927 return eraseInstFromFunction(FI);
3928
3929 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3930 // realloc() entirely.
3931 CallInst *CI = dyn_cast<CallInst>(Op);
3932 if (CI && CI->hasOneUse())
3933 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3934 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3935
3936 // If we optimize for code size, try to move the call to free before the null
3937 // test so that SimplifyCFG can remove the empty block and dead code
3938 // elimination can remove the branch. I.e., this helps to turn something like:
3939 // if (foo) free(foo);
3940 // into
3941 // free(foo);
3942 //
3943 // Note that we can only do this for 'free' and not for any flavor of
3944 // 'operator delete'; there is no 'operator delete' symbol for which we are
3945 // permitted to invent a call, even if we're passing in a null pointer.
3946 if (MinimizeSize) {
3947 LibFunc Func;
3948 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3949 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3950 return I;
3951 }
3952
3953 return nullptr;
3954}
3955
3956 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3957 Value *RetVal = RI.getReturnValue();
3958 if (!RetVal)
3959 return nullptr;
3960
3961 Function *F = RI.getFunction();
3962 Type *RetTy = RetVal->getType();
3963 if (RetTy->isPointerTy()) {
3964 bool HasDereferenceable =
3965 F->getAttributes().getRetDereferenceableBytes() > 0;
3966 if (F->hasRetAttribute(Attribute::NonNull) ||
3967 (HasDereferenceable &&
3969 if (Value *V = simplifyNonNullOperand(RetVal, HasDereferenceable))
3970 return replaceOperand(RI, 0, V);
3971 }
3972 }
3973
3974 if (!AttributeFuncs::isNoFPClassCompatibleType(RetTy))
3975 return nullptr;
3976
3977 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3978 if (ReturnClass == fcNone)
3979 return nullptr;
3980
3981 KnownFPClass KnownClass;
3982 Value *Simplified =
3983 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, &RI);
3984 if (!Simplified)
3985 return nullptr;
3986
3987 return ReturnInst::Create(RI.getContext(), Simplified);
3988}
3989
3990// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3992 // Try to remove the previous instruction if it must lead to unreachable.
3993 // This includes instructions like stores and "llvm.assume" that may not get
3994 // removed by simple dead code elimination.
3995 bool Changed = false;
3996 while (Instruction *Prev = I.getPrevNode()) {
3997 // While we theoretically can erase EH, that would result in a block that
3998 // used to start with an EH no longer starting with EH, which is invalid.
3999 // To make it valid, we'd need to fixup predecessors to no longer refer to
4000 // this block, but that changes CFG, which is not allowed in InstCombine.
4001 if (Prev->isEHPad())
4002 break; // Can not drop any more instructions. We're done here.
4003
4004 if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
4005 break; // Can not drop any more instructions. We're done here.
4006 // Otherwise, this instruction can be freely erased,
4007 // even if it is not side-effect free.
4008
4009 // A value may still have uses before we process it here (for example, in
4010 // another unreachable block), so convert those to poison.
4011 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
4012 eraseInstFromFunction(*Prev);
4013 Changed = true;
4014 }
4015 return Changed;
4016}
4017
4022
4024 assert(BI.isUnconditional() && "Only for unconditional branches.");
4025
4026 // If this store is the second-to-last instruction in the basic block
4027 // (excluding debug info) and if the block ends with
4028 // an unconditional branch, try to move the store to the successor block.
4029
4030 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4031 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4032 do {
4033 if (BBI != FirstInstr)
4034 --BBI;
4035 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4036
4037 return dyn_cast<StoreInst>(BBI);
4038 };
4039
4040 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4042 return &BI;
4043
4044 return nullptr;
4045}
4046
4049 if (!DeadEdges.insert({From, To}).second)
4050 return;
4051
4052 // Replace phi node operands in successor with poison.
4053 for (PHINode &PN : To->phis())
4054 for (Use &U : PN.incoming_values())
4055 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
4056 replaceUse(U, PoisonValue::get(PN.getType()));
4057 addToWorklist(&PN);
4058 MadeIRChange = true;
4059 }
4060
4061 Worklist.push_back(To);
4062}
4063
4064// Under the assumption that I is unreachable, remove it and following
4065// instructions. Changes are reported directly to MadeIRChange.
4068 BasicBlock *BB = I->getParent();
4069 for (Instruction &Inst : make_early_inc_range(
4070 make_range(std::next(BB->getTerminator()->getReverseIterator()),
4071 std::next(I->getReverseIterator())))) {
4072 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4073 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
4074 MadeIRChange = true;
4075 }
4076 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4077 continue;
4078 // RemoveDIs: erase debug-info on this instruction manually.
4079 Inst.dropDbgRecords();
4081 MadeIRChange = true;
4082 }
4083
4086 MadeIRChange = true;
4087 for (Value *V : Changed)
4089 }
4090
4091 // Handle potentially dead successors.
4092 for (BasicBlock *Succ : successors(BB))
4093 addDeadEdge(BB, Succ, Worklist);
4094}
4095
4098 while (!Worklist.empty()) {
4099 BasicBlock *BB = Worklist.pop_back_val();
4100 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
4101 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4102 }))
4103 continue;
4104
4106 }
4107}
4108
4110 BasicBlock *LiveSucc) {
4112 for (BasicBlock *Succ : successors(BB)) {
4113 // The live successor isn't dead.
4114 if (Succ == LiveSucc)
4115 continue;
4116
4117 addDeadEdge(BB, Succ, Worklist);
4118 }
4119
4121}
4122
4124 if (BI.isUnconditional())
4126
4127 // Change br (not X), label True, label False to: br X, label False, True
4128 Value *Cond = BI.getCondition();
4129 Value *X;
4130 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
4131 // Swap Destinations and condition...
4132 BI.swapSuccessors();
4133 if (BPI)
4134 BPI->swapSuccEdgesProbabilities(BI.getParent());
4135 return replaceOperand(BI, 0, X);
4136 }
4137
4138 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4139 // This is done by inverting the condition and swapping successors:
4140 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4141 Value *Y;
4142 if (isa<SelectInst>(Cond) &&
4143 match(Cond,
4145 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
4146 Value *Or = Builder.CreateLogicalOr(NotX, Y);
4147 BI.swapSuccessors();
4148 if (BPI)
4149 BPI->swapSuccEdgesProbabilities(BI.getParent());
4150 return replaceOperand(BI, 0, Or);
4151 }
4152
4153 // If the condition is irrelevant, remove the use so that other
4154 // transforms on the condition become more effective.
4155 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
4156 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
4157
4158 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4159 CmpPredicate Pred;
4160 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
4161 !isCanonicalPredicate(Pred)) {
4162 // Swap destinations and condition.
4163 auto *Cmp = cast<CmpInst>(Cond);
4164 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
4165 BI.swapSuccessors();
4166 if (BPI)
4167 BPI->swapSuccEdgesProbabilities(BI.getParent());
4168 Worklist.push(Cmp);
4169 return &BI;
4170 }
4171
4172 if (isa<UndefValue>(Cond)) {
4173 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
4174 return nullptr;
4175 }
4176 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4177 handlePotentiallyDeadSuccessors(BI.getParent(),
4178 BI.getSuccessor(!CI->getZExtValue()));
4179 return nullptr;
4180 }
4181
4182 // Replace all dominated uses of the condition with true/false
4183 // Ignore constant expressions to avoid iterating over uses on other
4184 // functions.
4185 if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
4186 for (auto &U : make_early_inc_range(Cond->uses())) {
4187 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
4188 if (DT.dominates(Edge0, U)) {
4189 replaceUse(U, ConstantInt::getTrue(Cond->getType()));
4190 addToWorklist(cast<Instruction>(U.getUser()));
4191 continue;
4192 }
4193 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(1));
4194 if (DT.dominates(Edge1, U)) {
4195 replaceUse(U, ConstantInt::getFalse(Cond->getType()));
4196 addToWorklist(cast<Instruction>(U.getUser()));
4197 }
4198 }
4199 }
4200
4201 DC.registerBranch(&BI);
4202 return nullptr;
4203}
4204
4205// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4206// we can prove that both (switch C) and (switch X) go to the default when cond
4207// is false/true.
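// Illustrative example (not part of the upstream source):
//   %cmp = icmp ult i32 %x, 4
//   %sel = select i1 %cmp, i32 %x, i32 7
//   switch i32 %sel, label %default [ i32 0, ... i32 3, ... ]
// If value 7 would branch to %default and every case value lies in [0, 4),
// the switch can be performed on %x directly.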
4210 bool IsTrueArm) {
4211 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4212 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
4213 if (!C)
4214 return nullptr;
4215
4216 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4217 if (CstBB != SI.getDefaultDest())
4218 return nullptr;
4219 Value *X = Select->getOperand(3 - CstOpIdx);
4220 CmpPredicate Pred;
4221 const APInt *RHSC;
4222 if (!match(Select->getCondition(),
4223 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
4224 return nullptr;
4225 if (IsTrueArm)
4226 Pred = ICmpInst::getInversePredicate(Pred);
4227
4228 // See whether we can replace the select with X
4229 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
4230 for (auto Case : SI.cases())
4231 if (!CR.contains(Case.getCaseValue()->getValue()))
4232 return nullptr;
4233
4234 return X;
4235}
4236
4238 Value *Cond = SI.getCondition();
4239 Value *Op0;
4240 ConstantInt *AddRHS;
4241 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
4242 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
4243 for (auto Case : SI.cases()) {
4244 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
4245 assert(isa<ConstantInt>(NewCase) &&
4246 "Result of expression should be constant");
4247 Case.setValue(cast<ConstantInt>(NewCase));
4248 }
4249 return replaceOperand(SI, 0, Op0);
4250 }
4251
4252 ConstantInt *SubLHS;
4253 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
4254 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
4255 for (auto Case : SI.cases()) {
4256 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
4257 assert(isa<ConstantInt>(NewCase) &&
4258 "Result of expression should be constant");
4259 Case.setValue(cast<ConstantInt>(NewCase));
4260 }
4261 return replaceOperand(SI, 0, Op0);
4262 }
4263
4264 uint64_t ShiftAmt;
4265 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
4266 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4267 all_of(SI.cases(), [&](const auto &Case) {
4268 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4269 })) {
4270 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4272 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4273 Shl->hasOneUse()) {
4274 Value *NewCond = Op0;
4275 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4276 // If the shift may wrap, we need to mask off the shifted bits.
4277 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4278 NewCond = Builder.CreateAnd(
4279 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
4280 }
4281 for (auto Case : SI.cases()) {
4282 const APInt &CaseVal = Case.getCaseValue()->getValue();
4283 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4284 : CaseVal.lshr(ShiftAmt);
4285 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
4286 }
4287 return replaceOperand(SI, 0, NewCond);
4288 }
4289 }
4290
4291 // Fold switch(zext/sext(X)) into switch(X) if possible.
4292 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
4293 bool IsZExt = isa<ZExtInst>(Cond);
4294 Type *SrcTy = Op0->getType();
4295 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4296
4297 if (all_of(SI.cases(), [&](const auto &Case) {
4298 const APInt &CaseVal = Case.getCaseValue()->getValue();
4299 return IsZExt ? CaseVal.isIntN(NewWidth)
4300 : CaseVal.isSignedIntN(NewWidth);
4301 })) {
4302 for (auto &Case : SI.cases()) {
4303 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4304 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4305 }
4306 return replaceOperand(SI, 0, Op0);
4307 }
4308 }
4309
4310 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4311 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
4312 if (Value *V =
4313 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4314 return replaceOperand(SI, 0, V);
4315 if (Value *V =
4316 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4317 return replaceOperand(SI, 0, V);
4318 }
4319
4320 KnownBits Known = computeKnownBits(Cond, &SI);
4321 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4322 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4323
4324 // Compute the number of leading bits we can ignore.
4325 // TODO: A better way to determine this would use ComputeNumSignBits().
4326 for (const auto &C : SI.cases()) {
4327 LeadingKnownZeros =
4328 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
4329 LeadingKnownOnes =
4330 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
4331 }
4332
4333 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
4334
4335 // Shrink the condition operand if the new type is smaller than the old type.
4336 // But do not shrink to a non-standard type, because backend can't generate
4337 // good code for that yet.
4338 // TODO: We can make it aggressive again after fixing PR39569.
4339 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4340 shouldChangeType(Known.getBitWidth(), NewWidth)) {
4341 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
4342 Builder.SetInsertPoint(&SI);
4343 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
4344
4345 for (auto Case : SI.cases()) {
4346 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4347 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4348 }
4349 return replaceOperand(SI, 0, NewCond);
4350 }
4351
4352 if (isa<UndefValue>(Cond)) {
4353 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
4354 return nullptr;
4355 }
4356 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4357 handlePotentiallyDeadSuccessors(SI.getParent(),
4358 SI.findCaseValue(CI)->getCaseSuccessor());
4359 return nullptr;
4360 }
4361
4362 return nullptr;
4363}
4364
4365 Instruction *
4366 InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4367 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
4368 if (!WO)
4369 return nullptr;
4370
4371 Intrinsic::ID OvID = WO->getIntrinsicID();
4372 const APInt *C = nullptr;
4373 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
4374 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4375 OvID == Intrinsic::umul_with_overflow)) {
4376 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4377 if (C->isAllOnes())
4378 return BinaryOperator::CreateNeg(WO->getLHS());
4379 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4380 if (C->isPowerOf2()) {
4381 return BinaryOperator::CreateShl(
4382 WO->getLHS(),
4383 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
4384 }
4385 }
4386 }
4387
4388 // We're extracting from an overflow intrinsic. See if we're the only user.
4389 // That allows us to simplify multiple result intrinsics to simpler things
4390 // that just get one value.
4391 if (!WO->hasOneUse())
4392 return nullptr;
4393
4394 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4395 // and replace it with a traditional binary instruction.
4396 if (*EV.idx_begin() == 0) {
4397 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4398 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4399 // Replace the old instruction's uses with poison.
4400 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
4401 eraseInstFromFunction(*WO);
4402 return BinaryOperator::Create(BinOp, LHS, RHS);
4403 }
4404
4405 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4406
4407 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4408 if (OvID == Intrinsic::usub_with_overflow)
4409 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4410
4411 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4412 // +1 is not possible because we assume signed values.
4413 if (OvID == Intrinsic::smul_with_overflow &&
4414 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
4415 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
4416
4417 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4418 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4419 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4420 // Only handle even bitwidths for performance reasons.
4421 if (BitWidth % 2 == 0)
4422 return new ICmpInst(
4423 ICmpInst::ICMP_UGT, WO->getLHS(),
4424 ConstantInt::get(WO->getLHS()->getType(),
4425 APInt::getLowBitsSet(BitWidth, BitWidth / 2)));
4426 }
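// Illustrative example (not part of the upstream source): for i32, X * X
// overflows exactly when X u> 65535, i.e. when X needs more than 16 bits.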
4427
4428 // If only the overflow result is used, and the right hand side is a
4429 // constant (or constant splat), we can remove the intrinsic by directly
4430 // checking for overflow.
4431 if (C) {
4432 // Compute the no-wrap range for LHS given RHS=C, then construct an
4433 // equivalent icmp, potentially using an offset.
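// Illustrative example (not part of the upstream source):
//   %v = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 100)
//   %ov = extractvalue { i8, i1 } %v, 1
// The no-wrap region for %x is [-128, 28), so %ov simplifies to
//   %ov = icmp sgt i8 %x, 27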
4434 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4435 WO->getBinaryOp(), *C, WO->getNoWrapKind());
4436
4437 CmpInst::Predicate Pred;
4438 APInt NewRHSC, Offset;
4439 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
4440 auto *OpTy = WO->getRHS()->getType();
4441 auto *NewLHS = WO->getLHS();
4442 if (Offset != 0)
4443 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
4444 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
4445 ConstantInt::get(OpTy, NewRHSC));
4446 }
4447
4448 return nullptr;
4449}
4450
4453 InstCombiner::BuilderTy &Builder) {
4454 // Helper to fold frexp of select to select of frexp.
4455
4456 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4457 return nullptr;
4458 Value *Cond = SelectInst->getCondition();
4459 Value *TrueVal = SelectInst->getTrueValue();
4460 Value *FalseVal = SelectInst->getFalseValue();
4461
4462 const APFloat *ConstVal = nullptr;
4463 Value *VarOp = nullptr;
4464 bool ConstIsTrue = false;
4465
4466 if (match(TrueVal, m_APFloat(ConstVal))) {
4467 VarOp = FalseVal;
4468 ConstIsTrue = true;
4469 } else if (match(FalseVal, m_APFloat(ConstVal))) {
4470 VarOp = TrueVal;
4471 ConstIsTrue = false;
4472 } else {
4473 return nullptr;
4474 }
4475
4476 Builder.SetInsertPoint(&EV);
4477
4478 CallInst *NewFrexp =
4479 Builder.CreateCall(FrexpCall->getCalledFunction(), {VarOp}, "frexp");
4480 NewFrexp->copyIRFlags(FrexpCall);
4481
4482 Value *NewEV = Builder.CreateExtractValue(NewFrexp, 0, "mantissa");
4483
4484 int Exp;
4485 APFloat Mantissa = frexp(*ConstVal, Exp, APFloat::rmNearestTiesToEven);
4486
4487 Constant *ConstantMantissa = ConstantFP::get(TrueVal->getType(), Mantissa);
4488
4489 Value *NewSel = Builder.CreateSelectFMF(
4490 Cond, ConstIsTrue ? ConstantMantissa : NewEV,
4491 ConstIsTrue ? NewEV : ConstantMantissa, SelectInst, "select.frexp");
4492 return NewSel;
4493}
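// Illustrative example (not part of the upstream source, and the exact
// intrinsic mangling is assumed): for the helper above,
//   %s = select i1 %c, double 4.0, double %x
//   %r = call { double, i32 } @llvm.frexp.f64.i32(double %s)
//   %m = extractvalue { double, i32 } %r, 0
// can become
//   %r2 = call { double, i32 } @llvm.frexp.f64.i32(double %x)
//   %m2 = extractvalue { double, i32 } %r2, 0
//   %m  = select i1 %c, double 5.000000e-01, double %m2   ; frexp(4.0) mantissa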
4494 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4495 Value *Agg = EV.getAggregateOperand();
4496
4497 if (!EV.hasIndices())
4498 return replaceInstUsesWith(EV, Agg);
4499
4500 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
4501 SQ.getWithInstruction(&EV)))
4502 return replaceInstUsesWith(EV, V);
4503
4504 Value *Cond, *TrueVal, *FalseVal;
4505 if (match(Agg, m_OneUse(m_Intrinsic<Intrinsic::frexp>(m_Select(
4506 m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal)))))) {
4507 auto *SelInst =
4508 cast<SelectInst>(cast<IntrinsicInst>(Agg)->getArgOperand(0));
4509 if (Value *Result =
4510 foldFrexpOfSelect(EV, cast<IntrinsicInst>(Agg), SelInst, Builder))
4511 return replaceInstUsesWith(EV, Result);
4512 }
4513 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
4514 // We're extracting from an insertvalue instruction, compare the indices
4515 const unsigned *exti, *exte, *insi, *inse;
4516 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4517 exte = EV.idx_end(), inse = IV->idx_end();
4518 exti != exte && insi != inse;
4519 ++exti, ++insi) {
4520 if (*insi != *exti)
4521 // The insert and extract both reference distinctly different elements.
4522 // This means the extract is not influenced by the insert, and we can
4523 // replace the aggregate operand of the extract with the aggregate
4524 // operand of the insert. i.e., replace
4525 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4526 // %E = extractvalue { i32, { i32 } } %I, 0
4527 // with
4528 // %E = extractvalue { i32, { i32 } } %A, 0
4529 return ExtractValueInst::Create(IV->getAggregateOperand(),
4530 EV.getIndices());
4531 }
4532 if (exti == exte && insi == inse)
4533 // Both iterators are at the end: Index lists are identical. Replace
4534 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4535 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4536 // with "i32 42"
4537 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
4538 if (exti == exte) {
4539 // The extract list is a prefix of the insert list. i.e. replace
4540 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4541 // %E = extractvalue { i32, { i32 } } %I, 1
4542 // with
4543 // %X = extractvalue { i32, { i32 } } %A, 1
4544 // %E = insertvalue { i32 } %X, i32 42, 0
4545 // by switching the order of the insert and extract (though the
4546 // insertvalue should be left in, since it may have other uses).
4547 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4548 EV.getIndices());
4549 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4550 ArrayRef(insi, inse));
4551 }
4552 if (insi == inse)
4553 // The insert list is a prefix of the extract list
4554 // We can simply remove the common indices from the extract and make it
4555 // operate on the inserted value instead of the insertvalue result.
4556 // i.e., replace
4557 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4558 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4559 // with
4560 // %E extractvalue { i32 } { i32 42 }, 0
4561 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4562 ArrayRef(exti, exte));
4563 }
4564
4565 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4566 return R;
4567
4568 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4569 // Bail out if the aggregate contains scalable vector type
4570 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4571 STy && STy->isScalableTy())
4572 return nullptr;
4573
4574 // If the (non-volatile) load only has one use, we can rewrite this to a
4575 // load from a GEP. This reduces the size of the load. If a load is used
4576 // only by extractvalue instructions then this either must have been
4577 // optimized before, or it is a struct with padding, in which case we
4578 // don't want to do the transformation as it loses padding knowledge.
4579 if (L->isSimple() && L->hasOneUse()) {
4580 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4581 SmallVector<Value*, 4> Indices;
4582 // Prefix an i32 0 since we need the first element.
4583 Indices.push_back(Builder.getInt32(0));
4584 for (unsigned Idx : EV.indices())
4585 Indices.push_back(Builder.getInt32(Idx));
4586
4587 // We need to insert these at the location of the old load, not at that of
4588 // the extractvalue.
4589 Builder.SetInsertPoint(L);
4590 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4591 L->getPointerOperand(), Indices);
4592 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
4593 // Whatever aliasing information we had for the original load must also
4594 // hold for the smaller load, so propagate the annotations.
4595 NL->setAAMetadata(L->getAAMetadata());
4596 // Returning the load directly will cause the main loop to insert it in
4597 // the wrong spot, so use replaceInstUsesWith().
4598 return replaceInstUsesWith(EV, NL);
4599 }
4600 }
4601
4602 if (auto *PN = dyn_cast<PHINode>(Agg))
4603 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4604 return Res;
4605
4606 // Canonicalize extract (select Cond, TV, FV)
4607 // -> select cond, (extract TV), (extract FV)
4608 if (auto *SI = dyn_cast<SelectInst>(Agg))
4609 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4610 return R;
4611
4612 // We could simplify extracts from other values. Note that nested extracts may
4613 // already be simplified implicitly by the above: extract (extract (insert) )
4614 // will be translated into extract ( insert ( extract ) ) first and then just
4615 // the value inserted, if appropriate. Similarly for extracts from single-use
4616 // loads: extract (extract (load)) will be translated to extract (load (gep))
4617 // and if again single-use then via load (gep (gep)) to load (gep).
4618 // However, double extracts from e.g. function arguments or return values
4619 // aren't handled yet.
4620 return nullptr;
4621}
4622
4623/// Return 'true' if the given typeinfo will match anything.
4624static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4625 switch (Personality) {
4629 // The GCC C EH and Rust personalities only exist to support cleanups, so
4630 // it's not clear what the semantics of catch clauses are.
4631 return false;
4633 return false;
4635 // While __gnat_all_others_value will match any Ada exception, it doesn't
4636 // match foreign exceptions (or didn't, before gcc-4.7).
4637 return false;
4648 return TypeInfo->isNullValue();
4649 }
4650 llvm_unreachable("invalid enum");
4651}
4652
4653static bool shorter_filter(const Value *LHS, const Value *RHS) {
4654 return
4655 cast<ArrayType>(LHS->getType())->getNumElements()
4656 <
4657 cast<ArrayType>(RHS->getType())->getNumElements();
4658}
4659
4661 // The logic here should be correct for any real-world personality function.
4662 // However if that turns out not to be true, the offending logic can always
4663 // be conditioned on the personality function, like the catch-all logic is.
4664 EHPersonality Personality =
4665 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4666
4667 // Simplify the list of clauses, e.g. by removing repeated catch clauses
4668 // (these are often created by inlining).
4669 bool MakeNewInstruction = false; // If true, recreate using the following:
4670 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4671 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4672
4673 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4674 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4675 bool isLastClause = i + 1 == e;
4676 if (LI.isCatch(i)) {
4677 // A catch clause.
4678 Constant *CatchClause = LI.getClause(i);
4679 Constant *TypeInfo = CatchClause->stripPointerCasts();
4680
4681 // If we already saw this clause, there is no point in having a second
4682 // copy of it.
4683 if (AlreadyCaught.insert(TypeInfo).second) {
4684 // This catch clause was not already seen.
4685 NewClauses.push_back(CatchClause);
4686 } else {
4687 // Repeated catch clause - drop the redundant copy.
4688 MakeNewInstruction = true;
4689 }
4690
4691 // If this is a catch-all then there is no point in keeping any following
4692 // clauses or marking the landingpad as having a cleanup.
4693 if (isCatchAll(Personality, TypeInfo)) {
4694 if (!isLastClause)
4695 MakeNewInstruction = true;
4696 CleanupFlag = false;
4697 break;
4698 }
4699 } else {
4700 // A filter clause. If any of the filter elements were already caught
4701 // then they can be dropped from the filter. It is tempting to try to
4702 // exploit the filter further by saying that any typeinfo that does not
4703 // occur in the filter can't be caught later (and thus can be dropped).
4704 // However this would be wrong, since typeinfos can match without being
4705 // equal (for example if one represents a C++ class, and the other some
4706 // class derived from it).
4707 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4708 Constant *FilterClause = LI.getClause(i);
4709 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4710 unsigned NumTypeInfos = FilterType->getNumElements();
4711
4712 // An empty filter catches everything, so there is no point in keeping any
4713 // following clauses or marking the landingpad as having a cleanup. By
4714 // dealing with this case here the following code is made a bit simpler.
4715 if (!NumTypeInfos) {
4716 NewClauses.push_back(FilterClause);
4717 if (!isLastClause)
4718 MakeNewInstruction = true;
4719 CleanupFlag = false;
4720 break;
4721 }
4722
4723 bool MakeNewFilter = false; // If true, make a new filter.
4724 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4725 if (isa<ConstantAggregateZero>(FilterClause)) {
4726 // Not an empty filter - it contains at least one null typeinfo.
4727 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4728 Constant *TypeInfo =
4729 Constant::getNullValue(FilterType->getElementType());
4730 // If this typeinfo is a catch-all then the filter can never match.
4731 if (isCatchAll(Personality, TypeInfo)) {
4732 // Throw the filter away.
4733 MakeNewInstruction = true;
4734 continue;
4735 }
4736
4737 // There is no point in having multiple copies of this typeinfo, so
4738 // discard all but the first copy if there is more than one.
4739 NewFilterElts.push_back(TypeInfo);
4740 if (NumTypeInfos > 1)
4741 MakeNewFilter = true;
4742 } else {
4743 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4744 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4745 NewFilterElts.reserve(NumTypeInfos);
4746
4747 // Remove any filter elements that were already caught or that already
4748 // occurred in the filter. While there, see if any of the elements are
4749 // catch-alls. If so, the filter can be discarded.
4750 bool SawCatchAll = false;
4751 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4752 Constant *Elt = Filter->getOperand(j);
4753 Constant *TypeInfo = Elt->stripPointerCasts();
4754 if (isCatchAll(Personality, TypeInfo)) {
4755 // This element is a catch-all. Bail out, noting this fact.
4756 SawCatchAll = true;
4757 break;
4758 }
4759
4760 // Even if we've seen a type in a catch clause, we don't want to
4761 // remove it from the filter. An unexpected type handler may be
4762 // set up for a call site which throws an exception of the same
4763 // type caught. In order for the exception thrown by the unexpected
4764 // handler to propagate correctly, the filter must be correctly
4765 // described for the call site.
4766 //
4767 // Example:
4768 //
4769 // void unexpected() { throw 1;}
4770 // void foo() throw (int) {
4771 // std::set_unexpected(unexpected);
4772 // try {
4773 // throw 2.0;
4774 // } catch (int i) {}
4775 // }
4776
4777 // There is no point in having multiple copies of the same typeinfo in
4778 // a filter, so only add it if we didn't already.
4779 if (SeenInFilter.insert(TypeInfo).second)
4780 NewFilterElts.push_back(cast<Constant>(Elt));
4781 }
4782 // A filter containing a catch-all cannot match anything by definition.
4783 if (SawCatchAll) {
4784 // Throw the filter away.
4785 MakeNewInstruction = true;
4786 continue;
4787 }
4788
4789 // If we dropped something from the filter, make a new one.
4790 if (NewFilterElts.size() < NumTypeInfos)
4791 MakeNewFilter = true;
4792 }
4793 if (MakeNewFilter) {
4794 FilterType = ArrayType::get(FilterType->getElementType(),
4795 NewFilterElts.size());
4796 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4797 MakeNewInstruction = true;
4798 }
4799
4800 NewClauses.push_back(FilterClause);
4801
4802 // If the new filter is empty then it will catch everything so there is
4803 // no point in keeping any following clauses or marking the landingpad
4804 // as having a cleanup. The case of the original filter being empty was
4805 // already handled above.
4806 if (MakeNewFilter && !NewFilterElts.size()) {
4807 assert(MakeNewInstruction && "New filter but not a new instruction!");
4808 CleanupFlag = false;
4809 break;
4810 }
4811 }
4812 }
4813
4814 // If several filters occur in a row then reorder them so that the shortest
4815 // filters come first (those with the smallest number of elements). This is
4816 // advantageous because shorter filters are more likely to match, speeding up
4817 // unwinding, but mostly because it increases the effectiveness of the other
4818 // filter optimizations below.
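  //
  // Illustrative example (added for exposition, not part of the original
  // source; the typeinfo names are hypothetical): a clause list such as
  //   filter [2 x ptr] [@A, @B], filter [1 x ptr] [@C]
  // would be stably reordered to
  //   filter [1 x ptr] [@C], filter [2 x ptr] [@A, @B]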
4819 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4820 unsigned j;
4821 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4822 for (j = i; j != e; ++j)
4823 if (!isa<ArrayType>(NewClauses[j]->getType()))
4824 break;
4825
4826 // Check whether the filters are already sorted by length. We need to know
4827 // if sorting them is actually going to do anything so that we only make a
4828 // new landingpad instruction if it does.
4829 for (unsigned k = i; k + 1 < j; ++k)
4830 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4831 // Not sorted, so sort the filters now. Doing an unstable sort would be
4832 // correct too but reordering filters pointlessly might confuse users.
4833 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4834 shorter_filter);
4835 MakeNewInstruction = true;
4836 break;
4837 }
4838
4839 // Look for the next batch of filters.
4840 i = j + 1;
4841 }
4842
4843 // If typeinfos matched if and only if equal, then the elements of a filter L
4844 // that occurs later than a filter F could be replaced by the intersection of
4845 // the elements of F and L. In reality two typeinfos can match without being
4846 // equal (for example if one represents a C++ class, and the other some class
4847 // derived from it) so it would be wrong to perform this transform in general.
4848 // However the transform is correct and useful if F is a subset of L. In that
4849 // case L can be replaced by F, and thus removed altogether since repeating a
4850 // filter is pointless. So here we look at all pairs of filters F and L where
4851 // L follows F in the list of clauses, and remove L if every element of F is
4852 // an element of L. This can occur when inlining C++ functions with exception
4853 // specifications.
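  //
  // Illustrative example (added for exposition, not part of the original
  // source): given the clauses
  //   filter [1 x ptr] [@_ZTIi], filter [2 x ptr] [@_ZTIi, @_ZTId]
  // every element of the first filter also appears in the second, so the
  // second (later) filter is redundant and is erased.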
4854 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4855 // Examine each filter in turn.
4856 Value *Filter = NewClauses[i];
4857 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4858 if (!FTy)
4859 // Not a filter - skip it.
4860 continue;
4861 unsigned FElts = FTy->getNumElements();
4862 // Examine each filter following this one. Doing this backwards means that
4863 // we don't have to worry about filters disappearing under us when removed.
4864 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4865 Value *LFilter = NewClauses[j];
4866 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4867 if (!LTy)
4868 // Not a filter - skip it.
4869 continue;
4870 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4871 // an element of LFilter, then discard LFilter.
4872 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4873 // If Filter is empty then it is a subset of LFilter.
4874 if (!FElts) {
4875 // Discard LFilter.
4876 NewClauses.erase(J);
4877 MakeNewInstruction = true;
4878 // Move on to the next filter.
4879 continue;
4880 }
4881 unsigned LElts = LTy->getNumElements();
4882 // If Filter is longer than LFilter then it cannot be a subset of it.
4883 if (FElts > LElts)
4884 // Move on to the next filter.
4885 continue;
4886 // At this point we know that LFilter has at least one element.
4887 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4888 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4889 // already know that Filter is not longer than LFilter).
4890 if (isa<ConstantAggregateZero>(Filter)) {
4891 assert(FElts <= LElts && "Should have handled this case earlier!");
4892 // Discard LFilter.
4893 NewClauses.erase(J);
4894 MakeNewInstruction = true;
4895 }
4896 // Move on to the next filter.
4897 continue;
4898 }
4899 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4900 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4901 // Since Filter is non-empty and contains only zeros, it is a subset of
4902 // LFilter iff LFilter contains a zero.
4903 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4904 for (unsigned l = 0; l != LElts; ++l)
4905 if (LArray->getOperand(l)->isNullValue()) {
4906 // LFilter contains a zero - discard it.
4907 NewClauses.erase(J);
4908 MakeNewInstruction = true;
4909 break;
4910 }
4911 // Move on to the next filter.
4912 continue;
4913 }
4914 // At this point we know that both filters are ConstantArrays. Loop over
4915 // operands to see whether every element of Filter is also an element of
4916 // LFilter. Since filters tend to be short this is probably faster than
4917 // using a method that scales nicely.
4918 ConstantArray *FArray = cast<ConstantArray>(Filter);
4919 bool AllFound = true;
4920 for (unsigned f = 0; f != FElts; ++f) {
4921 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4922 AllFound = false;
4923 for (unsigned l = 0; l != LElts; ++l) {
4924 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4925 if (LTypeInfo == FTypeInfo) {
4926 AllFound = true;
4927 break;
4928 }
4929 }
4930 if (!AllFound)
4931 break;
4932 }
4933 if (AllFound) {
4934 // Discard LFilter.
4935 NewClauses.erase(J);
4936 MakeNewInstruction = true;
4937 }
4938 // Move on to the next filter.
4939 }
4940 }
4941
4942 // If we changed any of the clauses, replace the old landingpad instruction
4943 // with a new one.
4944 if (MakeNewInstruction) {
4945 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4946 NewClauses.size());
4947 for (Constant *C : NewClauses)
4948 NLI->addClause(C);
4949 // A landing pad with no clauses must have the cleanup flag set. It is
4950 // theoretically possible, though highly unlikely, that we eliminated all
4951 // clauses. If so, force the cleanup flag to true.
4952 if (NewClauses.empty())
4953 CleanupFlag = true;
4954 NLI->setCleanup(CleanupFlag);
4955 return NLI;
4956 }
4957
4958 // Even if none of the clauses changed, we may nonetheless have understood
4959 // that the cleanup flag is pointless. Clear it if so.
4960 if (LI.isCleanup() != CleanupFlag) {
4961 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4962 LI.setCleanup(CleanupFlag);
4963 return &LI;
4964 }
4965
4966 return nullptr;
4967}
4968
4969Value *
4971 // Try to push freeze through instructions that propagate but don't produce
4972 // poison as far as possible. If an operand of freeze does not produce poison
4973 // then push the freeze through to the operands that are not guaranteed
4974 // non-poison. The actual transform is as follows.
4975 // Op1 = ... ; Op1 can be poison
4976 // Op0 = Inst(Op1, NonPoisonOps...)
4977 // ... = Freeze(Op0)
4978 // =>
4979 // Op1 = ...
4980 // Op1.fr = Freeze(Op1)
4981 // ... = Inst(Op1.fr, NonPoisonOps...)
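  //
  // Illustrative IR example (added for exposition, not part of the original
  // source); a plain 'add' cannot introduce poison once wrapping flags are
  // dropped:
  //   %op0 = add i32 %op1, 42     ; %op1 may be poison
  //   %fr  = freeze i32 %op0
  // =>
  //   %op1.fr = freeze i32 %op1
  //   %op0    = add i32 %op1.fr, 42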
4982
4983 auto CanPushFreeze = [](Value *V) {
4984 if (!isa<Instruction>(V) || isa<PHINode>(V))
4985 return false;
4986
4987 // We can't push the freeze through an instruction which can itself create
4988 // poison. If the only source of new poison is flags, we can simply
4989 // strip them (since we know the only use is the freeze and nothing can
4990 // benefit from them.)
4992 /*ConsiderFlagsAndMetadata*/ false);
4993 };
4994
4995 // Pushing freezes up long instruction chains can be expensive. Instead,
4996 // we directly push the freeze all the way to the leaves. However, we leave
4997 // deduplication of freezes on the same value for freezeOtherUses().
4998 Use *OrigUse = &OrigFI.getOperandUse(0);
5001 Worklist.push_back(OrigUse);
5002 while (!Worklist.empty()) {
5003 auto *U = Worklist.pop_back_val();
5004 Value *V = U->get();
5005 if (!CanPushFreeze(V)) {
5006 // If we can't push through the original instruction, abort the transform.
5007 if (U == OrigUse)
5008 return nullptr;
5009
5010 auto *UserI = cast<Instruction>(U->getUser());
5011 Builder.SetInsertPoint(UserI);
5012 Value *Frozen = Builder.CreateFreeze(V, V->getName() + ".fr");
5013 U->set(Frozen);
5014 continue;
5015 }
5016
5017 auto *I = cast<Instruction>(V);
5018 if (!Visited.insert(I).second)
5019 continue;
5020
5021 // reverse() to emit freezes in a more natural order.
5022 for (Use &Op : reverse(I->operands())) {
5023 Value *OpV = Op.get();
5025 continue;
5026 Worklist.push_back(&Op);
5027 }
5028
5029 I->dropPoisonGeneratingAnnotations();
5030 this->Worklist.add(I);
5031 }
5032
5033 return OrigUse->get();
5034}
5035
5037 PHINode *PN) {
5038 // Detect whether this is a recurrence with a start value and some number of
5039 // backedge values. We'll check whether we can push the freeze through the
5040 // backedge values (possibly dropping poison flags along the way) until we
5041 // reach the phi again. In that case, we can move the freeze to the start
5042 // value.
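  //
  // Illustrative IR example (added for exposition, not part of the original
  // source):
  //   loop:
  //     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //     %iv.next = add i32 %iv, 1
  //     %iv.fr = freeze i32 %iv
  // becomes, after moving the freeze to the start value:
  //   entry:
  //     %start.fr = freeze i32 %start
  //   loop:
  //     %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
  //     ... uses of %iv.fr are replaced by %iv ...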
5043 Use *StartU = nullptr;
5045 for (Use &U : PN->incoming_values()) {
5046 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
5047 // Add backedge value to worklist.
5048 Worklist.push_back(U.get());
5049 continue;
5050 }
5051
5052 // Don't bother handling multiple start values.
5053 if (StartU)
5054 return nullptr;
5055 StartU = &U;
5056 }
5057
5058 if (!StartU || Worklist.empty())
5059 return nullptr; // Not a recurrence.
5060
5061 Value *StartV = StartU->get();
5062 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
5063 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
5064 // We can't insert freeze if the start value is the result of the
5065 // terminator (e.g. an invoke).
5066 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5067 return nullptr;
5068
5071 while (!Worklist.empty()) {
5072 Value *V = Worklist.pop_back_val();
5073 if (!Visited.insert(V).second)
5074 continue;
5075
5076 if (Visited.size() > 32)
5077 return nullptr; // Limit the total number of values we inspect.
5078
5079 // Assume that PN is non-poison, because it will be after the transform.
5080 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5081 continue;
5082
5085 /*ConsiderFlagsAndMetadata*/ false))
5086 return nullptr;
5087
5088 DropFlags.push_back(I);
5089 append_range(Worklist, I->operands());
5090 }
5091
5092 for (Instruction *I : DropFlags)
5093 I->dropPoisonGeneratingAnnotations();
5094
5095 if (StartNeedsFreeze) {
5096 Builder.SetInsertPoint(StartBB->getTerminator());
5097 Value *FrozenStartV = Builder.CreateFreeze(StartV,
5098 StartV->getName() + ".fr");
5099 replaceUse(*StartU, FrozenStartV);
5100 }
5101 return replaceInstUsesWith(FI, PN);
5102}
5103
5105 Value *Op = FI.getOperand(0);
5106
5107 if (isa<Constant>(Op) || Op->hasOneUse())
5108 return false;
5109
5110 // Move the freeze directly after the definition of its operand, so that
5111 // it dominates the maximum number of uses. Note that it may not dominate
5112 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5113 // the normal/default destination. This is why the domination check in the
5114 // replacement below is still necessary.
5115 BasicBlock::iterator MoveBefore;
5116 if (isa<Argument>(Op)) {
5117 MoveBefore =
5119 } else {
5120 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
5121 if (!MoveBeforeOpt)
5122 return false;
5123 MoveBefore = *MoveBeforeOpt;
5124 }
5125
5126 // Re-point iterator to come after any debug-info records.
5127 MoveBefore.setHeadBit(false);
5128
5129 bool Changed = false;
5130 if (&FI != &*MoveBefore) {
5131 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
5132 Changed = true;
5133 }
5134
5135 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
5136 bool Dominates = DT.dominates(&FI, U);
5137 Changed |= Dominates;
5138 return Dominates;
5139 });
5140
5141 return Changed;
5142}
5143
5144// Check if any direct or bitcast user of this value is a shuffle instruction.
5146 for (auto *U : V->users()) {
5148 return true;
5149 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
5150 return true;
5151 }
5152 return false;
5153}
5154
5156 Value *Op0 = I.getOperand(0);
5157
5158 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
5159 return replaceInstUsesWith(I, V);
5160
5161 // freeze (phi const, x) --> phi const, (freeze x)
5162 if (auto *PN = dyn_cast<PHINode>(Op0)) {
5163 if (Instruction *NV = foldOpIntoPhi(I, PN))
5164 return NV;
5165 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
5166 return NV;
5167 }
5168
5170 return replaceInstUsesWith(I, NI);
5171
5172 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5173 // - or: pick -1
5174 // - select's condition: if the true value is constant, choose it by making
5175 // the condition true.
5176 // - phi: pick the common constant across operands
5177 // - default: pick 0
5178 //
5179 // Note that this transform is intentionally done here rather than
5180 // via an analysis in InstSimplify or at individual user sites. That is
5181 // because we must produce the same value for all uses of the freeze -
5182 // it's the reason "freeze" exists!
5183 //
5184 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5185 // duplicating logic for binops at least.
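  //
  // Illustrative example (added for exposition, not part of the original
  // source): for
  //   %f = freeze i32 undef
  //   %o = or i32 %f, %x
  // picking -1 for %f lets the 'or' fold to -1, whereas 0 would be the better
  // choice if the only user were, say, an 'and'.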
5186 auto getUndefReplacement = [&](Type *Ty) {
5187 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5188 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5189 // removed.
5190 Constant *BestValue = nullptr;
5191 for (Value *V : PN.incoming_values()) {
5192 if (match(V, m_Freeze(m_Undef())))
5193 continue;
5194
5196 if (!C)
5197 return nullptr;
5198
5200 return nullptr;
5201
5202 if (BestValue && BestValue != C)
5203 return nullptr;
5204
5205 BestValue = C;
5206 }
5207 return BestValue;
5208 };
5209
5210 Value *NullValue = Constant::getNullValue(Ty);
5211 Value *BestValue = nullptr;
5212 for (auto *U : I.users()) {
5213 Value *V = NullValue;
5214 if (match(U, m_Or(m_Value(), m_Value())))
5216 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
5217 V = ConstantInt::getTrue(Ty);
5218 else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
5219 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
5220 V = NullValue;
5221 } else if (auto *PHI = dyn_cast<PHINode>(U)) {
5222 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5223 V = MaybeV;
5224 }
5225
5226 if (!BestValue)
5227 BestValue = V;
5228 else if (BestValue != V)
5229 BestValue = NullValue;
5230 }
5231 assert(BestValue && "Must have at least one use");
5232 assert(BestValue != &I && "Cannot replace with itself");
5233 return BestValue;
5234 };
5235
5236 if (match(Op0, m_Undef())) {
5237 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5238 // a shuffle. This may improve codegen for shuffles that allow
5239 // unspecified inputs.
5241 return nullptr;
5242 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
5243 }
5244
5245 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5246 Type *Ty = C->getType();
5247 auto *VTy = dyn_cast<FixedVectorType>(Ty);
5248 if (!VTy)
5249 return nullptr;
5250 unsigned NumElts = VTy->getNumElements();
5251 Constant *BestValue = Constant::getNullValue(VTy->getScalarType());
5252 for (unsigned i = 0; i != NumElts; ++i) {
5253 Constant *EltC = C->getAggregateElement(i);
5254 if (EltC && !match(EltC, m_Undef())) {
5255 BestValue = EltC;
5256 break;
5257 }
5258 }
5259 return Constant::replaceUndefsWith(C, BestValue);
5260 };
5261
5262 Constant *C;
5263 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5264 !C->containsConstantExpression()) {
5265 if (Constant *Repl = getFreezeVectorReplacement(C))
5266 return replaceInstUsesWith(I, Repl);
5267 }
5268
5269 // Replace uses of Op with freeze(Op).
5270 if (freezeOtherUses(I))
5271 return &I;
5272
5273 return nullptr;
5274}
5275
5276/// Check for case where the call writes to an otherwise dead alloca. This
5277/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5278/// helper *only* analyzes the write; doesn't check any other legality aspect.
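/// Illustrative example (added for exposition, not part of the original
/// source; the callee name is hypothetical):
///   int Unused;          // otherwise dead alloca
///   getValue(&Unused);   // the call's only write lands in the dead alloca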
5280 auto *CB = dyn_cast<CallBase>(I);
5281 if (!CB)
5282 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5283 // to allow reload along used path as described below. Otherwise, this
5284 // is simply a store to a dead allocation which will be removed.
5285 return false;
5286 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
5287 if (!Dest)
5288 return false;
5289 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
5290 if (!AI)
5291 // TODO: allow malloc?
5292 return false;
5293 // TODO: allow memory access dominated by move point? Note that since AI
5294 // could have a reference to itself captured by the call, we would need to
5295 // account for cycles in doing so.
5296 SmallVector<const User *> AllocaUsers;
5298 auto pushUsers = [&](const Instruction &I) {
5299 for (const User *U : I.users()) {
5300 if (Visited.insert(U).second)
5301 AllocaUsers.push_back(U);
5302 }
5303 };
5304 pushUsers(*AI);
5305 while (!AllocaUsers.empty()) {
5306 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
5307 if (isa<GetElementPtrInst>(UserI) || isa<AddrSpaceCastInst>(UserI)) {
5308 pushUsers(*UserI);
5309 continue;
5310 }
5311 if (UserI == CB)
5312 continue;
5313 // TODO: support lifetime.start/end here
5314 return false;
5315 }
5316 return true;
5317}
5318
5319/// Try to move the specified instruction from its current block into the
5320/// beginning of DestBlock, which can only happen if it's safe to move the
5321/// instruction past all of the instructions between it and the end of its
5322/// block.
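///
/// Illustrative IR example (added for exposition, not part of the original
/// source; @use is hypothetical):
///   bb:
///     %v = add i32 %a, %b          ; only use is in %user
///     br i1 %c, label %user, label %other
///   user:                          ; unique predecessor is %bb
///     call void @use(i32 %v)
/// Here %v can be sunk to the start of %user.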
5324 BasicBlock *DestBlock) {
5325 BasicBlock *SrcBlock = I->getParent();
5326
5327 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5328 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5329 I->isTerminator())
5330 return false;
5331
5332 // Do not sink static or dynamic alloca instructions. Static allocas must
5333 // remain in the entry block, and dynamic allocas must not be sunk in between
5334 // a stacksave / stackrestore pair, which would incorrectly shorten its
5335 // lifetime.
5336 if (isa<AllocaInst>(I))
5337 return false;
5338
5339 // Do not sink into catchswitch blocks.
5340 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
5341 return false;
5342
5343 // Do not sink convergent call instructions.
5344 if (auto *CI = dyn_cast<CallInst>(I)) {
5345 if (CI->isConvergent())
5346 return false;
5347 }
5348
5349 // Unless we can prove that the memory write isn't visible except on the
5350 // path we're sinking to, we must bail.
5351 if (I->mayWriteToMemory()) {
5352 if (!SoleWriteToDeadLocal(I, TLI))
5353 return false;
5354 }
5355
5356 // We can only sink load instructions if there is nothing between the load and
5357 // the end of block that could change the value.
5358 if (I->mayReadFromMemory() &&
5359 !I->hasMetadata(LLVMContext::MD_invariant_load)) {
5360 // We don't want to do any sophisticated alias analysis, so we only check
5361 // the instructions after I in I's parent block if we try to sink to its
5362 // successor block.
5363 if (DestBlock->getUniquePredecessor() != I->getParent())
5364 return false;
5365 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
5366 E = I->getParent()->end();
5367 Scan != E; ++Scan)
5368 if (Scan->mayWriteToMemory())
5369 return false;
5370 }
5371
5372 I->dropDroppableUses([&](const Use *U) {
5373 auto *I = dyn_cast<Instruction>(U->getUser());
5374 if (I && I->getParent() != DestBlock) {
5375 Worklist.add(I);
5376 return true;
5377 }
5378 return false;
5379 });
5380 /// FIXME: We could remove droppable uses that are not dominated by
5381 /// the new position.
5382
5383 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5384 I->moveBefore(*DestBlock, InsertPos);
5385 ++NumSunkInst;
5386
5387 // Also sink all related debug uses from the source basic block. Otherwise we
5388 // get debug use before the def. Attempt to salvage debug uses first, to
5389 // maximise the range over which variables have a location. If we cannot salvage, then
5390 // mark the location undef: we know it was supposed to receive a new location
5391 // here, but that computation has been sunk.
5392 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5393 findDbgUsers(I, DbgVariableRecords);
5394 if (!DbgVariableRecords.empty())
5395 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5396 DbgVariableRecords);
5397
5398 // PS: there are numerous flaws with this behaviour, not least that right now
5399 // assignments can be re-ordered past other assignments to the same variable
5400 // if they use different Values. Creating more undef assignments can never be
5401 // undone. And salvaging all users outside of this block can unnecessarily
5402 // alter the lifetime of the live-value that the variable refers to.
5403 // Some of these things can be resolved by tolerating debug use-before-defs in
5404 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5405 // being used for more architectures.
5406
5407 return true;
5408}
5409
5411 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5412 BasicBlock *DestBlock,
5413 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5414 // For all debug values in the destination block, the sunk instruction
5415 // will still be available, so they do not need to be dropped.
5416
5417 // Fetch all DbgVariableRecords not already in the destination.
5418 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5419 for (auto &DVR : DbgVariableRecords)
5420 if (DVR->getParent() != DestBlock)
5421 DbgVariableRecordsToSalvage.push_back(DVR);
5422
5423 // Fetch a second collection, of DbgVariableRecords in the source block that
5424 // we're going to sink.
5425 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5426 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5427 if (DVR->getParent() == SrcBlock)
5428 DbgVariableRecordsToSink.push_back(DVR);
5429
5430 // Sort DbgVariableRecords according to their position in the block. This is a
5431 // partial order: DbgVariableRecords attached to different instructions will
5432 // be ordered by the instruction order, but DbgVariableRecords attached to the
5433 // same instruction won't have an order.
5434 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5435 return B->getInstruction()->comesBefore(A->getInstruction());
5436 };
5437 llvm::stable_sort(DbgVariableRecordsToSink, Order);
5438
5439 // If there are two assignments to the same variable attached to the same
5440 // instruction, the ordering between the two assignments is important. Scan
5441 // for this (rare) case and establish which is the last assignment.
5442 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5444 if (DbgVariableRecordsToSink.size() > 1) {
5446 // Count how many assignments to each variable there is per instruction.
5447 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5448 DebugVariable DbgUserVariable =
5449 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5450 DVR->getDebugLoc()->getInlinedAt());
5451 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
5452 }
5453
5454 // If there are any instructions with two assignments, add them to the
5455 // FilterOutMap to record that they need extra filtering.
5457 for (auto It : CountMap) {
5458 if (It.second > 1) {
5459 FilterOutMap[It.first] = nullptr;
5460 DupSet.insert(It.first.first);
5461 }
5462 }
5463
5464 // For all instruction/variable pairs needing extra filtering, find the
5465 // latest assignment.
5466 for (const Instruction *Inst : DupSet) {
5467 for (DbgVariableRecord &DVR :
5468 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
5469 DebugVariable DbgUserVariable =
5470 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5471 DVR.getDebugLoc()->getInlinedAt());
5472 auto FilterIt =
5473 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
5474 if (FilterIt == FilterOutMap.end())
5475 continue;
5476 if (FilterIt->second != nullptr)
5477 continue;
5478 FilterIt->second = &DVR;
5479 }
5480 }
5481 }
5482
5483 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5484 // out any duplicate assignments identified above.
5486 SmallSet<DebugVariable, 4> SunkVariables;
5487 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5489 continue;
5490
5491 DebugVariable DbgUserVariable =
5492 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5493 DVR->getDebugLoc()->getInlinedAt());
5494
5495 // For any variable where there were multiple assignments in the same place,
5496 // ignore all but the last assignment.
5497 if (!FilterOutMap.empty()) {
5498 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
5499 auto It = FilterOutMap.find(IVP);
5500
5501 // Filter out.
5502 if (It != FilterOutMap.end() && It->second != DVR)
5503 continue;
5504 }
5505
5506 if (!SunkVariables.insert(DbgUserVariable).second)
5507 continue;
5508
5509 if (DVR->isDbgAssign())
5510 continue;
5511
5512 DVRClones.emplace_back(DVR->clone());
5513 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5514 }
5515
5516 // Perform salvaging without the clones, then sink the clones.
5517 if (DVRClones.empty())
5518 return;
5519
5520 salvageDebugInfoForDbgValues(*I, DbgVariableRecordsToSalvage);
5521
5522 // The clones are in reverse order of original appearance. Assert that the
5523 // head bit is set on the iterator as we _should_ have received it via
5524 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5525 // we'll repeatedly insert at the head, such as:
5526 // DVR-3 (third insertion goes here)
5527 // DVR-2 (second insertion goes here)
5528 // DVR-1 (first insertion goes here)
5529 // Any-Prior-DVRs
5530 // InsertPtInst
5531 assert(InsertPos.getHeadBit());
5532 for (DbgVariableRecord *DVRClone : DVRClones) {
5533 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
5534 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5535 }
5536}
5537
5539 while (!Worklist.isEmpty()) {
5540 // Walk deferred instructions in reverse order, and push them to the
5541 // worklist, which means they'll end up popped from the worklist in-order.
5542 while (Instruction *I = Worklist.popDeferred()) {
5543 // Check to see if we can DCE the instruction. We do this already here to
5544 // reduce the number of uses and thus allow other folds to trigger.
5545 // Note that eraseInstFromFunction() may push additional instructions on
5546 // the deferred worklist, so this will DCE whole instruction chains.
5549 ++NumDeadInst;
5550 continue;
5551 }
5552
5553 Worklist.push(I);
5554 }
5555
5556 Instruction *I = Worklist.removeOne();
5557 if (I == nullptr) continue; // skip null values.
5558
5559 // Check to see if we can DCE the instruction.
5562 ++NumDeadInst;
5563 continue;
5564 }
5565
5566 if (!DebugCounter::shouldExecute(VisitCounter))
5567 continue;
5568
5569 // See if we can trivially sink this instruction to its user if we can
5570 // prove that the successor is not executed more frequently than our block.
5571 // Return the UserBlock if successful.
5572 auto getOptionalSinkBlockForInst =
5573 [this](Instruction *I) -> std::optional<BasicBlock *> {
5574 if (!EnableCodeSinking)
5575 return std::nullopt;
5576
5577 BasicBlock *BB = I->getParent();
5578 BasicBlock *UserParent = nullptr;
5579 unsigned NumUsers = 0;
5580
5581 for (Use &U : I->uses()) {
5582 User *User = U.getUser();
5583 if (User->isDroppable())
5584 continue;
5585 if (NumUsers > MaxSinkNumUsers)
5586 return std::nullopt;
5587
5588 Instruction *UserInst = cast<Instruction>(User);
5589 // Special handling for Phi nodes - get the block the use occurs in.
5590 BasicBlock *UserBB = UserInst->getParent();
5591 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5592 UserBB = PN->getIncomingBlock(U);
5593 // Bail out if we have uses in different blocks. We don't do any
5594 // sophisticated analysis (i.e. finding NearestCommonDominator of these
5595 // use blocks).
5596 if (UserParent && UserParent != UserBB)
5597 return std::nullopt;
5598 UserParent = UserBB;
5599
5600 // Make sure these checks are done only once; naturally we do the checks
5601 // the first time we get the userparent, which saves compile time.
5602 if (NumUsers == 0) {
5603 // Try sinking to another block. If that block is unreachable, then do
5604 // not bother. SimplifyCFG should handle it.
5605 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5606 return std::nullopt;
5607
5608 auto *Term = UserParent->getTerminator();
5609 // See if the user is one of our successors that has only one
5610 // predecessor, so that we don't have to split the critical edge.
5611 // Another option where we can sink is a block that ends with a
5612 // terminator that does not pass control to other block (such as
5613 // return or unreachable or resume). In this case:
5614 // - I dominates the User (by SSA form);
5615 // - the User will be executed at most once.
5616 // So sinking I down to User is always profitable or neutral.
5617 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5618 return std::nullopt;
5619
5620 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5621 }
5622
5623 NumUsers++;
5624 }
5625
5626 // No users, or only droppable users.
5627 if (!UserParent)
5628 return std::nullopt;
5629
5630 return UserParent;
5631 };
5632
5633 auto OptBB = getOptionalSinkBlockForInst(I);
5634 if (OptBB) {
5635 auto *UserParent = *OptBB;
5636 // Okay, the CFG is simple enough, try to sink this instruction.
5637 if (tryToSinkInstruction(I, UserParent)) {
5638 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5639 MadeIRChange = true;
5640 // We'll add uses of the sunk instruction below, but since
5641 // sinking can expose opportunities for its *operands*, add
5642 // them to the worklist.
5643 for (Use &U : I->operands())
5644 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5645 Worklist.push(OpI);
5646 }
5647 }
5648
5649 // Now that we have an instruction, try combining it to simplify it.
5650 Builder.SetInsertPoint(I);
5651 Builder.CollectMetadataToCopy(
5652 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5653
5654#ifndef NDEBUG
5655 std::string OrigI;
5656#endif
5657 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5658 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5659
5660 if (Instruction *Result = visit(*I)) {
5661 ++NumCombined;
5662 // Should we replace the old instruction with a new one?
5663 if (Result != I) {
5664 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5665 << " New = " << *Result << '\n');
5666
5667 // We copy the old instruction's DebugLoc to the new instruction, unless
5668 // InstCombine already assigned a DebugLoc to it, in which case we
5669 // should trust the more specifically selected DebugLoc.
5670 Result->setDebugLoc(Result->getDebugLoc().orElse(I->getDebugLoc()));
5671 // We also copy annotation metadata to the new instruction.
5672 Result->copyMetadata(*I, LLVMContext::MD_annotation);
5673 // Everything uses the new instruction now.
5674 I->replaceAllUsesWith(Result);
5675
5676 // Move the name to the new instruction first.
5677 Result->takeName(I);
5678
5679 // Insert the new instruction into the basic block...
5680 BasicBlock *InstParent = I->getParent();
5681 BasicBlock::iterator InsertPos = I->getIterator();
5682
5683 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5684 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5685 // We need to fix up the insertion point.
5686 if (isa<PHINode>(I)) // PHI -> Non-PHI
5687 InsertPos = InstParent->getFirstInsertionPt();
5688 else // Non-PHI -> PHI
5689 InsertPos = InstParent->getFirstNonPHIIt();
5690 }
5691
5692 Result->insertInto(InstParent, InsertPos);
5693
5694 // Push the new instruction and any users onto the worklist.
5695 Worklist.pushUsersToWorkList(*Result);
5696 Worklist.push(Result);
5697
5699 } else {
5700 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5701 << " New = " << *I << '\n');
5702
5703 // If the instruction was modified, it's possible that it is now dead.
5704 // If so, remove it.
5707 } else {
5708 Worklist.pushUsersToWorkList(*I);
5709 Worklist.push(I);
5710 }
5711 }
5712 MadeIRChange = true;
5713 }
5714 }
5715
5716 Worklist.zap();
5717 return MadeIRChange;
5718}
5719
5720// Track the scopes used by !alias.scope and !noalias. In a function, a
5721// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5722// by both sets. If not, the declaration of the scope can be safely omitted.
5723// The MDNode of the scope can be omitted as well for the instructions that are
5724// part of this function. We do not do that at this point, as this might become
5725// too time consuming to do.
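//
// Illustrative example (added for exposition, not part of the original
// source): if scope !5 only ever appears in !alias.scope lists but never in
// any !noalias list (or vice versa), the corresponding
// llvm.experimental.noalias.scope.decl for !5 can be dropped.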
5727 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5728 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5729
5730public:
5732 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5733 if (!I->hasMetadataOtherThanDebugLoc())
5734 return;
5735
5736 auto Track = [](Metadata *ScopeList, auto &Container) {
5737 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5738 if (!MDScopeList || !Container.insert(MDScopeList).second)
5739 return;
5740 for (const auto &MDOperand : MDScopeList->operands())
5741 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5742 Container.insert(MDScope);
5743 };
5744
5745 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5746 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5747 }
5748
5751 if (!Decl)
5752 return false;
5753
5754 assert(Decl->use_empty() &&
5755 "llvm.experimental.noalias.scope.decl in use ?");
5756 const MDNode *MDSL = Decl->getScopeList();
5757 assert(MDSL->getNumOperands() == 1 &&
5758 "llvm.experimental.noalias.scope should refer to a single scope");
5759 auto &MDOperand = MDSL->getOperand(0);
5760 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5761 return !UsedAliasScopesAndLists.contains(MD) ||
5762 !UsedNoAliasScopesAndLists.contains(MD);
5763
5764 // Not an MDNode? Throw it away.
5765 return true;
5766 }
5767};
5768
5769/// Populate the IC worklist from a function, by walking it in reverse
5770/// post-order and adding all reachable code to the worklist.
5771///
5772/// This has a couple of tricks to make the code faster and more powerful. In
5773/// particular, we constant fold and DCE instructions as we go, to avoid adding
5774/// them to the worklist (this significantly speeds up instcombine on code where
5775/// many instructions are dead or constant). Additionally, if we find a branch
5776/// whose condition is a known constant, we only visit the reachable successors.
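///
/// Illustrative example (added for exposition, not part of the original
/// source): for
///   br i1 true, label %taken, label %dead
/// only %taken is treated as a live successor along this edge; if %dead has
/// no other live predecessors its instructions are later removed.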
5778 bool MadeIRChange = false;
5780 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5781 DenseMap<Constant *, Constant *> FoldedConstants;
5782 AliasScopeTracker SeenAliasScopes;
5783
5784 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5785 for (BasicBlock *Succ : successors(BB))
5786 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5787 for (PHINode &PN : Succ->phis())
5788 for (Use &U : PN.incoming_values())
5789 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5790 U.set(PoisonValue::get(PN.getType()));
5791 MadeIRChange = true;
5792 }
5793 };
5794
5795 for (BasicBlock *BB : RPOT) {
5796 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5797 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5798 })) {
5799 HandleOnlyLiveSuccessor(BB, nullptr);
5800 continue;
5801 }
5802 LiveBlocks.insert(BB);
5803
5804 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5805 // ConstantProp instruction if trivially constant.
5806 if (!Inst.use_empty() &&
5807 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5808 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5809 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5810 << '\n');
5811 Inst.replaceAllUsesWith(C);
5812 ++NumConstProp;
5813 if (isInstructionTriviallyDead(&Inst, &TLI))
5814 Inst.eraseFromParent();
5815 MadeIRChange = true;
5816 continue;
5817 }
5818
5819 // See if we can constant fold its operands.
5820 for (Use &U : Inst.operands()) {
5822 continue;
5823
5824 auto *C = cast<Constant>(U);
5825 Constant *&FoldRes = FoldedConstants[C];
5826 if (!FoldRes)
5827 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5828
5829 if (FoldRes != C) {
5830 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5831 << "\n Old = " << *C
5832 << "\n New = " << *FoldRes << '\n');
5833 U = FoldRes;
5834 MadeIRChange = true;
5835 }
5836 }
5837
5838 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5839 // these call instructions consumes a non-trivial amount of time and
5840 // provides no value for the optimization.
5841 if (!Inst.isDebugOrPseudoInst()) {
5842 InstrsForInstructionWorklist.push_back(&Inst);
5843 SeenAliasScopes.analyse(&Inst);
5844 }
5845 }
5846
5847 // If this is a branch or switch on a constant, mark only the single
5848 // live successor. Otherwise assume all successors are live.
5849 Instruction *TI = BB->getTerminator();
5850 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5851 if (isa<UndefValue>(BI->getCondition())) {
5852 // Branch on undef is UB.
5853 HandleOnlyLiveSuccessor(BB, nullptr);
5854 continue;
5855 }
5856 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
5857 bool CondVal = Cond->getZExtValue();
5858 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5859 continue;
5860 }
5861 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5862 if (isa<UndefValue>(SI->getCondition())) {
5863 // Switch on undef is UB.
5864 HandleOnlyLiveSuccessor(BB, nullptr);
5865 continue;
5866 }
5867 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5868 HandleOnlyLiveSuccessor(BB,
5869 SI->findCaseValue(Cond)->getCaseSuccessor());
5870 continue;
5871 }
5872 }
5873 }
5874
5875 // Remove instructions inside unreachable blocks. This prevents the
5876 // instcombine code from having to deal with some bad special cases, and
5877 // reduces use counts of instructions.
5878 for (BasicBlock &BB : F) {
5879 if (LiveBlocks.count(&BB))
5880 continue;
5881
5882 unsigned NumDeadInstInBB;
5883 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
5884
5885 MadeIRChange |= NumDeadInstInBB != 0;
5886 NumDeadInst += NumDeadInstInBB;
5887 }
5888
5889 // Once we've found all of the instructions to add to instcombine's worklist,
5890 // add them in reverse order. This way instcombine will visit from the top
5891 // of the function down. This jives well with the way that it adds all uses
5892 // of instructions to the worklist after doing a transformation, thus avoiding
5893 // some N^2 behavior in pathological cases.
5894 Worklist.reserve(InstrsForInstructionWorklist.size());
5895 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5896 // DCE instruction if trivially dead. As we iterate in reverse program
5897 // order here, we will clean up whole chains of dead instructions.
5898 if (isInstructionTriviallyDead(Inst, &TLI) ||
5899 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5900 ++NumDeadInst;
5901 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5902 salvageDebugInfo(*Inst);
5903 Inst->eraseFromParent();
5904 MadeIRChange = true;
5905 continue;
5906 }
5907
5908 Worklist.push(Inst);
5909 }
5910
5911 return MadeIRChange;
5912}
5913
5915 // Collect backedges.
5917 for (BasicBlock *BB : RPOT) {
5918 Visited.insert(BB);
5919 for (BasicBlock *Succ : successors(BB))
5920 if (Visited.contains(Succ))
5921 BackEdges.insert({BB, Succ});
5922 }
5923 ComputedBackEdges = true;
5924}
5925
5931 const InstCombineOptions &Opts) {
5932 auto &DL = F.getDataLayout();
5933 bool VerifyFixpoint = Opts.VerifyFixpoint &&
5934 !F.hasFnAttribute("instcombine-no-verify-fixpoint");
5935
5936 /// Builder - This is an IRBuilder that automatically inserts new
5937 /// instructions into the worklist when they are created.
5939 F.getContext(), TargetFolder(DL),
5940 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5941 Worklist.add(I);
5942 if (auto *Assume = dyn_cast<AssumeInst>(I))
5943 AC.registerAssumption(Assume);
5944 }));
5945
5947
5948 // Lower dbg.declare intrinsics otherwise their value may be clobbered
5949 // by instcombiner.
5950 bool MadeIRChange = false;
5952 MadeIRChange = LowerDbgDeclare(F);
5953
5954 // Iterate while there is work to do.
5955 unsigned Iteration = 0;
5956 while (true) {
5957 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
5958 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5959 << " on " << F.getName()
5960 << " reached; stopping without verifying fixpoint\n");
5961 break;
5962 }
5963
5964 ++Iteration;
5965 ++NumWorklistIterations;
5966 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5967 << F.getName() << "\n");
5968
5969 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
5970 BPI, PSI, DL, RPOT);
5972 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
5973 MadeChangeInThisIteration |= IC.run();
5974 if (!MadeChangeInThisIteration)
5975 break;
5976
5977 MadeIRChange = true;
5978 if (Iteration > Opts.MaxIterations) {
5980 "Instruction Combining on " + Twine(F.getName()) +
5981 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
5982 " iterations. " +
5983 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
5984 "'instcombine-no-verify-fixpoint' to suppress this error.");
5985 }
5986 }
5987
5988 if (Iteration == 1)
5989 ++NumOneIteration;
5990 else if (Iteration == 2)
5991 ++NumTwoIterations;
5992 else if (Iteration == 3)
5993 ++NumThreeIterations;
5994 else
5995 ++NumFourOrMoreIterations;
5996
5997 return MadeIRChange;
5998}
5999
6001
6003 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6004 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6005 OS, MapClassName2PassName);
6006 OS << '<';
6007 OS << "max-iterations=" << Options.MaxIterations << ";";
6008 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6009 OS << '>';
6010}
6011
6012char InstCombinePass::ID = 0;
6013
6016 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(F);
6017 // No changes since last InstCombine pass, exit early.
6018 if (LRT.shouldSkip(&ID))
6019 return PreservedAnalyses::all();
6020
6021 auto &AC = AM.getResult<AssumptionAnalysis>(F);
6022 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
6023 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
6025 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
6026
6027 auto *AA = &AM.getResult<AAManager>(F);
6028 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
6029 ProfileSummaryInfo *PSI =
6030 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
6031 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6032 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
6034
6035 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6036 BFI, BPI, PSI, Options)) {
6037 // No changes, all analyses are preserved.
6038 LRT.update(&ID, /*Changed=*/false);
6039 return PreservedAnalyses::all();
6040 }
6041
6042 // Mark all the analyses that instcombine updates as preserved.
6044 LRT.update(&ID, /*Changed=*/true);
6047 return PA;
6048}
6049
6065
6067 if (skipFunction(F))
6068 return false;
6069
6070 // Required analyses.
6071 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6072 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6073 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6075 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6077
6078 // Optional analyses.
6079 ProfileSummaryInfo *PSI =
6081 BlockFrequencyInfo *BFI =
6082 (PSI && PSI->hasProfileSummary()) ?
6084 nullptr;
6085 BranchProbabilityInfo *BPI = nullptr;
6086 if (auto *WrapperPass =
6088 BPI = &WrapperPass->getBPI();
6089
6090 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6091 BFI, BPI, PSI, InstCombineOptions());
6092}
6093
6095
6099
6101 "Combine redundant instructions", false, false)
6112 "Combine redundant instructions", false, false)
6113
6114// Initialization Routines
6118
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
DXIL Resource Access
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * combineConstantOffsets(GetElementPtrInst &GEP, InstCombinerImpl &IC)
Combine constant offsets separated by variable offsets.
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts)
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP)
Return true if we should canonicalize the gep to an i8 ptradd.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static Value * foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall, SelectInst *SelectInst, InstCombiner::BuilderTy &Builder)
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static Value * simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static Instruction * foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, IRBuilderBase &Builder)
static std::optional< ModRefInfo > isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI, bool KnowInit)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, GEPOperator &GEP2)
Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) transform.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for metadata subclasses.
#define T
uint64_t IntrinsicInst * II
static bool IsSelect(MachineInstr &MI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
unsigned OpIndex
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1890
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:827
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:224
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
uint64_t getNumElements() const
Type * getElementType() const
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:69
LLVM_ABI uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:223
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic, or an alloca.
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
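As a quick illustration of the BasicBlock accessors listed above (a standalone helper, assumed rather than taken from this file):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Count the PHI nodes of a block and peek at its terminator.
static unsigned countPhis(const BasicBlock &BB) {
  unsigned NumPhis = 0;
  for (const PHINode &PN : BB.phis()) { // phis() yields only the leading PHI nodes
    (void)PN;
    ++NumPhis;
  }
  if (const Instruction *Term = BB.getTerminator())
    (void)Term; // null only if the block is not well formed
  return NumPhis;
}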
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and XOR.
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_NE
not equal
Definition InstrTypes.h:700
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
ConstantArray - Constant Array Declarations.
Definition Constants.h:433
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:776
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNot(Constant *C)
static LLVM_ABI Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
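A small sketch (assumed, not from this pass) of the ConstantRange interface above, asking whether a constant is guaranteed to satisfy an unsigned comparison:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// True iff Val satisfies `icmp ult Val, 8` at Val's bit width.
static bool isDefinitelyULT8(const APInt &Val) {
  ConstantRange R = ConstantRange::makeExactICmpRegion(
      CmpInst::ICMP_ULT, APInt(Val.getBitWidth(), 8));
  return R.contains(Val);
}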
Constant Vector Declarations.
Definition Constants.h:517
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
const Constant * stripPointerCasts() const
Definition Constant.h:219
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
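The Constant factory methods above are commonly paired like this (illustrative helper, not part of the file):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Build either the all-ones or the zero constant of the given type,
// including vector types (the value is splatted across the elements).
static Constant *allOnesOrZero(Type *Ty, bool AllOnes) {
  return AllOnes ? Constant::getAllOnesValue(Ty)
                 : Constant::getNullValue(Ty);
}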
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Record of a variable value-assignment, aka a non-instruction representation of the dbg.value intrinsic.
static bool shouldExecute(unsigned CounterName)
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
This class represents a freeze function that returns a random concrete value if an operand is either an undef or a poison value.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:188
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags all()
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep (gep, p, y), x).
bool hasNoUnsignedWrap() const
bool isInBounds() const
GEPNoWrapFlags intersectForOffsetAdd(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep p, x+y).
static GEPNoWrapFlags none()
GEPNoWrapFlags getNoWrapFlags() const
Definition Operator.h:425
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2036
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition IRBuilder.h:538
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
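A hedged example of the IRBuilder facilities listed above (standalone snippet; the helper name is made up): CreatePtrAdd emits an i8 GEP and getInt materializes an APInt as a ConstantInt.

#include "llvm/IR/GEPNoWrapFlags.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Offset Ptr by Off bytes, tagging the GEP as non-wrapping in the unsigned sense.
static Value *offsetPointer(IRBuilder<> &B, Value *Ptr, const APInt &Off) {
  Value *OffV = B.getInt(Off);
  return B.CreatePtrAdd(Ptr, OffV, "ptradd", GEPNoWrapFlags::noUnsignedWrap());
}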
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI InstCombinePass(InstCombineOptions Opts={})
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
bool prepareWorklist(Function &F)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * foldBinopWithRecurrence(BinaryOperator &BO)
Try to fold binary operators whose operands are simple interleaved recurrences to a single recurrence...
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the CFG.
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, Instruction *CxtI, unsigned Depth=0)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
TargetLibraryInfo & TLI
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
static bool isCanonicalPredicate(CmpPredicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
BranchProbabilityInfo * BPI
ReversePostOrderTraversal< BasicBlock * > & RPOT
const DataLayout & DL
DomConditionCache DC
const bool MinimizeSize
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
void addToWorklist(Instruction *I)
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
SmallDenseSet< std::pair< const BasicBlock *, const BasicBlock * >, 8 > BackEdges
Backedges, used to avoid pushing instructions across backedges in cases where this may result in infi...
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
bool isBackEdge(const BasicBlock *From, const BasicBlock *To)
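To illustrate the contract of the combiner helpers above, here is a hypothetical fold written against the public InstCombiner interface (it is not a transform performed by this file; the real pass handles the pattern through simplification):

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// xor X, 0 --> X. Returning the result of replaceInstUsesWith tells the
// driver the instruction was handled and can be erased.
static Instruction *foldXorZeroExample(InstCombiner &IC, BinaryOperator &I) {
  if (I.getOpcode() == Instruction::Xor && match(I.getOperand(1), m_Zero()))
    return IC.replaceInstUsesWith(I, I.getOperand(0));
  return nullptr; // no change; other folds may still apply
}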
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition InstCombine.h:68
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void add(Instruction *I)
Add instruction to the worklist.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
bool isIntDivRem() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
static LLVM_ABI LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad will have.
LLVM_ABI void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
bool isCatch(unsigned Idx) const
Return 'true' if the clause and index Idx is a catch clause.
bool isFilter(unsigned Idx) const
Return 'true' if the clause and index Idx is a filter clause.
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
A function/module analysis which provides an empty LastRunTrackingInfo.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Metadata node.
Definition Metadata.h:1078
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
Tracking metadata reference owned by Metadata.
Definition Metadata.h:900
This is the common base class for memset/memcpy/memmove.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition Metadata.h:64
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
PassRegistry - This class manages the registration and initialization of the pass subsystem as application startup begins.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1468
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
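For the PreservedAnalyses entries above, a sketch of the usual new-pass-manager idiom (the pass is hypothetical, not this file's InstCombinePass):

#include "llvm/IR/Analysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct ToyPass : PassInfoMixin<ToyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = false;
    // ... transform F, setting Changed ...
    if (!Changed)
      return PreservedAnalyses::all(); // nothing was invalidated
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();     // the CFG was left untouched
    return PA;
  }
};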
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
const Value * getFalseValue() const
const Value * getCondition() const
const Value * getTrueValue() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Use * op_iterator
Definition User.h:279
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
op_iterator op_begin()
Definition User.h:284
const Use & getOperandUse(unsigned i) const
Definition User.h:245
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
op_iterator op_end()
Definition User.h:286
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:115
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
Definition Value.h:759
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:166
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:881
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Value handle that is nullable, but tries to track the Value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
CmpClass_match< LHS, RHS, FCmpInst > m_FCmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
OneOps_match< OpTy, Instruction::Freeze > m_Freeze(const OpTy &Op)
Matches FreezeInst.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
NNegZExt_match< OpTy > m_NNegZExt(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
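The m_* matchers above compose into declarative pattern checks. A standalone sketch (assumed, not a fold from this file) that recognizes a rotate-shaped expression:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches (X << C1) | (X >> C2) where C1 + C2 equals the bit width.
static bool looksLikeRotate(Value *V) {
  Value *X;
  const APInt *C1, *C2;
  return match(V, m_Or(m_Shl(m_Value(X), m_APInt(C1)),
                       m_LShr(m_Specific(X), m_APInt(C2)))) &&
         *C1 + *C2 == C1->getBitWidth();
}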
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
void stable_sort(R &&Range)
Definition STLExtras.h:2038
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &)
LLVM_ABI unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructions.
Definition Local.cpp:2485
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
LLVM_ABI Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
LLVM_ABI FunctionPass * createInstructionCombiningPass()
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1555
LLVM_ABI bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
LLVM_ABI bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into poison.
Definition Local.cpp:2468
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition bit.h:186
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:754
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402
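make_early_inc_range and isInstructionTriviallyDead, both listed above, combine into the usual erase-while-iterating idiom (illustrative helper, not code from this pass):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Delete instructions whose results are unused and that have no side effects.
// The early-increment adaptor advances before the body runs, so erasing the
// current instruction does not invalidate the loop.
static bool dropTriviallyDead(BasicBlock &BB) {
  bool Changed = false;
  for (Instruction &I : make_early_inc_range(BB)) {
    if (isInstructionTriviallyDead(&I)) {
      I.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}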
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI bool LowerDbgDeclare(Function &F)
Lowers dbg.declare records into appropriate set of dbg.value records.
Definition Local.cpp:1795
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI void ConvertDebugDeclareToDebugValue(DbgVariableRecord *DVR, StoreInst *SI, DIBuilder &Builder)
Inserts a dbg.value record before a store to an alloca'd value that has an associated dbg....
Definition Local.cpp:1662
LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition Local.cpp:2037
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2414
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
TargetTransformInfo TTI
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
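simplifyBinOp is the query-only counterpart to building an instruction: it either returns an existing or constant value, or nullptr. A minimal sketch (assumed helper):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Try to fold LHS + RHS without creating an instruction; nullptr means
// no simplification applies and the caller must emit a real add.
static Value *trySimplifyAdd(Value *LHS, Value *RHS, const SimplifyQuery &Q) {
  return simplifyBinOp(Instruction::Add, LHS, RHS, Q);
}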
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
A variant of isSafeToSpeculativelyExecute that does not use information from the instruction's non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
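The casting utilities above differ only in their failure behavior: isa<> tests, cast<> asserts, and dyn_cast<> returns null. A small assumed helper:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Return the pointer operand if I is a load, otherwise nullptr.
static Value *loadedPointerOrNull(Instruction *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand();
  return nullptr;
}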
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2068
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
LLVM_ABI void findDbgUsers(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the debug info records describing a value.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:251
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
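A quick sketch of the KnownBits queries above (standalone, assumed helper): the minimum leading-zero count bounds the value from above.

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// True if the tracked value provably fits in the low 8 bits.
static bool fitsInLowByte(const KnownBits &Known) {
  return Known.getBitWidth() <= 8 ||
         Known.countMinLeadingZeros() >= Known.getBitWidth() - 8;
}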
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
SimplifyQuery getWithInstruction(const Instruction *I) const