LLVM 19.0.0git
InstructionCombining.cpp
Go to the documentation of this file.
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
// Iteration statistics: the total number of instruction combining iterations
// performed, plus per-function buckets for one, two, three and four-or-more
// iterations.
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
// Counters for the individual kinds of work the pass performs (combines,
// constant folds, dead instruction removal, sinking, expansions,
// factorizations and reassociations).
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter registered under the name "instcombine-visit".
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
// Boolean command-line option "instcombine-code-sinking"; it is initialised to
// true, i.e. code sinking is enabled unless the option says otherwise.
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
138 "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
// NOTE(review): the option is declared as cl::opt<unsigned> but initialised
// with `true`; presumably it is only used as an on/off switch -- confirm
// against its uses before changing the declared type.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
157 // Handle target specific intrinsics
158 if (II.getCalledFunction()->isTargetIntrinsic()) {
159 return TTI.instCombineIntrinsic(*this, II);
160 }
161 return std::nullopt;
162}
163
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
168 if (II.getCalledFunction()->isTargetIntrinsic()) {
169 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
181 if (II.getCalledFunction()->isTargetIntrinsic()) {
183 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
194 if (!RewriteGEP)
196
198 auto *Inst = dyn_cast<Instruction>(GEP);
199 if (Inst)
201
202 Value *Offset = EmitGEPOffset(GEP);
203 // If a non-trivial GEP has other uses, rewrite it to avoid duplicating
204 // the offset arithmetic.
205 if (Inst && !GEP->hasOneUse() && !GEP->hasAllConstantIndices() &&
206 !GEP->getSourceElementType()->isIntegerTy(8)) {
208 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
209 Offset, "", GEP->getNoWrapFlags()));
211 }
212 return Offset;
213}
214
215/// Legal integers and common types are considered desirable. This is used to
216/// avoid creating instructions with types that may not be supported well by the
217/// the backend.
218/// NOTE: This treats i8, i16 and i32 specially because they are common
219/// types in frontend languages.
220bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
221 switch (BitWidth) {
222 case 8:
223 case 16:
224 case 32:
225 return true;
226 default:
227 return DL.isLegalInteger(BitWidth);
228 }
229}
230
231/// Return true if it is desirable to convert an integer computation from a
232/// given bit width to a new bit width.
233/// We don't want to convert from a legal or desirable type (like i8) to an
234/// illegal type or from a smaller to a larger illegal type. A width of '1'
235/// is always treated as a desirable type because i1 is a fundamental type in
236/// IR, and there are many specialized optimizations for i1 types.
237/// Common/desirable widths are equally treated as legal to convert to, in
238/// order to open up more combining opportunities.
239bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
240 unsigned ToWidth) const {
241 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
242 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
243
244 // Convert to desirable widths even if they are not legal types.
245 // Only shrink types, to prevent infinite loops.
246 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
247 return true;
248
249 // If this is a legal or desiable integer from type, and the result would be
250 // an illegal type, don't do the transformation.
251 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
252 return false;
253
254 // Otherwise, if both are illegal, do not increase the size of the result. We
255 // do allow things like i160 -> i64, but not i64 -> i160.
256 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
257 return false;
258
259 return true;
260}
261
262/// Return true if it is desirable to convert a computation from 'From' to 'To'.
263/// We don't want to convert from a legal to an illegal type or from a smaller
264/// to a larger illegal type. i1 is always treated as a legal type because it is
265/// a fundamental type in IR, and there are many specialized optimizations for
266/// i1 types.
267bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
268 // TODO: This could be extended to allow vectors. Datalayout changes might be
269 // needed to properly support that.
270 if (!From->isIntegerTy() || !To->isIntegerTy())
271 return false;
272
273 unsigned FromWidth = From->getPrimitiveSizeInBits();
274 unsigned ToWidth = To->getPrimitiveSizeInBits();
275 return shouldChangeType(FromWidth, ToWidth);
276}
277
278// Return true, if No Signed Wrap should be maintained for I.
279// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
280// where both B and C should be ConstantInts, results in a constant that does
281// not overflow. This function only handles the Add and Sub opcodes. For
282// all other opcodes, the function conservatively returns false.
284 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
285 if (!OBO || !OBO->hasNoSignedWrap())
286 return false;
287
288 // We reason about Add and Sub Only.
289 Instruction::BinaryOps Opcode = I.getOpcode();
290 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
291 return false;
292
293 const APInt *BVal, *CVal;
294 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
295 return false;
296
297 bool Overflow = false;
298 if (Opcode == Instruction::Add)
299 (void)BVal->sadd_ov(*CVal, Overflow);
300 else
301 (void)BVal->ssub_ov(*CVal, Overflow);
302
303 return !Overflow;
304}
305
307 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
308 return OBO && OBO->hasNoUnsignedWrap();
309}
310
312 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
313 return OBO && OBO->hasNoSignedWrap();
314}
315
316/// Conservatively clears subclassOptionalData after a reassociation or
317/// commutation. We preserve fast-math flags when applicable as they can be
318/// preserved.
320 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
321 if (!FPMO) {
322 I.clearSubclassOptionalData();
323 return;
324 }
325
326 FastMathFlags FMF = I.getFastMathFlags();
327 I.clearSubclassOptionalData();
328 I.setFastMathFlags(FMF);
329}
330
331/// Combine constant operands of associative operations either before or after a
332/// cast to eliminate one of the associative operations:
333/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
334/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
336 InstCombinerImpl &IC) {
337 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
338 if (!Cast || !Cast->hasOneUse())
339 return false;
340
341 // TODO: Enhance logic for other casts and remove this check.
342 auto CastOpcode = Cast->getOpcode();
343 if (CastOpcode != Instruction::ZExt)
344 return false;
345
346 // TODO: Enhance logic for other BinOps and remove this check.
347 if (!BinOp1->isBitwiseLogicOp())
348 return false;
349
350 auto AssocOpcode = BinOp1->getOpcode();
351 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
352 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
353 return false;
354
355 Constant *C1, *C2;
356 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
357 !match(BinOp2->getOperand(1), m_Constant(C2)))
358 return false;
359
360 // TODO: This assumes a zext cast.
361 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
362 // to the destination type might lose bits.
363
364 // Fold the constants together in the destination type:
365 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
366 const DataLayout &DL = IC.getDataLayout();
367 Type *DestTy = C1->getType();
368 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
369 if (!CastC2)
370 return false;
371 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
372 if (!FoldedC)
373 return false;
374
375 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
376 IC.replaceOperand(*BinOp1, 1, FoldedC);
378 Cast->dropPoisonGeneratingFlags();
379 return true;
380}
381
382// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
383// inttoptr ( ptrtoint (x) ) --> x
384Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
385 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
386 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
387 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
388 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
389 Type *CastTy = IntToPtr->getDestTy();
390 if (PtrToInt &&
391 CastTy->getPointerAddressSpace() ==
392 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
393 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
394 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
395 return PtrToInt->getOperand(0);
396 }
397 return nullptr;
398}
399
400/// This performs a few simplifications for operators that are associative or
401/// commutative:
402///
403/// Commutative operators:
404///
405/// 1. Order operands such that they are listed from right (least complex) to
406/// left (most complex). This puts constants before unary operators before
407/// binary operators.
408///
409/// Associative operators:
410///
411/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
412/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
413///
414/// Associative and commutative operators:
415///
416/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
417/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
418/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
419/// if C1 and C2 are constants.
421 Instruction::BinaryOps Opcode = I.getOpcode();
422 bool Changed = false;
423
424 do {
425 // Order operands such that they are listed from right (least complex) to
426 // left (most complex). This puts constants before unary operators before
427 // binary operators.
428 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
429 getComplexity(I.getOperand(1)))
430 Changed = !I.swapOperands();
431
432 if (I.isCommutative()) {
433 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
434 replaceOperand(I, 0, Pair->first);
435 replaceOperand(I, 1, Pair->second);
436 Changed = true;
437 }
438 }
439
440 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
441 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
442
443 if (I.isAssociative()) {
444 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
445 if (Op0 && Op0->getOpcode() == Opcode) {
446 Value *A = Op0->getOperand(0);
447 Value *B = Op0->getOperand(1);
448 Value *C = I.getOperand(1);
449
450 // Does "B op C" simplify?
451 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
452 // It simplifies to V. Form "A op V".
453 replaceOperand(I, 0, A);
454 replaceOperand(I, 1, V);
455 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
456 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
457
458 // Conservatively clear all optional flags since they may not be
459 // preserved by the reassociation. Reset nsw/nuw based on the above
460 // analysis.
462
463 // Note: this is only valid because SimplifyBinOp doesn't look at
464 // the operands to Op0.
465 if (IsNUW)
466 I.setHasNoUnsignedWrap(true);
467
468 if (IsNSW)
469 I.setHasNoSignedWrap(true);
470
471 Changed = true;
472 ++NumReassoc;
473 continue;
474 }
475 }
476
477 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
478 if (Op1 && Op1->getOpcode() == Opcode) {
479 Value *A = I.getOperand(0);
480 Value *B = Op1->getOperand(0);
481 Value *C = Op1->getOperand(1);
482
483 // Does "A op B" simplify?
484 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
485 // It simplifies to V. Form "V op C".
486 replaceOperand(I, 0, V);
487 replaceOperand(I, 1, C);
488 // Conservatively clear the optional flags, since they may not be
489 // preserved by the reassociation.
491 Changed = true;
492 ++NumReassoc;
493 continue;
494 }
495 }
496 }
497
498 if (I.isAssociative() && I.isCommutative()) {
499 if (simplifyAssocCastAssoc(&I, *this)) {
500 Changed = true;
501 ++NumReassoc;
502 continue;
503 }
504
505 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
506 if (Op0 && Op0->getOpcode() == Opcode) {
507 Value *A = Op0->getOperand(0);
508 Value *B = Op0->getOperand(1);
509 Value *C = I.getOperand(1);
510
511 // Does "C op A" simplify?
512 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
513 // It simplifies to V. Form "V op B".
514 replaceOperand(I, 0, V);
515 replaceOperand(I, 1, B);
516 // Conservatively clear the optional flags, since they may not be
517 // preserved by the reassociation.
519 Changed = true;
520 ++NumReassoc;
521 continue;
522 }
523 }
524
525 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
526 if (Op1 && Op1->getOpcode() == Opcode) {
527 Value *A = I.getOperand(0);
528 Value *B = Op1->getOperand(0);
529 Value *C = Op1->getOperand(1);
530
531 // Does "C op A" simplify?
532 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
533 // It simplifies to V. Form "B op V".
534 replaceOperand(I, 0, B);
535 replaceOperand(I, 1, V);
536 // Conservatively clear the optional flags, since they may not be
537 // preserved by the reassociation.
539 Changed = true;
540 ++NumReassoc;
541 continue;
542 }
543 }
544
545 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
546 // if C1 and C2 are constants.
547 Value *A, *B;
548 Constant *C1, *C2, *CRes;
549 if (Op0 && Op1 &&
550 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
551 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
552 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
553 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
554 bool IsNUW = hasNoUnsignedWrap(I) &&
555 hasNoUnsignedWrap(*Op0) &&
556 hasNoUnsignedWrap(*Op1);
557 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
558 BinaryOperator::CreateNUW(Opcode, A, B) :
559 BinaryOperator::Create(Opcode, A, B);
560
561 if (isa<FPMathOperator>(NewBO)) {
562 FastMathFlags Flags = I.getFastMathFlags() &
563 Op0->getFastMathFlags() &
564 Op1->getFastMathFlags();
565 NewBO->setFastMathFlags(Flags);
566 }
567 InsertNewInstWith(NewBO, I.getIterator());
568 NewBO->takeName(Op1);
569 replaceOperand(I, 0, NewBO);
570 replaceOperand(I, 1, CRes);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
574 if (IsNUW)
575 I.setHasNoUnsignedWrap(true);
576
577 Changed = true;
578 continue;
579 }
580 }
581
582 // No further simplifications.
583 return Changed;
584 } while (true);
585}
586
587/// Return whether "X LOp (Y ROp Z)" is always equal to
588/// "(X LOp Y) ROp (X LOp Z)".
591 // X & (Y | Z) <--> (X & Y) | (X & Z)
592 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
593 if (LOp == Instruction::And)
594 return ROp == Instruction::Or || ROp == Instruction::Xor;
595
596 // X | (Y & Z) <--> (X | Y) & (X | Z)
597 if (LOp == Instruction::Or)
598 return ROp == Instruction::And;
599
600 // X * (Y + Z) <--> (X * Y) + (X * Z)
601 // X * (Y - Z) <--> (X * Y) - (X * Z)
602 if (LOp == Instruction::Mul)
603 return ROp == Instruction::Add || ROp == Instruction::Sub;
604
605 return false;
606}
607
608/// Return whether "(X LOp Y) ROp Z" is always equal to
609/// "(X ROp Z) LOp (Y ROp Z)".
613 return leftDistributesOverRight(ROp, LOp);
614
615 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
617
618 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
619 // but this requires knowing that the addition does not overflow and other
620 // such subtleties.
621}
622
623/// This function returns identity value for given opcode, which can be used to
624/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
626 if (isa<Constant>(V))
627 return nullptr;
628
629 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
630}
631
632/// This function predicates factorization using distributive laws. By default,
633/// it just returns the 'Op' inputs. But for special-cases like
634/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
635/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
636/// allow more factorization opportunities.
639 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
640 assert(Op && "Expected a binary operator");
641 LHS = Op->getOperand(0);
642 RHS = Op->getOperand(1);
643 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
644 Constant *C;
645 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
646 // X << C --> X * (1 << C)
648 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
649 assert(RHS && "Constant folding of immediate constants failed");
650 return Instruction::Mul;
651 }
652 // TODO: We can add other conversions e.g. shr => div etc.
653 }
654 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
655 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
657 // lshr nneg C, X --> ashr nneg C, X
658 return Instruction::AShr;
659 }
660 }
661 return Op->getOpcode();
662}
663
664/// This tries to simplify binary operations by factorizing out common terms
665/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
668 Instruction::BinaryOps InnerOpcode, Value *A,
669 Value *B, Value *C, Value *D) {
670 assert(A && B && C && D && "All values must be provided");
671
672 Value *V = nullptr;
673 Value *RetVal = nullptr;
674 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
675 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
676
677 // Does "X op' Y" always equal "Y op' X"?
678 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
679
680 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
681 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
682 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
683 // commutative case, "(A op' B) op (C op' A)"?
684 if (A == C || (InnerCommutative && A == D)) {
685 if (A != C)
686 std::swap(C, D);
687 // Consider forming "A op' (B op D)".
688 // If "B op D" simplifies then it can be formed with no cost.
689 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
690
691 // If "B op D" doesn't simplify then only go on if one of the existing
692 // operations "A op' B" and "C op' D" will be zapped as no longer used.
693 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
694 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
695 if (V)
696 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
697 }
698 }
699
700 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
701 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
702 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
703 // commutative case, "(A op' B) op (B op' D)"?
704 if (B == D || (InnerCommutative && B == C)) {
705 if (B != D)
706 std::swap(C, D);
707 // Consider forming "(A op C) op' B".
708 // If "A op C" simplifies then it can be formed with no cost.
709 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
710
711 // If "A op C" doesn't simplify then only go on if one of the existing
712 // operations "A op' B" and "C op' D" will be zapped as no longer used.
713 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
714 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
715 if (V)
716 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
717 }
718 }
719
720 if (!RetVal)
721 return nullptr;
722
723 ++NumFactor;
724 RetVal->takeName(&I);
725
726 // Try to add no-overflow flags to the final value.
727 if (isa<OverflowingBinaryOperator>(RetVal)) {
728 bool HasNSW = false;
729 bool HasNUW = false;
730 if (isa<OverflowingBinaryOperator>(&I)) {
731 HasNSW = I.hasNoSignedWrap();
732 HasNUW = I.hasNoUnsignedWrap();
733 }
734 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
735 HasNSW &= LOBO->hasNoSignedWrap();
736 HasNUW &= LOBO->hasNoUnsignedWrap();
737 }
738
739 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
740 HasNSW &= ROBO->hasNoSignedWrap();
741 HasNUW &= ROBO->hasNoUnsignedWrap();
742 }
743
744 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
745 // We can propagate 'nsw' if we know that
746 // %Y = mul nsw i16 %X, C
747 // %Z = add nsw i16 %Y, %X
748 // =>
749 // %Z = mul nsw i16 %X, C+1
750 //
751 // iff C+1 isn't INT_MIN
752 const APInt *CInt;
753 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
754 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
755
756 // nuw can be propagated with any constant or nuw value.
757 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
758 }
759 }
760 return RetVal;
761}
762
763// If `I` has one Const operand and the other matches `(ctpop (not x))`,
764// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
765// This is only useful if the new subtract can fold, so we only handle the
766// following cases:
767// 1) (add/sub/disjoint_or C, (ctpop (not x)))
768// -> (add/sub/disjoint_or C', (ctpop x))
769// 2) (cmp pred C, (ctpop (not x)))
770// -> (cmp pred C', (ctpop x))
772 unsigned Opc = I->getOpcode();
773 unsigned ConstIdx = 1;
774 switch (Opc) {
775 default:
776 return nullptr;
777 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
778 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
779 // is constant.
780 case Instruction::Sub:
781 ConstIdx = 0;
782 break;
783 case Instruction::ICmp:
784 // Signed predicates aren't correct in some edge cases, e.g. for i2 types.
785 // Also, since (ctpop x) is known [0, log2(BitWidth(x))], almost all signed
786 // comparisons against it are simplified to unsigned.
787 if (cast<ICmpInst>(I)->isSigned())
788 return nullptr;
789 break;
790 case Instruction::Or:
791 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
792 return nullptr;
793 [[fallthrough]];
794 case Instruction::Add:
795 break;
796 }
797
798 Value *Op;
799 // Find ctpop.
800 if (!match(I->getOperand(1 - ConstIdx),
801 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
802 return nullptr;
803
804 Constant *C;
805 // Check other operand is ImmConstant.
806 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
807 return nullptr;
808
809 Type *Ty = Op->getType();
810 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
811 // Need extra check for icmp. Note if this check is true, it generally means
812 // the icmp will simplify to true/false.
813 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
814 Constant *Cmp =
816 if (!Cmp || !Cmp->isZeroValue())
817 return nullptr;
818 }
819
820 // Check we can invert `(not x)` for free.
821 bool Consumes = false;
822 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
823 return nullptr;
824 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
825 assert(NotOp != nullptr &&
826 "Desync between isFreeToInvert and getFreelyInverted");
827
828 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
829
830 Value *R = nullptr;
831
832 // Do the transformation here to avoid potentially introducing an infinite
833 // loop.
834 switch (Opc) {
835 case Instruction::Sub:
836 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
837 break;
838 case Instruction::Or:
839 case Instruction::Add:
840 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
841 break;
842 case Instruction::ICmp:
843 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
844 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
845 break;
846 default:
847 llvm_unreachable("Unhandled Opcode");
848 }
849 assert(R != nullptr);
850 return replaceInstUsesWith(*I, R);
851}
852
853// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
854// IFF
855// 1) the logic_shifts match
856// 2) either both binops are binops and one is `and` or
857// BinOp1 is `and`
858// (logic_shift (inv_logic_shift C1, C), C) == C1 or
859//
860// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
861//
862// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
863// IFF
864// 1) the logic_shifts match
865// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
866//
867// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
868//
869// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
870// IFF
871// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
872// 2) Binop2 is `not`
873//
874// -> (arithmetic_shift Binop1((not X), Y), Amt)
875
877 const DataLayout &DL = I.getDataLayout();
878 auto IsValidBinOpc = [](unsigned Opc) {
879 switch (Opc) {
880 default:
881 return false;
882 case Instruction::And:
883 case Instruction::Or:
884 case Instruction::Xor:
885 case Instruction::Add:
886 // Skip Sub as we only match constant masks which will canonicalize to use
887 // add.
888 return true;
889 }
890 };
891
892 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
893 // constraints.
894 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
895 unsigned ShOpc) {
896 assert(ShOpc != Instruction::AShr);
897 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
898 ShOpc == Instruction::Shl;
899 };
900
901 auto GetInvShift = [](unsigned ShOpc) {
902 assert(ShOpc != Instruction::AShr);
903 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
904 };
905
906 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
907 unsigned ShOpc, Constant *CMask,
908 Constant *CShift) {
909 // If the BinOp1 is `and` we don't need to check the mask.
910 if (BinOpc1 == Instruction::And)
911 return true;
912
913 // For all other possible transfers we need complete distributable
914 // binop/shift (anything but `add` + `lshr`).
915 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
916 return false;
917
918 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
919 // vecs, otherwise the mask will be simplified and the following check will
920 // handle it).
921 if (BinOpc2 == Instruction::And)
922 return true;
923
924 // Otherwise, need mask that meets the below requirement.
925 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
926 Constant *MaskInvShift =
927 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
928 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
929 CMask;
930 };
931
932 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
933 Constant *CMask, *CShift;
934 Value *X, *Y, *ShiftedX, *Mask, *Shift;
935 if (!match(I.getOperand(ShOpnum),
936 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
937 return nullptr;
938 if (!match(I.getOperand(1 - ShOpnum),
939 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
940 return nullptr;
941
942 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
943 return nullptr;
944
945 // Make sure we are matching instruction shifts and not ConstantExpr
946 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
947 auto *IX = dyn_cast<Instruction>(ShiftedX);
948 if (!IY || !IX)
949 return nullptr;
950
951 // LHS and RHS need same shift opcode
952 unsigned ShOpc = IY->getOpcode();
953 if (ShOpc != IX->getOpcode())
954 return nullptr;
955
956 // Make sure binop is real instruction and not ConstantExpr
957 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
958 if (!BO2)
959 return nullptr;
960
961 unsigned BinOpc = BO2->getOpcode();
962 // Make sure we have valid binops.
963 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
964 return nullptr;
965
966 if (ShOpc == Instruction::AShr) {
967 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
968 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
969 Value *NotX = Builder.CreateNot(X);
970 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
972 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
973 }
974
975 return nullptr;
976 }
977
978 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
979 // distribute to drop the shift irrelevant of constants.
980 if (BinOpc == I.getOpcode() &&
981 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
982 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
983 Value *NewBinOp1 = Builder.CreateBinOp(
984 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
985 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
986 }
987
988 // Otherwise we can only distribute by constant shifting the mask, so
989 // ensure we have constants.
990 if (!match(Shift, m_ImmConstant(CShift)))
991 return nullptr;
992 if (!match(Mask, m_ImmConstant(CMask)))
993 return nullptr;
994
995 // Check if we can distribute the binops.
996 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
997 return nullptr;
998
999 Constant *NewCMask =
1000 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1001 Value *NewBinOp2 = Builder.CreateBinOp(
1002 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1003 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1004 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1005 NewBinOp1, CShift);
1006 };
1007
1008 if (Instruction *R = MatchBinOp(0))
1009 return R;
1010 return MatchBinOp(1);
1011}
1012
1013// (Binop (zext C), (select C, T, F))
1014// -> (select C, (binop 1, T), (binop 0, F))
1015//
1016// (Binop (sext C), (select C, T, F))
1017// -> (select C, (binop -1, T), (binop 0, F))
1018//
1019// Attempt to simplify binary operations into a select with folded args, when
1020// one operand of the binop is a select instruction and the other operand is a
1021// zext/sext extension, whose value is the select condition.
1024 // TODO: this simplification may be extended to any speculatable instruction,
1025 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1026 Instruction::BinaryOps Opc = I.getOpcode();
1027 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1028 Value *A, *CondVal, *TrueVal, *FalseVal;
1029 Value *CastOp;
1030
1031 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1032 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1033 A->getType()->getScalarSizeInBits() == 1 &&
1034 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1035 m_Value(FalseVal)));
1036 };
1037
1038 // Make sure one side of the binop is a select instruction, and the other is a
1039 // zero/sign extension operating on a i1.
1040 if (MatchSelectAndCast(LHS, RHS))
1041 CastOp = LHS;
1042 else if (MatchSelectAndCast(RHS, LHS))
1043 CastOp = RHS;
1044 else
1045 return nullptr;
1046
1047 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1048 bool IsCastOpRHS = (CastOp == RHS);
1049 bool IsZExt = isa<ZExtInst>(CastOp);
1050 Constant *C;
1051
1052 if (IsTrueArm) {
1053 C = Constant::getNullValue(V->getType());
1054 } else if (IsZExt) {
1055 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1056 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1057 } else {
1058 C = Constant::getAllOnesValue(V->getType());
1059 }
1060
1061 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1062 : Builder.CreateBinOp(Opc, C, V);
1063 };
1064
1065 // If the value used in the zext/sext is the select condition, or the negated
1066 // of the select condition, the binop can be simplified.
1067 if (CondVal == A) {
1068 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1069 return SelectInst::Create(CondVal, NewTrueVal,
1070 NewFoldedConst(true, FalseVal));
1071 }
1072
1073 if (match(A, m_Not(m_Specific(CondVal)))) {
1074 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1075 return SelectInst::Create(CondVal, NewTrueVal,
1076 NewFoldedConst(false, FalseVal));
1077 }
1078
1079 return nullptr;
1080}
1081
1083 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1084 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1085 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1086 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1087 Value *A, *B, *C, *D;
1088 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1089
1090 if (Op0)
1091 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1092 if (Op1)
1093 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1094
1095 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1096 // a common term.
1097 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1098 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1099 return V;
1100
1101 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1102 // term.
1103 if (Op0)
1104 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1105 if (Value *V =
1106 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1107 return V;
1108
1109 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1110 // term.
1111 if (Op1)
1112 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1113 if (Value *V =
1114 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1115 return V;
1116
1117 return nullptr;
1118}
1119
1120/// This tries to simplify binary operations which some other binary operation
1121/// distributes over either by factorizing out common terms
1122/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1123/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1124/// Returns the simplified value, or null if it didn't simplify.
1126 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1127 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1128 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1129 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1130
1131 // Factorization.
1132 if (Value *R = tryFactorizationFolds(I))
1133 return R;
1134
1135 // Expansion.
1136 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1137 // The instruction has the form "(A op' B) op C". See if expanding it out
1138 // to "(A op C) op' (B op C)" results in simplifications.
1139 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1140 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1141
1142 // Disable the use of undef because it's not safe to distribute undef.
1143 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1144 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1145 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1146
1147 // Do "A op C" and "B op C" both simplify?
1148 if (L && R) {
1149 // They do! Return "L op' R".
1150 ++NumExpand;
1151 C = Builder.CreateBinOp(InnerOpcode, L, R);
1152 C->takeName(&I);
1153 return C;
1154 }
1155
1156 // Does "A op C" simplify to the identity value for the inner opcode?
1157 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1158 // They do! Return "B op C".
1159 ++NumExpand;
1160 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1161 C->takeName(&I);
1162 return C;
1163 }
1164
1165 // Does "B op C" simplify to the identity value for the inner opcode?
1166 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1167 // They do! Return "A op C".
1168 ++NumExpand;
1169 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1170 C->takeName(&I);
1171 return C;
1172 }
1173 }
1174
1175 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1176 // The instruction has the form "A op (B op' C)". See if expanding it out
1177 // to "(A op B) op' (A op C)" results in simplifications.
1178 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1179 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1180
1181 // Disable the use of undef because it's not safe to distribute undef.
1182 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1183 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1184 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1185
1186 // Do "A op B" and "A op C" both simplify?
1187 if (L && R) {
1188 // They do! Return "L op' R".
1189 ++NumExpand;
1190 A = Builder.CreateBinOp(InnerOpcode, L, R);
1191 A->takeName(&I);
1192 return A;
1193 }
1194
1195 // Does "A op B" simplify to the identity value for the inner opcode?
1196 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1197 // They do! Return "A op C".
1198 ++NumExpand;
1199 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1200 A->takeName(&I);
1201 return A;
1202 }
1203
1204 // Does "A op C" simplify to the identity value for the inner opcode?
1205 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1206 // They do! Return "A op B".
1207 ++NumExpand;
1208 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1209 A->takeName(&I);
1210 return A;
1211 }
1212 }
1213
1215}
1216
1217static std::optional<std::pair<Value *, Value *>>
1219 if (LHS->getParent() != RHS->getParent())
1220 return std::nullopt;
1221
1222 if (LHS->getNumIncomingValues() < 2)
1223 return std::nullopt;
1224
1225 if (!equal(LHS->blocks(), RHS->blocks()))
1226 return std::nullopt;
1227
1228 Value *L0 = LHS->getIncomingValue(0);
1229 Value *R0 = RHS->getIncomingValue(0);
1230
1231 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1232 Value *L1 = LHS->getIncomingValue(I);
1233 Value *R1 = RHS->getIncomingValue(I);
1234
1235 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1236 continue;
1237
1238 return std::nullopt;
1239 }
1240
1241 return std::optional(std::pair(L0, R0));
1242}
1243
1244std::optional<std::pair<Value *, Value *>>
1245InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1246 Instruction *LHSInst = dyn_cast<Instruction>(LHS);
1247 Instruction *RHSInst = dyn_cast<Instruction>(RHS);
1248 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1249 return std::nullopt;
1250 switch (LHSInst->getOpcode()) {
1251 case Instruction::PHI:
1252 return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS));
1253 case Instruction::Select: {
1254 Value *Cond = LHSInst->getOperand(0);
1255 Value *TrueVal = LHSInst->getOperand(1);
1256 Value *FalseVal = LHSInst->getOperand(2);
1257 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1258 FalseVal == RHSInst->getOperand(1))
1259 return std::pair(TrueVal, FalseVal);
1260 return std::nullopt;
1261 }
1262 case Instruction::Call: {
1263 // Match min(a, b) and max(a, b)
1264 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1265 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1266 if (LHSMinMax && RHSMinMax &&
1267 LHSMinMax->getPredicate() ==
1269 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1270 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1271 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1272 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1273 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1274 return std::nullopt;
1275 }
1276 default:
1277 return std::nullopt;
1278 }
1279}
1280
1282 Value *LHS,
1283 Value *RHS) {
1284 Value *A, *B, *C, *D, *E, *F;
1285 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1286 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1287 if (!LHSIsSelect && !RHSIsSelect)
1288 return nullptr;
1289
1290 FastMathFlags FMF;
1292 if (isa<FPMathOperator>(&I)) {
1293 FMF = I.getFastMathFlags();
1295 }
1296
1297 Instruction::BinaryOps Opcode = I.getOpcode();
1299
1300 Value *Cond, *True = nullptr, *False = nullptr;
1301
1302 // Special-case for add/negate combination. Replace the zero in the negation
1303 // with the trailing add operand:
1304 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1305 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1306 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1307 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1308 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1309 return nullptr;
1310
1311 Value *N;
1312 if (True && match(FVal, m_Neg(m_Value(N)))) {
1313 Value *Sub = Builder.CreateSub(Z, N);
1314 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1315 }
1316 if (False && match(TVal, m_Neg(m_Value(N)))) {
1317 Value *Sub = Builder.CreateSub(Z, N);
1318 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1319 }
1320 return nullptr;
1321 };
1322
1323 if (LHSIsSelect && RHSIsSelect && A == D) {
1324 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1325 Cond = A;
1326 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1327 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1328
1329 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1330 if (False && !True)
1331 True = Builder.CreateBinOp(Opcode, B, E);
1332 else if (True && !False)
1333 False = Builder.CreateBinOp(Opcode, C, F);
1334 }
1335 } else if (LHSIsSelect && LHS->hasOneUse()) {
1336 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1337 Cond = A;
1338 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1339 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1340 if (Value *NewSel = foldAddNegate(B, C, RHS))
1341 return NewSel;
1342 } else if (RHSIsSelect && RHS->hasOneUse()) {
1343 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1344 Cond = D;
1345 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1346 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1347 if (Value *NewSel = foldAddNegate(E, F, LHS))
1348 return NewSel;
1349 }
1350
1351 if (!True || !False)
1352 return nullptr;
1353
1354 Value *SI = Builder.CreateSelect(Cond, True, False);
1355 SI->takeName(&I);
1356 return SI;
1357}
1358
1359/// Freely adapt every user of V as-if V was changed to !V.
1360/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1362 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1363 for (User *U : make_early_inc_range(I->users())) {
1364 if (U == IgnoredUser)
1365 continue; // Don't consider this user.
1366 switch (cast<Instruction>(U)->getOpcode()) {
1367 case Instruction::Select: {
1368 auto *SI = cast<SelectInst>(U);
1369 SI->swapValues();
1370 SI->swapProfMetadata();
1371 break;
1372 }
1373 case Instruction::Br: {
1374 BranchInst *BI = cast<BranchInst>(U);
1375 BI->swapSuccessors(); // swaps prof metadata too
1376 if (BPI)
1378 break;
1379 }
1380 case Instruction::Xor:
1381 replaceInstUsesWith(cast<Instruction>(*U), I);
1382 // Add to worklist for DCE.
1383 addToWorklist(cast<Instruction>(U));
1384 break;
1385 default:
1386 llvm_unreachable("Got unexpected user - out of sync with "
1387 "canFreelyInvertAllUsersOf() ?");
1388 }
1389 }
1390}
1391
1392/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1393/// constant zero (which is the 'negate' form).
1394Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1395 Value *NegV;
1396 if (match(V, m_Neg(m_Value(NegV))))
1397 return NegV;
1398
1399 // Constants can be considered to be negated values if they can be folded.
1400 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1401 return ConstantExpr::getNeg(C);
1402
1403 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1404 if (C->getType()->getElementType()->isIntegerTy())
1405 return ConstantExpr::getNeg(C);
1406
1407 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1408 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1409 Constant *Elt = CV->getAggregateElement(i);
1410 if (!Elt)
1411 return nullptr;
1412
1413 if (isa<UndefValue>(Elt))
1414 continue;
1415
1416 if (!isa<ConstantInt>(Elt))
1417 return nullptr;
1418 }
1419 return ConstantExpr::getNeg(CV);
1420 }
1421
1422 // Negate integer vector splats.
1423 if (auto *CV = dyn_cast<Constant>(V))
1424 if (CV->getType()->isVectorTy() &&
1425 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1426 return ConstantExpr::getNeg(CV);
1427
1428 return nullptr;
1429}
1430
1431// Try to fold:
1432// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1433// -> ({s|u}itofp (int_binop x, y))
1434// 2) (fp_binop ({s|u}itofp x), FpC)
1435// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1436//
1437// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1438Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1439 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1441
1442 Type *FPTy = BO.getType();
1443 Type *IntTy = IntOps[0]->getType();
1444
1445 unsigned IntSz = IntTy->getScalarSizeInBits();
1446 // This is the maximum number of inuse bits by the integer where the int -> fp
1447 // casts are exact.
1448 unsigned MaxRepresentableBits =
1450
1451 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1452 // checks later on.
1453 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1454
1455 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1456 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1457 auto IsNonZero = [&](unsigned OpNo) -> bool {
1458 if (OpsKnown[OpNo].hasKnownBits() &&
1459 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1460 return true;
1461 return isKnownNonZero(IntOps[OpNo], SQ);
1462 };
1463
1464 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1465 // NB: This matches the impl in ValueTracking, we just try to use cached
1466 // knownbits here. If we ever start supporting WithCache for
1467 // `isKnownNonNegative`, change this to an explicit call.
1468 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1469 };
1470
1471 // Check if we know for certain that ({s|u}itofp op) is exact.
1472 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1473 // Can we treat this operand as the desired sign?
1474 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1475 !IsNonNeg(OpNo))
1476 return false;
1477
1478 // If fp precision >= bitwidth(op) then its exact.
1479 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1480 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1481 // handled specially. We can't, however, increase the bound arbitrarily for
1482 // `sitofp` as for larger sizes, it won't sign extend.
1483 if (MaxRepresentableBits < IntSz) {
1484 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1485 // numSignBits(op).
1486 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1487 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1488 if (OpsFromSigned)
1489 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1490 // Finally for unsigned check that fp precision >= bitwidth(op) -
1491 // numLeadingZeros(op).
1492 else {
1493 NumUsedLeadingBits[OpNo] =
1494 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1495 }
1496 }
1497 // NB: We could also check if op is known to be a power of 2 or zero (which
1498 // will always be representable). Its unlikely, however, that is we are
1499 // unable to bound op in any way we will be able to pass the overflow checks
1500 // later on.
1501
1502 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1503 return false;
1504 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1505 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1506 IsNonZero(OpNo);
1507 };
1508
1509 // If we have a constant rhs, see if we can losslessly convert it to an int.
1510 if (Op1FpC != nullptr) {
1511 // Signed + Mul req non-zero
1512 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1513 !match(Op1FpC, m_NonZeroFP()))
1514 return nullptr;
1515
1517 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1518 IntTy, DL);
1519 if (Op1IntC == nullptr)
1520 return nullptr;
1521 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1522 : Instruction::UIToFP,
1523 Op1IntC, FPTy, DL) != Op1FpC)
1524 return nullptr;
1525
1526 // First try to keep sign of cast the same.
1527 IntOps[1] = Op1IntC;
1528 }
1529
1530 // Ensure lhs/rhs integer types match.
1531 if (IntTy != IntOps[1]->getType())
1532 return nullptr;
1533
1534 if (Op1FpC == nullptr) {
1535 if (!IsValidPromotion(1))
1536 return nullptr;
1537 }
1538 if (!IsValidPromotion(0))
1539 return nullptr;
1540
1541 // Final we check if the integer version of the binop will not overflow.
1543 // Because of the precision check, we can often rule out overflows.
1544 bool NeedsOverflowCheck = true;
1545 // Try to conservatively rule out overflow based on the already done precision
1546 // checks.
1547 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1548 unsigned OverflowMaxCurBits =
1549 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1550 bool OutputSigned = OpsFromSigned;
1551 switch (BO.getOpcode()) {
1552 case Instruction::FAdd:
1553 IntOpc = Instruction::Add;
1554 OverflowMaxOutputBits += OverflowMaxCurBits;
1555 break;
1556 case Instruction::FSub:
1557 IntOpc = Instruction::Sub;
1558 OverflowMaxOutputBits += OverflowMaxCurBits;
1559 break;
1560 case Instruction::FMul:
1561 IntOpc = Instruction::Mul;
1562 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1563 break;
1564 default:
1565 llvm_unreachable("Unsupported binop");
1566 }
1567 // The precision check may have already ruled out overflow.
1568 if (OverflowMaxOutputBits < IntSz) {
1569 NeedsOverflowCheck = false;
1570 // We can bound unsigned overflow from sub to in range signed value (this is
1571 // what allows us to avoid the overflow check for sub).
1572 if (IntOpc == Instruction::Sub)
1573 OutputSigned = true;
1574 }
1575
1576 // Precision check did not rule out overflow, so need to check.
1577 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1578 // `IntOps[...]` arguments to `KnownOps[...]`.
1579 if (NeedsOverflowCheck &&
1580 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1581 return nullptr;
1582
1583 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1584 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1585 IntBO->setHasNoSignedWrap(OutputSigned);
1586 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1587 }
1588 if (OutputSigned)
1589 return new SIToFPInst(IntBinOp, FPTy);
1590 return new UIToFPInst(IntBinOp, FPTy);
1591}
1592
1593// Try to fold:
1594// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1595// -> ({s|u}itofp (int_binop x, y))
1596// 2) (fp_binop ({s|u}itofp x), FpC)
1597// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1598Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1599 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1600 Constant *Op1FpC = nullptr;
1601 // Check for:
1602 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1603 // 2) (binop ({s|u}itofp x), FpC)
1604 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1605 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1606 return nullptr;
1607
1608 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1609 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1610 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1611 return nullptr;
1612
1613 // Cache KnownBits a bit to potentially save some analysis.
1614 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1615
1616 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1617 // different constraints depending on the sign of the cast.
1618 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1619 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1620 IntOps, Op1FpC, OpsKnown))
1621 return R;
1622 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1623 Op1FpC, OpsKnown);
1624}
1625
1626/// A binop with a constant operand and a sign-extended boolean operand may be
1627/// converted into a select of constants by applying the binary operation to
1628/// the constant with the two possible values of the extended boolean (0 or -1).
1629Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1630 // TODO: Handle non-commutative binop (constant is operand 0).
1631 // TODO: Handle zext.
1632 // TODO: Peek through 'not' of cast.
1633 Value *BO0 = BO.getOperand(0);
1634 Value *BO1 = BO.getOperand(1);
1635 Value *X;
1636 Constant *C;
1637 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1638 !X->getType()->isIntOrIntVectorTy(1))
1639 return nullptr;
1640
1641 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1644 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1645 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1646 return SelectInst::Create(X, TVal, FVal);
1647}
1648
1650 SelectInst *SI,
1651 bool IsTrueArm) {
1652 SmallVector<Constant *> ConstOps;
1653 for (Value *Op : I.operands()) {
1654 CmpInst::Predicate Pred;
1655 Constant *C = nullptr;
1656 if (Op == SI) {
1657 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1658 : SI->getFalseValue());
1659 } else if (match(SI->getCondition(),
1660 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1661 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1663 // Pass
1664 } else {
1665 C = dyn_cast<Constant>(Op);
1666 }
1667 if (C == nullptr)
1668 return nullptr;
1669
1670 ConstOps.push_back(C);
1671 }
1672
1673 return ConstantFoldInstOperands(&I, ConstOps, I.getDataLayout());
1674}
1675
1677 Value *NewOp, InstCombiner &IC) {
1678 Instruction *Clone = I.clone();
1679 Clone->replaceUsesOfWith(SI, NewOp);
1681 IC.InsertNewInstBefore(Clone, SI->getIterator());
1682 return Clone;
1683}
1684
1686 bool FoldWithMultiUse) {
1687 // Don't modify shared select instructions unless set FoldWithMultiUse
1688 if (!SI->hasOneUse() && !FoldWithMultiUse)
1689 return nullptr;
1690
1691 Value *TV = SI->getTrueValue();
1692 Value *FV = SI->getFalseValue();
1693 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1694 return nullptr;
1695
1696 // Bool selects with constant operands can be folded to logical ops.
1697 if (SI->getType()->isIntOrIntVectorTy(1))
1698 return nullptr;
1699
1700 // Test if a FCmpInst instruction is used exclusively by a select as
1701 // part of a minimum or maximum operation. If so, refrain from doing
1702 // any other folding. This helps out other analyses which understand
1703 // non-obfuscated minimum and maximum idioms. And in this case, at
1704 // least one of the comparison operands has at least one user besides
1705 // the compare (the select), which would often largely negate the
1706 // benefit of folding anyway.
1707 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1708 if (CI->hasOneUse()) {
1709 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1710 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1711 return nullptr;
1712 }
1713 }
1714
1715 // Make sure that one of the select arms constant folds successfully.
1716 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1717 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1718 if (!NewTV && !NewFV)
1719 return nullptr;
1720
1721 // Create an instruction for the arm that did not fold.
1722 if (!NewTV)
1723 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1724 if (!NewFV)
1725 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1726 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1727}
1728
1730 Value *InValue, BasicBlock *InBB,
1731 const DataLayout &DL,
1732 const SimplifyQuery SQ) {
1733 // NB: It is a precondition of this transform that the operands be
1734 // phi translatable! This is usually trivially satisfied by limiting it
1735 // to constant ops, and for selects we do a more sophisticated check.
1737 for (Value *Op : I.operands()) {
1738 if (Op == PN)
1739 Ops.push_back(InValue);
1740 else
1741 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1742 }
1743
1744 // Don't consider the simplification successful if we get back a constant
1745 // expression. That's just an instruction in hiding.
1746 // Also reject the case where we simplify back to the phi node. We wouldn't
1747 // be able to remove it in that case.
1749 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1750 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1751 return NewVal;
1752
1753 // Check if incoming PHI value can be replaced with constant
1754 // based on implied condition.
1755 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1756 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1757 if (TerminatorBI && TerminatorBI->isConditional() &&
1758 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1759 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1760 std::optional<bool> ImpliedCond =
1761 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1762 Ops[0], Ops[1], DL, LHSIsTrue);
1763 if (ImpliedCond)
1764 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1765 }
1766
1767 return nullptr;
1768}
1769
1771 unsigned NumPHIValues = PN->getNumIncomingValues();
1772 if (NumPHIValues == 0)
1773 return nullptr;
1774
1775 // We normally only transform phis with a single use. However, if a PHI has
1776 // multiple uses and they are all the same operation, we can fold *all* of the
1777 // uses into the PHI.
1778 if (!PN->hasOneUse()) {
1779 // Walk the use list for the instruction, comparing them to I.
1780 for (User *U : PN->users()) {
1781 Instruction *UI = cast<Instruction>(U);
1782 if (UI != &I && !I.isIdenticalTo(UI))
1783 return nullptr;
1784 }
1785 // Otherwise, we can replace *all* users with the new PHI we form.
1786 }
1787
1788 // Check to see whether the instruction can be folded into each phi operand.
1789 // If there is one operand that does not fold, remember the BB it is in.
1790 // If there is more than one or if *it* is a PHI, bail out.
1791 SmallVector<Value *> NewPhiValues;
1792 BasicBlock *NonSimplifiedBB = nullptr;
1793 Value *NonSimplifiedInVal = nullptr;
1794 for (unsigned i = 0; i != NumPHIValues; ++i) {
1795 Value *InVal = PN->getIncomingValue(i);
1796 BasicBlock *InBB = PN->getIncomingBlock(i);
1797
1798 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1799 NewPhiValues.push_back(NewVal);
1800 continue;
1801 }
1802
1803 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1804
1805 NonSimplifiedBB = InBB;
1806 NonSimplifiedInVal = InVal;
1807 NewPhiValues.push_back(nullptr);
1808
1809 // If the InVal is an invoke at the end of the pred block, then we can't
1810 // insert a computation after it without breaking the edge.
1811 if (isa<InvokeInst>(InVal))
1812 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1813 return nullptr;
1814
1815 // If the incoming non-constant value is reachable from the phis block,
1816 // we'll push the operation across a loop backedge. This could result in
1817 // an infinite combine loop, and is generally non-profitable (especially
1818 // if the operation was originally outside the loop).
1819 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1820 LI))
1821 return nullptr;
1822 }
1823
1824 // If there is exactly one non-simplified value, we can insert a copy of the
1825 // operation in that block. However, if this is a critical edge, we would be
1826 // inserting the computation on some other paths (e.g. inside a loop). Only
1827 // do this if the pred block is unconditionally branching into the phi block.
1828 // Also, make sure that the pred block is not dead code.
1829 if (NonSimplifiedBB != nullptr) {
1830 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1831 if (!BI || !BI->isUnconditional() ||
1832 !DT.isReachableFromEntry(NonSimplifiedBB))
1833 return nullptr;
1834 }
1835
1836 // Okay, we can do the transformation: create the new PHI node.
1837 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1838 InsertNewInstBefore(NewPN, PN->getIterator());
1839 NewPN->takeName(PN);
1840 NewPN->setDebugLoc(PN->getDebugLoc());
1841
1842 // If we are going to have to insert a new computation, do so right before the
1843 // predecessor's terminator.
1844 Instruction *Clone = nullptr;
1845 if (NonSimplifiedBB) {
1846 Clone = I.clone();
1847 for (Use &U : Clone->operands()) {
1848 if (U == PN)
1849 U = NonSimplifiedInVal;
1850 else
1851 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1852 }
1853 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1854 }
1855
1856 for (unsigned i = 0; i != NumPHIValues; ++i) {
1857 if (NewPhiValues[i])
1858 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1859 else
1860 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1861 }
1862
1863 for (User *U : make_early_inc_range(PN->users())) {
1864 Instruction *User = cast<Instruction>(U);
1865 if (User == &I) continue;
1866 replaceInstUsesWith(*User, NewPN);
1868 }
1869
1870 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1871 const_cast<PHINode &>(*NewPN),
1872 const_cast<PHINode &>(*PN), DT);
1873 return replaceInstUsesWith(I, NewPN);
1874}
1875
// NOTE(review): this listing omits the line holding the function signature
// (listing line 1876) as well as listing lines 1900 and 1963. Judging by its
// uses below, `BO` is the binary operator being folded -- confirm against the
// full file.
//
// Purpose: replace a binary operator whose two operands are single-use phis
// located in the same block as the operator with one new phi. Two strategies
// are tried in order:
//   1. every pair of phi operands arrives from the same incoming block and
//      one member of each pair equals the constant `C`; the new phi then
//      simply takes the other member of each pair;
//   2. both phis have exactly two operands and one predecessor feeds
//      immediate constants into both; that pair is constant-folded and the
//      binop is re-created in the other predecessor.
// Returns the phi produced by PHINode::Create, or nullptr when neither
// strategy applies.
1877 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1878 // we are guarding against replicating the binop in >1 predecessor.
1879 // This could miss matching a phi with 2 constant incoming values.
1880 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1881 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1882 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1883 Phi0->getNumOperands() != Phi1->getNumOperands())
1884 return nullptr;
1885
1886 // TODO: Remove the restriction for binop being in the same block as the phis.
1887 if (BO.getParent() != Phi0->getParent() ||
1888 BO.getParent() != Phi1->getParent())
1889 return nullptr;
1890
1891 // Fold if there is at least one specific constant value in phi0 or phi1's
1892 // incoming values that comes from the same block and this specific constant
1893 // value can be used to do optimization for specific binary operator.
1894 // For example:
1895 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1896 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1897 // %add = add i32 %phi0, %phi1
1898 // ==>
1899 // %add = phi i32 [%j, %bb0], [%i, %bb1]
// NOTE(review): the declaration of `C` (listing line 1900) is missing from
// this extract; only the tail of its initializer survives on the next line.
// Presumably `C` is the constant that makes the binop a no-op for this opcode
// (0 for add in the example above) -- confirm.
1901 /*AllowRHSConstant*/ false);
1902 if (C) {
1903 SmallVector<Value *, 4> NewIncomingValues;
// Predicate over one (Phi0 operand, Phi1 operand) pair. It has a side effect:
// on success it appends the non-`C` member of the pair to NewIncomingValues.
1904 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1905 auto &Phi0Use = std::get<0>(T);
1906 auto &Phi1Use = std::get<1>(T);
1907 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1908 return false;
1909 Value *Phi0UseV = Phi0Use.get();
1910 Value *Phi1UseV = Phi1Use.get();
1911 if (Phi0UseV == C)
1912 NewIncomingValues.push_back(Phi1UseV);
1913 else if (Phi1UseV == C)
1914 NewIncomingValues.push_back(Phi0UseV);
1915 else
1916 return false;
1917 return true;
1918 };
1919
1920 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1921 CanFoldIncomingValuePair)) {
1922 PHINode *NewPhi =
1923 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1924 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1925 "The number of collected incoming values should equal the number "
1926 "of the original PHINode operands!");
1927 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1928 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1929 return NewPhi;
1930 }
1931 }
1932
// The second strategy only handles phis with exactly two operands.
1933 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1934 return nullptr;
1935
1936 // Match a pair of incoming constants for one of the predecessor blocks.
1937 BasicBlock *ConstBB, *OtherBB;
1938 Constant *C0, *C1;
1939 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1940 ConstBB = Phi0->getIncomingBlock(0);
1941 OtherBB = Phi0->getIncomingBlock(1);
1942 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1943 ConstBB = Phi0->getIncomingBlock(1);
1944 OtherBB = Phi0->getIncomingBlock(0);
1945 } else {
1946 return nullptr;
1947 }
1948 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1949 return nullptr;
1950
1951 // The block that we are hoisting to must reach here unconditionally.
1952 // Otherwise, we could be speculatively executing an expensive or
1953 // non-speculative op.
1954 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1955 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1956 !DT.isReachableFromEntry(OtherBB))
1957 return nullptr;
1958
1959 // TODO: This check could be tightened to only apply to binops (div/rem) that
1960 // are not safe to speculatively execute. But that could allow hoisting
1961 // potentially expensive instructions (fdiv for example).
// NOTE(review): the condition tested for each instruction preceding `BO` in
// its block (listing line 1963) is missing from this extract.
1962 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1964 return nullptr;
1965
1966 // Fold constants for the predecessor block with constant incoming values.
1967 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1968 if (!NewC)
1969 return nullptr;
1970
1971 // Make a new binop in the predecessor block with the non-constant incoming
1972 // values.
1973 Builder.SetInsertPoint(PredBlockBranch);
1974 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1975 Phi0->getIncomingValueForBlock(OtherBB),
1976 Phi1->getIncomingValueForBlock(OtherBB));
// The builder's result is not always a BinaryOperator, hence the dyn_cast
// before copying the IR flags from the original binop.
1977 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1978 NotFoldedNewBO->copyIRFlags(&BO);
1979
1980 // Replace the binop with a phi of the new values. The old phis are dead.
1981 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1982 NewPhi->addIncoming(NewBO, OtherBB);
1983 NewPhi->addIncoming(NewC, ConstBB);
1984 return NewPhi;
1985}
1986
// NOTE(review): the signature line (listing line 1987) is missing from this
// extract; `I` is presumably the binary operator being visited -- confirm.
//
// When operand 1 of `I` is a constant, try to push `I` into its operand 0:
// FoldOpIntoSelect is used when operand 0 is a select, foldOpIntoPhi when it
// is a phi. Returns whatever the helper produced, or nullptr when operand 1
// is not a constant, operand 0 is neither kind, or the helper declined.
1988 if (!isa<Constant>(I.getOperand(1)))
1989 return nullptr;
1990
1991 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1992 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1993 return NewSel;
1994 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1995 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1996 return NewPhi;
1997 }
1998 return nullptr;
1999}
2000
// NOTE(review): the signature line (listing line 2001) is missing from this
// extract; `GEP` and `Src` are presumably the outer GEP and the GEP feeding
// its pointer operand -- confirm.
//
// Returns false only when `GEP` has all-zero indices while `Src` does not
// have all-zero indices and does not have exactly one use; returns true in
// every other case.
2002 // If this GEP has only 0 indices, it is the same pointer as
2003 // Src. If Src is not a trivial GEP too, don't combine
2004 // the indices.
2005 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2006 !Src.hasOneUse())
2007 return false;
2008 return true;
2009}
2010
// NOTE(review): the signature line (listing line 2011) and listing line 2132
// are missing from this extract; `Inst` is presumably the binary operator
// being visited -- confirm.
//
// Vector-only folds that move shuffles/reverses across a binary operator.
// Bails out immediately for non-vector result types. In order, the body tries:
//   * binop of two concatenating shuffles -> concat of two narrow binops;
//   * binop of vector reverses (or reverse + splat) -> reverse of the binop;
//   * (from here on only if `Inst` is safe to speculatively execute)
//     binop of two single-source shuffles sharing one mask -> shuffle of the
//     binop;
//   * commutative binop of two select-shuffles with commuted operands ->
//     plain binop of the shuffle sources;
//   * binop of a single-source shuffle and a constant -> shuffle of a binop
//     with a remapped constant;
//   * associative + commutative binop with a splat-shuffle operand ->
//     reassociate so the splat is applied after the inner binop.
// Returns the replacement instruction, or nullptr if nothing matched.
2012 if (!isa<VectorType>(Inst.getType()))
2013 return nullptr;
2014
2015 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2016 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2017 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2018 cast<VectorType>(Inst.getType())->getElementCount());
2019 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2020 cast<VectorType>(Inst.getType())->getElementCount());
2021
2022 // If both operands of the binop are vector concatenations, then perform the
2023 // narrow binop on each pair of the source operands followed by concatenation
2024 // of the results.
2025 Value *L0, *L1, *R0, *R1;
2026 ArrayRef<int> Mask;
2027 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2028 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2029 LHS->hasOneUse() && RHS->hasOneUse() &&
2030 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2031 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2032 // This transform does not have the speculative execution constraint as
2033 // below because the shuffle is a concatenation. The new binops are
2034 // operating on exactly the same elements as the existing binop.
2035 // TODO: We could ease the mask requirement to allow different undef lanes,
2036 // but that requires an analysis of the binop-with-undef output value.
2037 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2038 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2039 BO->copyIRFlags(&Inst);
2040 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2041 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2042 BO->copyIRFlags(&Inst);
2043 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2044 }
2045
// Helper: build Op(X, Y) with Inst's name and IR flags, then wrap the result
// in a call to the vector_reverse intrinsic declared for that result type.
2046 auto createBinOpReverse = [&](Value *X, Value *Y) {
2047 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2048 if (auto *BO = dyn_cast<BinaryOperator>(V))
2049 BO->copyIRFlags(&Inst);
2050 Module *M = Inst.getModule();
2051 Function *F =
2052 Intrinsic::getDeclaration(M, Intrinsic::vector_reverse, V->getType());
2053 return CallInst::Create(F, V);
2054 };
2055
2056 // NOTE: Reverse shuffles don't require the speculative execution protection
2057 // below because they don't affect which lanes take part in the computation.
2058
2059 Value *V1, *V2;
2060 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2061 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2062 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2063 (LHS->hasOneUse() || RHS->hasOneUse() ||
2064 (LHS == RHS && LHS->hasNUses(2))))
2065 return createBinOpReverse(V1, V2);
2066
2067 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2068 if (LHS->hasOneUse() && isSplatValue(RHS))
2069 return createBinOpReverse(V1, RHS);
2070 }
2071 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2072 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2073 return createBinOpReverse(LHS, V2);
2074
2075 // It may not be safe to reorder shuffles and things like div, urem, etc.
2076 // because we may trap when executing those ops on unknown vector elements.
2077 // See PR20059.
2078 if (!isSafeToSpeculativelyExecute(&Inst))
2079 return nullptr;
2080
// Helper: build Op(X, Y) with Inst's IR flags and return a new single-source
// shuffle of that result using mask M.
2081 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2082 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2083 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2084 BO->copyIRFlags(&Inst);
2085 return new ShuffleVectorInst(XY, M);
2086 };
2087
2088 // If both arguments of the binary operation are shuffles that use the same
2089 // mask and shuffle within a single vector, move the shuffle after the binop.
2090 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2091 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2092 V1->getType() == V2->getType() &&
2093 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2094 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2095 return createBinOpShuffle(V1, V2, Mask);
2096 }
2097
2098 // If both arguments of a commutative binop are select-shuffles that use the
2099 // same mask with commuted operands, the shuffles are unnecessary.
2100 if (Inst.isCommutative() &&
2101 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2102 match(RHS,
2103 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2104 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2105 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2106 // TODO: Allow shuffles that contain undefs in the mask?
2107 // That is legal, but it reduces undef knowledge.
2108 // TODO: Allow arbitrary shuffles by shuffling after binop?
2109 // That might be legal, but we have to deal with poison.
2110 if (LShuf->isSelect() &&
2111 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2112 RShuf->isSelect() &&
2113 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2114 // Example:
2115 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2116 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2117 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2118 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2119 NewBO->copyIRFlags(&Inst);
2120 return NewBO;
2121 }
2122 }
2123
2124 // If one argument is a shuffle within one vector and the other is a constant,
2125 // try moving the shuffle after the binary operation. This canonicalization
2126 // intends to move shuffles closer to other shuffles and binops closer to
2127 // other binops, so they can be folded. It may also enable demanded elements
2128 // transforms.
2129 Constant *C;
2130 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
// NOTE(review): the start of the match expression (listing line 2132) is
// missing from this extract; the surviving tail binds `Mask` and `C`, and the
// code below also relies on `V1` having been bound by it.
2131 if (InstVTy &&
2133 m_Mask(Mask))),
2134 m_ImmConstant(C))) &&
2135 cast<FixedVectorType>(V1->getType())->getNumElements() <=
2136 InstVTy->getNumElements()) {
2137 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2138 "Shuffle should not change scalar type");
2139
2140 // Find constant NewC that has property:
2141 // shuffle(NewC, ShMask) = C
2142 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2143 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2144 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2145 bool ConstOp1 = isa<Constant>(RHS);
2146 ArrayRef<int> ShMask = Mask;
2147 unsigned SrcVecNumElts =
2148 cast<FixedVectorType>(V1->getType())->getNumElements();
2149 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
// Every lane of the candidate constant starts as poison and is filled in as
// mask elements are visited.
2150 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2151 bool MayChange = true;
2152 unsigned NumElts = InstVTy->getNumElements();
2153 for (unsigned I = 0; I < NumElts; ++I) {
2154 Constant *CElt = C->getAggregateElement(I);
2155 if (ShMask[I] >= 0) {
2156 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2157 Constant *NewCElt = NewVecC[ShMask[I]];
2158 // Bail out if:
2159 // 1. The constant vector contains a constant expression.
2160 // 2. The shuffle needs an element of the constant vector that can't
2161 // be mapped to a new constant vector.
2162 // 3. This is a widening shuffle that copies elements of V1 into the
2163 // extended elements (extending with poison is allowed).
2164 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2165 I >= SrcVecNumElts) {
2166 MayChange = false;
2167 break;
2168 }
2169 NewVecC[ShMask[I]] = CElt;
2170 }
2171 // If this is a widening shuffle, we must be able to extend with poison
2172 // elements. If the original binop does not produce a poison in the high
2173 // lanes, then this transform is not safe.
2174 // Similarly for poison lanes due to the shuffle mask, we can only
2175 // transform binops that preserve poison.
2176 // TODO: We could shuffle those non-poison constant values into the
2177 // result by using a constant vector (rather than a poison vector)
2178 // as operand 1 of the new binop, but that might be too aggressive
2179 // for target-independent shuffle creation.
2180 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2181 Constant *MaybePoison =
2182 ConstOp1
2183 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
2184 : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
2185 if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
2186 MayChange = false;
2187 break;
2188 }
2189 }
2190 }
2191 if (MayChange) {
2192 Constant *NewC = ConstantVector::get(NewVecC);
2193 // It may not be safe to execute a binop on a vector with poison elements
2194 // because the entire instruction can be folded to undef or create poison
2195 // that did not exist in the original code.
2196 // TODO: The shift case should not be necessary.
2197 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2198 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2199
2200 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2201 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2202 Value *NewLHS = ConstOp1 ? V1 : NewC;
2203 Value *NewRHS = ConstOp1 ? NewC : V1;
2204 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2205 }
2206 }
2207
2208 // Try to reassociate to sink a splat shuffle after a binary operation.
2209 if (Inst.isAssociative() && Inst.isCommutative()) {
2210 // Canonicalize shuffle operand as LHS.
2211 if (isa<ShuffleVectorInst>(RHS))
2212 std::swap(LHS, RHS);
2213
2214 Value *X;
2215 ArrayRef<int> MaskC;
2216 int SplatIndex;
2217 Value *Y, *OtherOp;
2218 if (!match(LHS,
2219 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2220 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2221 X->getType() != Inst.getType() ||
2222 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2223 return nullptr;
2224
2225 // FIXME: This may not be safe if the analysis allows undef elements. By
2226 // moving 'Y' before the splat shuffle, we are implicitly assuming
2227 // that it is not undef/poison at the splat index.
2228 if (isSplatValue(OtherOp, SplatIndex)) {
2229 std::swap(Y, OtherOp);
2230 } else if (!isSplatValue(Y, SplatIndex)) {
2231 return nullptr;
2232 }
2233
2234 // X and Y are splatted values, so perform the binary operation on those
2235 // values followed by a splat followed by the 2nd binary operation:
2236 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2237 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2238 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2239 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2240 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2241
2242 // Intersect FMF on both new binops. Other (poison-generating) flags are
2243 // dropped to be safe.
2244 if (isa<FPMathOperator>(R)) {
2245 R->copyFastMathFlags(&Inst);
2246 R->andIRFlags(RHS);
2247 }
2248 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2249 NewInstBO->copyIRFlags(R);
2250 return R;
2251 }
2252
2253 return nullptr;
2254}
2255
2256/// Try to narrow the width of a binop if at least 1 operand is an extend of
2257/// of a value. This requires a potentially expensive known bits check to make
2258/// sure the narrow op does not overflow.
2259Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2260 // We need at least one extended operand.
2261 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2262
2263 // If this is a sub, we swap the operands since we always want an extension
2264 // on the RHS. The LHS can be an extension or a constant.
2265 if (BO.getOpcode() == Instruction::Sub)
2266 std::swap(Op0, Op1);
2267
2268 Value *X;
2269 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2270 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2271 return nullptr;
2272
2273 // If both operands are the same extension from the same source type and we
2274 // can eliminate at least one (hasOneUse), this might work.
2275 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2276 Value *Y;
2277 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2278 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2279 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2280 // If that did not match, see if we have a suitable constant operand.
2281 // Truncating and extending must produce the same constant.
2282 Constant *WideC;
2283 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2284 return nullptr;
2285 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
2286 if (!NarrowC)
2287 return nullptr;
2288 Y = NarrowC;
2289 }
2290
2291 // Swap back now that we found our operands.
2292 if (BO.getOpcode() == Instruction::Sub)
2293 std::swap(X, Y);
2294
2295 // Both operands have narrow versions. Last step: the math must not overflow
2296 // in the narrow width.
2297 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2298 return nullptr;
2299
2300 // bo (ext X), (ext Y) --> ext (bo X, Y)
2301 // bo (ext X), C --> ext (bo X, C')
2302 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2303 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2304 if (IsSext)
2305 NewBinOp->setHasNoSignedWrap();
2306 else
2307 NewBinOp->setHasNoUnsignedWrap();
2308 }
2309 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2310}
2311
// NOTE(review): the signature line (listing line 2312) is missing from this
// extract.
// Returns true only when both GEP1 and GEP2 report isInBounds().
2313 return GEP1.isInBounds() && GEP2.isInBounds();
2314}
2315
2316/// Thread a GEP operation with constant indices through the constant true/false
2317/// arms of a select.
///
/// Returns a new select over two GEPs built on the constant arms, or nullptr
/// when `GEP` has a non-constant index or its pointer operand is not a select
/// instruction whose true and false values are both constants.
///
/// NOTE(review): the first line of the signature (listing line 2318) is
/// missing from this extract; only the trailing `Builder` parameter survives.
2319 InstCombiner::BuilderTy &Builder) {
2320 if (!GEP.hasAllConstantIndices())
2321 return nullptr;
2322
2323 Instruction *Sel;
2324 Value *Cond;
2325 Constant *TrueC, *FalseC;
2326 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2327 !match(Sel,
2328 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2329 return nullptr;
2330
2331 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2332 // Propagate 'inbounds' and metadata from existing instructions.
2333 // Note: using IRBuilder to create the constants for efficiency.
// The same index list, source element type and no-wrap flags of the original
// GEP are reused for both arms.
2334 SmallVector<Value *, 4> IndexC(GEP.indices());
2335 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2336 Type *Ty = GEP.getSourceElementType();
2337 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2338 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
// NOTE(review): the trailing `Sel` argument is presumably the instruction
// whose metadata is carried over to the new select -- confirm.
2339 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2340}
2341
2342// Canonicalization:
2343// gep T, (gep i8, base, C1), (Index + C2) into
2344// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
//
// Returns the new outer GEP, or nullptr when the pattern does not match or
// the use-count condition near the end of the body is not met.
//
// NOTE(review): the first line of the signature (listing line 2345) is
// missing from this extract; `GEP` is presumably the outer GEP instruction.
2346 GEPOperator *Src,
2347 InstCombinerImpl &IC) {
// Only a single-index outer GEP is handled.
2348 if (GEP.getNumIndices() != 1)
2349 return nullptr;
2350 auto &DL = IC.getDataLayout();
2351 Value *Base;
2352 const APInt *C1;
// Src must be a pointer-add of a constant offset C1 onto Base.
2353 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2354 return nullptr;
2355 Value *VarIndex;
2356 const APInt *C2;
2357 Type *PtrTy = Src->getType()->getScalarType();
2358 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
// The outer index must be an add-like of a value and a constant C2.
2359 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2360 return nullptr;
// Both constants must already have the pointer's index width so they can be
// combined directly in APInt arithmetic below.
2361 if (C1->getBitWidth() != IndexSizeInBits ||
2362 C2->getBitWidth() != IndexSizeInBits)
2363 return nullptr;
2364 Type *BaseType = GEP.getSourceElementType();
2365 if (isa<ScalableVectorType>(BaseType))
2366 return nullptr;
2367 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2368 APInt NewOffset = TypeSize * *C2 + *C1;
// Rewrite when the combined constant offset is zero, or when both the inner
// GEP and the index expression have a single use.
2369 if (NewOffset.isZero() ||
2370 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2371 Value *GEPConst =
2372 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset));
2373 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex);
2374 }
2375
2376 return nullptr;
2377}
2378
// NOTE(review): the first line of the signature (listing line 2379) and
// listing lines 2395, 2409, 2418, 2430, 2436 and 2513 are missing from this
// extract. `GEP` is presumably the outer GEP instruction and `Src` the GEP
// that produces its pointer operand -- confirm against the full file.
//
// Purpose: merge `GEP` with its source GEP `Src`. The body proceeds as:
//   1. give up if shouldMergeGEPs() rejects the pair;
//   2. try canonicalizeGEPOfConstGEPI8();
//   3. if `GEP` has only constant indices (and `Src` is single-use or fully
//      constant), fold by total byte offset;
//   4. otherwise, if the element types line up, splice the two index lists,
//      adding the touching indices when `Src` ends in a sequential index.
// Returns the replacement instruction (or `&GEP` when updated in place), or
// nullptr when no merge is performed.
2380 GEPOperator *Src) {
2381 // Combine Indices - If the source pointer to this getelementptr instruction
2382 // is a getelementptr instruction with matching element type, combine the
2383 // indices of the two getelementptr instructions into a single instruction.
2384 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2385 return nullptr;
2386
2387 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2388 return I;
2389
2390 // For constant GEPs, use a more general offset-based folding approach.
2391 Type *PtrTy = Src->getType()->getScalarType();
2392 if (GEP.hasAllConstantIndices() &&
2393 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2394 // Split Src into a variable part and a constant suffix.
// NOTE(review): the declaration of `GTI` (listing line 2395) is missing here.
2396 Type *BaseType = GTI.getIndexedType();
2397 bool IsFirstType = true;
2398 unsigned NumVarIndices = 0;
// After this loop NumVarIndices is one past the position of the last
// non-ConstantInt index of Src (0 if there is none), and BaseType is the
// indexed type recorded at that position.
2399 for (auto Pair : enumerate(Src->indices())) {
2400 if (!isa<ConstantInt>(Pair.value())) {
2401 BaseType = GTI.getIndexedType();
2402 IsFirstType = false;
2403 NumVarIndices = Pair.index() + 1;
2404 }
2405 ++GTI;
2406 }
2407
2408 // Determine the offset for the constant suffix of Src.
// NOTE(review): the declaration of `Offset` (listing line 2409) is missing
// here.
2410 if (NumVarIndices != Src->getNumIndices()) {
2411 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2412 if (BaseType->isScalableTy())
2413 return nullptr;
2414
2415 SmallVector<Value *> ConstantIndices;
// NOTE(review): the argument of this push_back (listing line 2418) is missing
// here.
2416 if (!IsFirstType)
2417 ConstantIndices.push_back(
2419 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2420 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2421 }
2422
2423 // Add the offset for GEP (which is fully constant).
2424 if (!GEP.accumulateConstantOffset(DL, Offset))
2425 return nullptr;
2426
// Keep the total offset: `Offset` is tested again below after the
// (missing) conversion step, while the i8 fallback uses the saved total.
2427 APInt OffsetOld = Offset;
2428 // Convert the total offset back into indices.
// NOTE(review): the initializer of `ConstIndices` (listing line 2430) is
// missing here.
2429 SmallVector<APInt> ConstIndices =
2431 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2432 // If both GEP are constant-indexed, and cannot be merged in either way,
2433 // convert them to a GEP of i8.
// NOTE(review): the start of the replacement expression (listing line 2436)
// is missing here.
2434 if (Src->hasAllConstantIndices())
2435 return replaceInstUsesWith(
2437 Builder.getInt8Ty(), Src->getOperand(0),
2438 Builder.getInt(OffsetOld), "",
2439 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2440 return nullptr;
2441 }
2442
2443 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2444 SmallVector<Value *> Indices;
// Start from Src's leading NumVarIndices indices, then append the recomputed
// constant indices.
2445 append_range(Indices, drop_end(Src->indices(),
2446 Src->getNumIndices() - NumVarIndices));
2447 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2448 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2449 // Even if the total offset is inbounds, we may end up representing it
2450 // by first performing a larger negative offset, and then a smaller
2451 // positive one. The large negative offset might go out of bounds. Only
2452 // preserve inbounds if all signs are the same.
2453 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2454 }
2455
2456 return replaceInstUsesWith(
2457 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2458 Indices, "", IsInBounds));
2459 }
2460
// The index-splicing path below requires Src's result element type to equal
// GEP's source element type.
2461 if (Src->getResultElementType() != GEP.getSourceElementType())
2462 return nullptr;
2463
2464 SmallVector<Value*, 8> Indices;
2465
2466 // Find out whether the last index in the source GEP is a sequential idx.
2467 bool EndsWithSequential = false;
2468 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2469 I != E; ++I)
2470 EndsWithSequential = I.isSequential();
2471
2472 // Can we combine the two pointer arithmetics offsets?
2473 if (EndsWithSequential) {
2474 // Replace: gep (gep %P, long B), long A, ...
2475 // With: T = long A+B; gep %P, T, ...
2476 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2477 Value *GO1 = GEP.getOperand(1);
2478
2479 // If they aren't the same type, then the input hasn't been processed
2480 // by the loop above yet (which canonicalizes sequential index types to
2481 // intptr_t). Just avoid transforming this until the input has been
2482 // normalized.
2483 if (SO1->getType() != GO1->getType())
2484 return nullptr;
2485
// simplifyAddInst yields a value only when the sum simplifies; no new add
// instruction is created on this path.
2486 Value *Sum =
2487 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2488 // Only do the combine when we are sure the cost after the
2489 // merge is never more than that before the merge.
2490 if (Sum == nullptr)
2491 return nullptr;
2492
2493 // Update the GEP in place if possible.
2494 if (Src->getNumOperands() == 2) {
2495 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2496 replaceOperand(GEP, 0, Src->getOperand(0));
2497 replaceOperand(GEP, 1, Sum);
2498 return &GEP;
2499 }
// General case: Src's indices except the last, then the sum, then GEP's
// indices after the first.
2500 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2501 Indices.push_back(Sum);
2502 Indices.append(GEP.op_begin()+2, GEP.op_end());
2503 } else if (isa<Constant>(*GEP.idx_begin()) &&
2504 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2505 Src->getNumOperands() != 1) {
2506 // Otherwise we can do the fold if the first index of the GEP is a zero
2507 Indices.append(Src->op_begin()+1, Src->op_end());
2508 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2509 }
2510
// NOTE(review): the start of the replacement expression (listing line 2513)
// is missing here.
2511 if (!Indices.empty())
2512 return replaceInstUsesWith(
2514 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2515 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2516
2517 return nullptr;
2518}
2519
2521 BuilderTy *Builder,
2522 bool &DoesConsume, unsigned Depth) {
2523 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2524 // ~(~(X)) -> X.
2525 Value *A, *B;
2526 if (match(V, m_Not(m_Value(A)))) {
2527 DoesConsume = true;
2528 return A;
2529 }
2530
2531 Constant *C;
2532 // Constants can be considered to be not'ed values.
2533 if (match(V, m_ImmConstant(C)))
2534 return ConstantExpr::getNot(C);
2535
2537 return nullptr;
2538
2539 // The rest of the cases require that we invert all uses so don't bother
2540 // doing the analysis if we know we can't use the result.
2541 if (!WillInvertAllUses)
2542 return nullptr;
2543
2544 // Compares can be inverted if all of their uses are being modified to use
2545 // the ~V.
2546 if (auto *I = dyn_cast<CmpInst>(V)) {
2547 if (Builder != nullptr)
2548 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2549 I->getOperand(1));
2550 return NonNull;
2551 }
2552
2553 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2554 // `(-1 - B) - A` if we are willing to invert all of the uses.
2555 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2556 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2557 DoesConsume, Depth))
2558 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2559 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2560 DoesConsume, Depth))
2561 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2562 return nullptr;
2563 }
2564
2565 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2566 // into `A ^ B` if we are willing to invert all of the uses.
2567 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2568 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2569 DoesConsume, Depth))
2570 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2571 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2572 DoesConsume, Depth))
2573 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2574 return nullptr;
2575 }
2576
2577 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2578 // `A + (-1 - B)` if we are willing to invert all of the uses.
2579 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2580 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2581 DoesConsume, Depth))
2582 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2583 return nullptr;
2584 }
2585
2586 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2587 // into `A s>> B` if we are willing to invert all of the uses.
2588 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2589 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2590 DoesConsume, Depth))
2591 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2592 return nullptr;
2593 }
2594
2595 Value *Cond;
2596 // LogicOps are special in that we canonicalize them at the cost of an
2597 // instruction.
2598 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2599 !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
2600 // Selects/min/max with invertible operands are freely invertible
2601 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2602 bool LocalDoesConsume = DoesConsume;
2603 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2604 LocalDoesConsume, Depth))
2605 return nullptr;
2606 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2607 LocalDoesConsume, Depth)) {
2608 DoesConsume = LocalDoesConsume;
2609 if (Builder != nullptr) {
2610 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2611 DoesConsume, Depth);
2612 assert(NotB != nullptr &&
2613 "Unable to build inverted value for known freely invertable op");
2614 if (auto *II = dyn_cast<IntrinsicInst>(V))
2616 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2617 return Builder->CreateSelect(Cond, NotA, NotB);
2618 }
2619 return NonNull;
2620 }
2621 }
2622
2623 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2624 bool LocalDoesConsume = DoesConsume;
2626 for (Use &U : PN->operands()) {
2627 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2628 Value *NewIncomingVal = getFreelyInvertedImpl(
2629 U.get(), /*WillInvertAllUses=*/false,
2630 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2631 if (NewIncomingVal == nullptr)
2632 return nullptr;
2633 // Make sure that we can safely erase the original PHI node.
2634 if (NewIncomingVal == V)
2635 return nullptr;
2636 if (Builder != nullptr)
2637 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2638 }
2639
2640 DoesConsume = LocalDoesConsume;
2641 if (Builder != nullptr) {
2644 PHINode *NewPN =
2645 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2646 for (auto [Val, Pred] : IncomingValues)
2647 NewPN->addIncoming(Val, Pred);
2648 return NewPN;
2649 }
2650 return NonNull;
2651 }
2652
2653 if (match(V, m_SExtLike(m_Value(A)))) {
2654 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2655 DoesConsume, Depth))
2656 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2657 return nullptr;
2658 }
2659
2660 if (match(V, m_Trunc(m_Value(A)))) {
2661 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2662 DoesConsume, Depth))
2663 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2664 return nullptr;
2665 }
2666
2667 // De Morgan's Laws:
2668 // (~(A | B)) -> (~A & ~B)
2669 // (~(A & B)) -> (~A | ~B)
2670 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2671 bool IsLogical, Value *A,
2672 Value *B) -> Value * {
2673 bool LocalDoesConsume = DoesConsume;
2674 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2675 LocalDoesConsume, Depth))
2676 return nullptr;
2677 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2678 LocalDoesConsume, Depth)) {
2679 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2680 LocalDoesConsume, Depth);
2681 DoesConsume = LocalDoesConsume;
2682 if (IsLogical)
2683 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2684 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2685 }
2686
2687 return nullptr;
2688 };
2689
2690 if (match(V, m_Or(m_Value(A), m_Value(B))))
2691 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2692 B);
2693
2694 if (match(V, m_And(m_Value(A), m_Value(B))))
2695 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2696 B);
2697
2698 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2699 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2700 B);
2701
2702 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2703 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2704 B);
2705
2706 return nullptr;
2707}
2708
2710 Value *PtrOp = GEP.getOperand(0);
2711 SmallVector<Value *, 8> Indices(GEP.indices());
2712 Type *GEPType = GEP.getType();
2713 Type *GEPEltType = GEP.getSourceElementType();
2714 if (Value *V =
2715 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
2717 return replaceInstUsesWith(GEP, V);
2718
2719 // For vector geps, use the generic demanded vector support.
2720 // Skip if GEP return type is scalable. The number of elements is unknown at
2721 // compile-time.
2722 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2723 auto VWidth = GEPFVTy->getNumElements();
2724 APInt PoisonElts(VWidth, 0);
2725 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2726 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2727 PoisonElts)) {
2728 if (V != &GEP)
2729 return replaceInstUsesWith(GEP, V);
2730 return &GEP;
2731 }
2732
2733 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2734 // possible (decide on canonical form for pointer broadcast), 3) exploit
2735 // undef elements to decrease demanded bits
2736 }
2737
2738 // Eliminate unneeded casts for indices, and replace indices which displace
2739 // by multiples of a zero size type with zero.
2740 bool MadeChange = false;
2741
2742 // Index width may not be the same width as pointer width.
2743 // Data layout chooses the right type based on supported integer types.
2744 Type *NewScalarIndexTy =
2745 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2746
2748 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2749 ++I, ++GTI) {
2750 // Skip indices into struct types.
2751 if (GTI.isStruct())
2752 continue;
2753
2754 Type *IndexTy = (*I)->getType();
2755 Type *NewIndexType =
2756 IndexTy->isVectorTy()
2757 ? VectorType::get(NewScalarIndexTy,
2758 cast<VectorType>(IndexTy)->getElementCount())
2759 : NewScalarIndexTy;
2760
2761 // If the element type has zero size then any index over it is equivalent
2762 // to an index of zero, so replace it with zero if it is not zero already.
2763 Type *EltTy = GTI.getIndexedType();
2764 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2765 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2766 *I = Constant::getNullValue(NewIndexType);
2767 MadeChange = true;
2768 }
2769
2770 if (IndexTy != NewIndexType) {
2771 // If we are using a wider index than needed for this platform, shrink
2772 // it to what we need. If narrower, sign-extend it to what we need.
2773 // This explicit cast can make subsequent optimizations more obvious.
2774 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2775 MadeChange = true;
2776 }
2777 }
2778 if (MadeChange)
2779 return &GEP;
2780
2781 // Canonicalize constant GEPs to i8 type.
2782 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2784 if (GEP.accumulateConstantOffset(DL, Offset))
2785 return replaceInstUsesWith(
2787 GEP.getNoWrapFlags()));
2788 }
2789
2790 // Canonicalize
2791 // - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
2792 // This has better support in BasicAA.
2793 // - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
2794 // multiplies together.
2795 if (GEPEltType->isScalableTy() ||
2796 (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
2797 match(GEP.getOperand(1),
2799 m_Shl(m_Value(), m_ConstantInt())))))) {
2800 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
2801 return replaceInstUsesWith(
2802 GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
2803 }
2804
2805 // Check to see if the inputs to the PHI node are getelementptr instructions.
2806 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2807 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2808 if (!Op1)
2809 return nullptr;
2810
2811 // Don't fold a GEP into itself through a PHI node. This can only happen
2812 // through the back-edge of a loop. Folding a GEP into itself means that
2813 // the value of the previous iteration needs to be stored in the meantime,
2814 // thus requiring an additional register variable to be live, but not
2815 // actually achieving anything (the GEP still needs to be executed once per
2816 // loop iteration).
2817 if (Op1 == &GEP)
2818 return nullptr;
2819
2820 int DI = -1;
2821
2822 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2823 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2824 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2825 Op1->getSourceElementType() != Op2->getSourceElementType())
2826 return nullptr;
2827
2828 // As for Op1 above, don't try to fold a GEP into itself.
2829 if (Op2 == &GEP)
2830 return nullptr;
2831
2832 // Keep track of the type as we walk the GEP.
2833 Type *CurTy = nullptr;
2834
2835 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2836 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2837 return nullptr;
2838
2839 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2840 if (DI == -1) {
2841 // We have not seen any differences yet in the GEPs feeding the
2842 // PHI, so we record this one if it is allowed to be a
2843 // variable.
2844
2845 // The first two arguments can vary for any GEP, the rest have to be
2846 // static for struct slots
2847 if (J > 1) {
2848 assert(CurTy && "No current type?");
2849 if (CurTy->isStructTy())
2850 return nullptr;
2851 }
2852
2853 DI = J;
2854 } else {
2855 // The GEP is different by more than one input. While this could be
2856 // extended to support GEPs that vary by more than one variable it
2857 // doesn't make sense since it greatly increases the complexity and
2858 // would result in an R+R+R addressing mode which no backend
2859 // directly supports and would need to be broken into several
2860 // simpler instructions anyway.
2861 return nullptr;
2862 }
2863 }
2864
2865 // Sink down a layer of the type for the next iteration.
2866 if (J > 0) {
2867 if (J == 1) {
2868 CurTy = Op1->getSourceElementType();
2869 } else {
2870 CurTy =
2871 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2872 }
2873 }
2874 }
2875 }
2876
2877 // If not all GEPs are identical we'll have to create a new PHI node.
2878 // Check that the old PHI node has only one use so that it will get
2879 // removed.
2880 if (DI != -1 && !PN->hasOneUse())
2881 return nullptr;
2882
2883 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2884 if (DI == -1) {
2885 // All the GEPs feeding the PHI are identical. Clone one down into our
2886 // BB so that it can be merged with the current GEP.
2887 } else {
2888 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2889 // into the current block so it can be merged, and create a new PHI to
2890 // set that index.
2891 PHINode *NewPN;
2892 {
2895 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2896 PN->getNumOperands());
2897 }
2898
2899 for (auto &I : PN->operands())
2900 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2901 PN->getIncomingBlock(I));
2902
2903 NewGEP->setOperand(DI, NewPN);
2904 }
2905
2906 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2907 return replaceOperand(GEP, 0, NewGEP);
2908 }
2909
2910 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2911 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2912 return I;
2913
2914 if (GEP.getNumIndices() == 1) {
2915 unsigned AS = GEP.getPointerAddressSpace();
2916 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2917 DL.getIndexSizeInBits(AS)) {
2918 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2919
2920 if (TyAllocSize == 1) {
2921 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2922 // but only if the result pointer is only used as if it were an integer,
2923 // or both point to the same underlying object (otherwise provenance is
2924 // not necessarily retained).
2925 Value *X = GEP.getPointerOperand();
2926 Value *Y;
2927 if (match(GEP.getOperand(1),
2929 GEPType == Y->getType()) {
2930 bool HasSameUnderlyingObject =
2932 bool Changed = false;
2933 GEP.replaceUsesWithIf(Y, [&](Use &U) {
2934 bool ShouldReplace = HasSameUnderlyingObject ||
2935 isa<ICmpInst>(U.getUser()) ||
2936 isa<PtrToIntInst>(U.getUser());
2937 Changed |= ShouldReplace;
2938 return ShouldReplace;
2939 });
2940 return Changed ? &GEP : nullptr;
2941 }
2942 } else if (auto *ExactIns =
2943 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
2944 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
2945 Value *V;
2946 if (ExactIns->isExact()) {
2947 if ((has_single_bit(TyAllocSize) &&
2948 match(GEP.getOperand(1),
2949 m_Shr(m_Value(V),
2950 m_SpecificInt(countr_zero(TyAllocSize))))) ||
2951 match(GEP.getOperand(1),
2952 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
2954 GEP.getPointerOperand(), V,
2955 GEP.getNoWrapFlags());
2956 }
2957 }
2958 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
2959 // Try to canonicalize non-i8 element type to i8 if the index is an
2960 // exact instruction. If the index is an exact instruction (div/shr)
2961 // with a constant RHS, we can fold the non-i8 element scale into the
2962 // div/shr (similar to the mul case, just inverted).
2963 const APInt *C;
2964 std::optional<APInt> NewC;
2965 if (has_single_bit(TyAllocSize) &&
2966 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
2967 C->uge(countr_zero(TyAllocSize)))
2968 NewC = *C - countr_zero(TyAllocSize);
2969 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
2970 APInt Quot;
2971 uint64_t Rem;
2972 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
2973 if (Rem == 0)
2974 NewC = Quot;
2975 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
2976 APInt Quot;
2977 int64_t Rem;
2978 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
2979 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
2980 if (!Quot.isAllOnes() && Rem == 0)
2981 NewC = Quot;
2982 }
2983
2984 if (NewC.has_value()) {
2985 Value *NewOp = Builder.CreateBinOp(
2986 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
2987 ConstantInt::get(V->getType(), *NewC));
2988 cast<BinaryOperator>(NewOp)->setIsExact();
2990 GEP.getPointerOperand(), NewOp,
2991 GEP.getNoWrapFlags());
2992 }
2993 }
2994 }
2995 }
2996 }
2997 // We do not handle pointer-vector geps here.
2998 if (GEPType->isVectorTy())
2999 return nullptr;
3000
3001 if (GEP.getNumIndices() == 1) {
3002 // We can only preserve inbounds if the original gep is inbounds, the add
3003 // is nsw, and the add operands are non-negative.
3004 auto CanPreserveInBounds = [&](bool AddIsNSW, Value *Idx1, Value *Idx2) {
3006 return GEP.isInBounds() && AddIsNSW && isKnownNonNegative(Idx1, Q) &&
3007 isKnownNonNegative(Idx2, Q);
3008 };
3009
3010 // Try to replace ADD + GEP with GEP + GEP.
3011 Value *Idx1, *Idx2;
3012 if (match(GEP.getOperand(1),
3013 m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
3014 // %idx = add i64 %idx1, %idx2
3015 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3016 // as:
3017 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3018 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3019 bool IsInBounds = CanPreserveInBounds(
3020 cast<OverflowingBinaryOperator>(GEP.getOperand(1))->hasNoSignedWrap(),
3021 Idx1, Idx2);
3022 auto *NewPtr =
3023 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3024 Idx1, "", IsInBounds);
3025 return replaceInstUsesWith(
3026 GEP, Builder.CreateGEP(GEP.getSourceElementType(), NewPtr, Idx2, "",
3027 IsInBounds));
3028 }
3029 ConstantInt *C;
3030 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
3031 m_Value(Idx1), m_ConstantInt(C))))))) {
3032 // %add = add nsw i32 %idx1, idx2
3033 // %sidx = sext i32 %add to i64
3034 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3035 // as:
3036 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3037 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3038 bool IsInBounds = CanPreserveInBounds(
3039 /*IsNSW=*/true, Idx1, C);
3040 auto *NewPtr = Builder.CreateGEP(
3041 GEP.getSourceElementType(), GEP.getPointerOperand(),
3042 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "",
3043 IsInBounds);
3044 return replaceInstUsesWith(
3045 GEP,
3046 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3047 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3048 "", IsInBounds));
3049 }
3050 }
3051
3052 if (!GEP.isInBounds()) {
3053 unsigned IdxWidth =
3055 APInt BasePtrOffset(IdxWidth, 0);
3056 Value *UnderlyingPtrOp =
3058 BasePtrOffset);
3059 bool CanBeNull, CanBeFreed;
3060 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3061 DL, CanBeNull, CanBeFreed);
3062 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3063 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3064 BasePtrOffset.isNonNegative()) {
3065 APInt AllocSize(IdxWidth, DerefBytes);
3066 if (BasePtrOffset.ule(AllocSize)) {
3068 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3069 }
3070 }
3071 }
3072 }
3073
3075 return R;
3076
3077 return nullptr;
3078}
3079
3081 Instruction *AI) {
3082 if (isa<ConstantPointerNull>(V))
3083 return true;
3084 if (auto *LI = dyn_cast<LoadInst>(V))
3085 return isa<GlobalVariable>(LI->getPointerOperand());
3086 // Two distinct allocations will never be equal.
3087 return isAllocLikeFn(V, &TLI) && V != AI;
3088}
3089
3090/// Given a call CB which uses an address UsedV, return true if we can prove the
3091/// call's only possible effect is storing to V.
3092static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3093 const TargetLibraryInfo &TLI) {
3094 if (!CB.use_empty())
3095 // TODO: add recursion if returned attribute is present
3096 return false;
3097
3098 if (CB.isTerminator())
3099 // TODO: remove implementation restriction
3100 return false;
3101
3102 if (!CB.willReturn() || !CB.doesNotThrow())
3103 return false;
3104
3105 // If the only possible side effect of the call is writing to the alloca,
3106 // and the result isn't used, we can safely remove any reads implied by the
3107 // call including those which might read the alloca itself.
3108 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3109 return Dest && Dest->Ptr == UsedV;
3110}
3111
3114 const TargetLibraryInfo &TLI) {
3116 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3117 Worklist.push_back(AI);
3118
3119 do {
3120 Instruction *PI = Worklist.pop_back_val();
3121 for (User *U : PI->users()) {
3122 Instruction *I = cast<Instruction>(U);
3123 switch (I->getOpcode()) {
3124 default:
3125 // Give up the moment we see something we can't handle.
3126 return false;
3127
3128 case Instruction::AddrSpaceCast:
3129 case Instruction::BitCast:
3130 case Instruction::GetElementPtr:
3131 Users.emplace_back(I);
3132 Worklist.push_back(I);
3133 continue;
3134
3135 case Instruction::ICmp: {
3136 ICmpInst *ICI = cast<ICmpInst>(I);
3137 // We can fold eq/ne comparisons with null to false/true, respectively.
3138 // We also fold comparisons in some conditions provided the alloc has
3139 // not escaped (see isNeverEqualToUnescapedAlloc).
3140 if (!ICI->isEquality())
3141 return false;
3142 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3143 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3144 return false;
3145
3146 // Do not fold compares to aligned_alloc calls, as they may have to
3147 // return null in case the required alignment cannot be satisfied,
3148 // unless we can prove that both alignment and size are valid.
3149 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3150 // Check if alignment and size of a call to aligned_alloc is valid,
3151 // that is alignment is a power-of-2 and the size is a multiple of the
3152 // alignment.
3153 const APInt *Alignment;
3154 const APInt *Size;
3155 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3156 match(CB->getArgOperand(1), m_APInt(Size)) &&
3157 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3158 };
3159 auto *CB = dyn_cast<CallBase>(AI);
3160 LibFunc TheLibFunc;
3161 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3162 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3163 !AlignmentAndSizeKnownValid(CB))
3164 return false;
3165 Users.emplace_back(I);
3166 continue;
3167 }
3168
3169 case Instruction::Call:
3170 // Ignore no-op and store intrinsics.
3171 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3172 switch (II->getIntrinsicID()) {
3173 default:
3174 return false;
3175
3176 case Intrinsic::memmove:
3177 case Intrinsic::memcpy:
3178 case Intrinsic::memset: {
3179 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3180 if (MI->isVolatile() || MI->getRawDest() != PI)
3181 return false;
3182 [[fallthrough]];
3183 }
3184 case Intrinsic::assume:
3185 case Intrinsic::invariant_start:
3186 case Intrinsic::invariant_end:
3187 case Intrinsic::lifetime_start:
3188 case Intrinsic::lifetime_end:
3189 case Intrinsic::objectsize:
3190 Users.emplace_back(I);
3191 continue;
3192 case Intrinsic::launder_invariant_group:
3193 case Intrinsic::strip_invariant_group:
3194 Users.emplace_back(I);
3195 Worklist.push_back(I);
3196 continue;
3197 }
3198 }
3199
3200 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3201 Users.emplace_back(I);
3202 continue;
3203 }
3204
3205 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3206 getAllocationFamily(I, &TLI) == Family) {
3207 assert(Family);
3208 Users.emplace_back(I);
3209 continue;
3210 }
3211
3212 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
3213 getAllocationFamily(I, &TLI) == Family) {
3214 assert(Family);
3215 Users.emplace_back(I);
3216 Worklist.push_back(I);
3217 continue;
3218 }
3219
3220 return false;
3221
3222 case Instruction::Store: {
3223 StoreInst *SI = cast<StoreInst>(I);
3224 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3225 return false;
3226 Users.emplace_back(I);
3227 continue;
3228 }
3229 }
3230 llvm_unreachable("missing a return?");
3231 }
3232 } while (!Worklist.empty());
3233 return true;
3234}
3235
3237 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3238
3239 // If we have a malloc call which is only used in any amount of comparisons to
3240 // null and free calls, delete the calls and replace the comparisons with true
3241 // or false as appropriate.
3242
3243 // This is based on the principle that we can substitute our own allocation
3244 // function (which will never return null) rather than knowledge of the
3245 // specific function being called. In some sense this can change the permitted
3246 // outputs of a program (when we convert a malloc to an alloca, the fact that
3247 // the allocation is now on the stack is potentially visible, for example),
3248 // but we believe in a permissible manner.
3250
3251 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3252 // before each store.
3255 std::unique_ptr<DIBuilder> DIB;
3256 if (isa<AllocaInst>(MI)) {
3257 findDbgUsers(DVIs, &MI, &DVRs);
3258 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3259 }
3260
3261 if (isAllocSiteRemovable(&MI, Users, TLI)) {
3262 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3263 // Lowering all @llvm.objectsize calls first because they may
3264 // use a bitcast/GEP of the alloca we are removing.
3265 if (!Users[i])
3266 continue;
3267
3268 Instruction *I = cast<Instruction>(&*Users[i]);
3269
3270 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3271 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3272 SmallVector<Instruction *> InsertedInstructions;
3273 Value *Result = lowerObjectSizeCall(
3274 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3275 for (Instruction *Inserted : InsertedInstructions)
3276 Worklist.add(Inserted);
3277 replaceInstUsesWith(*I, Result);
3279 Users[i] = nullptr; // Skip examining in the next loop.
3280 }
3281 }
3282 }
3283 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3284 if (!Users[i])
3285 continue;
3286
3287 Instruction *I = cast<Instruction>(&*Users[i]);
3288
3289 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3291 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3292 C->isFalseWhenEqual()));
3293 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3294 for (auto *DVI : DVIs)
3295 if (DVI->isAddressOfVariable())
3296 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
3297 for (auto *DVR : DVRs)
3298 if (DVR->isAddressOfVariable())
3299 ConvertDebugDeclareToDebugValue(DVR, SI, *DIB);
3300 } else {
3301 // Casts, GEP, or anything else: we're about to delete this instruction,
3302 // so it can not have any valid uses.
3303 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
3304 }
3306 }
3307
3308 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3309 // Replace invoke with a NOP intrinsic to maintain the original CFG
3310 Module *M = II->getModule();
3311 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3312 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
3313 std::nullopt, "", II->getParent());
3314 }
3315
3316 // Remove debug intrinsics which describe the value contained within the
3317 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3318 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3319 //
3320 // ```
3321 // define void @foo(i32 %0) {
3322 // %a = alloca i32 ; Deleted.
3323 // store i32 %0, i32* %a
3324 // dbg.value(i32 %0, "arg0") ; Not deleted.
3325 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3326 // call void @trivially_inlinable_no_op(i32* %a)
3327 // ret void
3328 // }
3329 // ```
3330 //
3331 // This may not be required if we stop describing the contents of allocas
3332 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3333 // the LowerDbgDeclare utility.
3334 //
3335 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3336 // "arg0" dbg.value may be stale after the call. However, failing to remove
3337 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3338 //
3339 // FIXME: the Assignment Tracking project has now likely made this
3340 // redundant (and it's sometimes harmful).
3341 for (auto *DVI : DVIs)
3342 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3343 DVI->eraseFromParent();
3344 for (auto *DVR : DVRs)
3345 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3346 DVR->eraseFromParent();
3347
3348 return eraseInstFromFunction(MI);
3349 }
3350 return nullptr;
3351}
3352
3353/// Move the call to free before a NULL test.
3354///
3355 /// Check if this free is accessed after its argument has been tested
3356/// against NULL (property 0).
3357/// If yes, it is legal to move this call in its predecessor block.
3358///
3359/// The move is performed only if the block containing the call to free
3360/// will be removed, i.e.:
3361/// 1. it has only one predecessor P, and P has two successors
3362/// 2. it contains the call, noops, and an unconditional branch
3363/// 3. its successor is the same as its predecessor's successor
3364///
3365 /// Profitability is out of scope here and this function should
3366/// be called only if the caller knows this transformation would be
3367/// profitable (e.g., for code size).
3369 const DataLayout &DL) {
3370 Value *Op = FI.getArgOperand(0);
3371 BasicBlock *FreeInstrBB = FI.getParent();
3372 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3373
3374 // Validate part of constraint #1: Only one predecessor
3375 // FIXME: We can extend the number of predecessors, but in that case, we
3376 // would duplicate the call to free in each predecessor and it may
3377 // not be profitable even for code size.
3378 if (!PredBB)
3379 return nullptr;
3380
3381 // Validate constraint #2: Does this block contain only the call to
3382 // free, noops, and an unconditional branch?
3383 BasicBlock *SuccBB;
3384 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3385 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3386 return nullptr;
3387
3388 // If there are only 2 instructions in the block, at this point,
3389 // they are the call to free and the unconditional branch.
3390 // If there are more than 2 instructions, check that they are noops
3391 // i.e., they won't hurt the performance of the generated code.
3392 if (FreeInstrBB->size() != 2) {
3393 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3394 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3395 continue;
3396 auto *Cast = dyn_cast<CastInst>(&Inst);
3397 if (!Cast || !Cast->isNoopCast(DL))
3398 return nullptr;
3399 }
3400 }
3401 // Validate the rest of constraint #1 by matching on the pred branch.
3402 Instruction *TI = PredBB->getTerminator();
3403 BasicBlock *TrueBB, *FalseBB;
3405 if (!match(TI, m_Br(m_ICmp(Pred,
3407 m_Specific(Op->stripPointerCasts())),
3408 m_Zero()),
3409 TrueBB, FalseBB)))
3410 return nullptr;
3411 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3412 return nullptr;
3413
3414 // Validate constraint #3: Ensure the null case just falls through.
3415 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3416 return nullptr;
3417 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3418 "Broken CFG: missing edge from predecessor to successor");
3419
3420 // At this point, we know that everything in FreeInstrBB can be moved
3421 // before TI.
3422 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3423 if (&Instr == FreeInstrBBTerminator)
3424 break;
3425 Instr.moveBeforePreserving(TI);
3426 }
3427 assert(FreeInstrBB->size() == 1 &&
3428 "Only the branch instruction should remain");
3429
3430 // Now that we've moved the call to free before the NULL check, we have to
3431 // remove any attributes on its parameter that imply it's non-null, because
3432 // those attributes might have only been valid because of the NULL check, and
3433 // we can get miscompiles if we keep them. This is conservative if non-null is
3434 // also implied by something other than the NULL check, but it's guaranteed to
3435 // be correct, and the conservativeness won't matter in practice, since the
3436 // attributes are irrelevant for the call to free itself and the pointer
3437 // shouldn't be used after the call.
3438 AttributeList Attrs = FI.getAttributes();
3439 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3440 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3441 if (Dereferenceable.isValid()) {
3442 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3443 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3444 Attribute::Dereferenceable);
3445 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3446 }
3447 FI.setAttributes(Attrs);
3448
3449 return &FI;
3450}
3451
3453 // free undef -> unreachable.
3454 if (isa<UndefValue>(Op)) {
3455 // Leave a marker since we can't modify the CFG here.
3457 return eraseInstFromFunction(FI);
3458 }
3459
3460 // If we have 'free null' delete the instruction. This can happen in stl code
3461 // when lots of inlining happens.
3462 if (isa<ConstantPointerNull>(Op))
3463 return eraseInstFromFunction(FI);
3464
3465 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3466 // realloc() entirely.
3467 CallInst *CI = dyn_cast<CallInst>(Op);
3468 if (CI && CI->hasOneUse())
3469 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3470 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3471
3472 // If we optimize for code size, try to move the call to free before the null
3473 // test so that simplify cfg can remove the empty block and dead code
3474 // elimination the branch. I.e., helps to turn something like:
3475 // if (foo) free(foo);
3476 // into
3477 // free(foo);
3478 //
3479 // Note that we can only do this for 'free' and not for any flavor of
3480 // 'operator delete'; there is no 'operator delete' symbol for which we are
3481 // permitted to invent a call, even if we're passing in a null pointer.
3482 if (MinimizeSize) {
3483 LibFunc Func;
3484 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3486 return I;
3487 }
3488
3489 return nullptr;
3490}
3491
3493 Value *RetVal = RI.getReturnValue();
3494 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
3495 return nullptr;
3496
3497 Function *F = RI.getFunction();
3498 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3499 if (ReturnClass == fcNone)
3500 return nullptr;
3501
3502 KnownFPClass KnownClass;
3503 Value *Simplified =
3504 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
3505 if (!Simplified)
3506 return nullptr;
3507
3508 return ReturnInst::Create(RI.getContext(), Simplified);
3509}
3510
3511// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
// NOTE(review): the signature line (3512) is missing from this listing. The
// body operates on an instruction `I` and returns a bool.
//
// Walks backwards from `I`, erasing each preceding non-debug instruction after
// replacing its uses with poison, until one of the stop conditions below is
// hit. Returns true if at least one instruction was erased.
3513 // Try to remove the previous instruction if it must lead to unreachable.
3514 // This includes instructions like stores and "llvm.assume" that may not get
3515 // removed by simple dead code elimination.
3516 bool Changed = false;
3517 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3518 // While we theoretically can erase EH, that would result in a block that
3519 // used to start with an EH no longer starting with EH, which is invalid.
3520 // To make it valid, we'd need to fixup predecessors to no longer refer to
3521 // this block, but that changes CFG, which is not allowed in InstCombine.
3522 if (Prev->isEHPad())
3523 break; // Can not drop any more instructions. We're done here.
3524
// NOTE(review): the condition guarding the next `break` (line 3525) is missing
// from this listing.
3526 break; // Can not drop any more instructions. We're done here.
3527 // Otherwise, this instruction can be freely erased,
3528 // even if it is not side-effect free.
3529
3530 // A value may still have uses before we process it here (for example, in
3531 // another unreachable block), so convert those to poison.
3532 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3533 eraseInstFromFunction(*Prev);
3534 Changed = true;
3535 }
3536 return Changed;
3537}
3538
// NOTE(review): only the tail of this function survives in this listing (lines
// 3539-3540 are missing). What is visible ends by returning nullptr.
3541 return nullptr;
3542}
3543
// NOTE(review): the signature line (3544) is missing from this listing. The
// body operates on a branch instruction `BI`.
//
// For an unconditional branch, looks for a store immediately preceding the
// branch (ignoring the no-op instructions described below) and tries to merge
// it into the successor via mergeStoreIntoSuccessor. Returns &BI when that
// succeeds, nullptr otherwise.
3545 assert(BI.isUnconditional() && "Only for unconditional branches.");
3546
3547 // If this store is the second-to-last instruction in the basic block
3548 // (excluding debug info and bitcasts of pointers) and if the block ends with
3549 // an unconditional branch, try to move the store to the successor block.
3550
// Steps backwards from BBI, skipping debug/pseudo instructions and bitcasts
// that produce pointers, and returns the instruction it stops on if that is a
// StoreInst (nullptr otherwise). The walk never moves before the first
// instruction of the block.
3551 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3552 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3553 return BBI->isDebugOrPseudoInst() ||
3554 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3555 };
3556
3557 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3558 do {
3559 if (BBI != FirstInstr)
3560 --BBI;
3561 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3562
3563 return dyn_cast<StoreInst>(BBI);
3564 };
3565
3566 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3567 if (mergeStoreIntoSuccessor(*SI))
3568 return &BI;
3569
3570 return nullptr;
3571}
3572
// NOTE(review): the signature lines (3573-3574) are missing from this listing.
// The body uses `From`, `To` and `Worklist`.
//
// Records the edge From -> To in DeadEdges. If the edge was already recorded
// this is a no-op. Otherwise every non-poison phi input in `To` that comes
// from `From` is replaced with poison, and `To` is pushed onto Worklist.
3575 if (!DeadEdges.insert({From, To}).second)
3576 return;
3577
3578 // Replace phi node operands in successor with poison.
3579 for (PHINode &PN : To->phis())
3580 for (Use &U : PN.incoming_values())
3581 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
3582 replaceUse(U, PoisonValue::get(PN.getType()));
// Revisit the phi now that one of its inputs changed.
3583 addToWorklist(&PN);
3584 MadeIRChange = true;
3585 }
3586
3587 Worklist.push_back(To);
3588}
3589
3590// Under the assumption that I is unreachable, remove it and following
3591// instructions. Changes are reported directly to MadeIRChange.
// NOTE(review): the signature lines (3592-3593) and line 3606 are missing from
// this listing. The body uses an instruction pointer `I` and a `Worklist`.
3594 BasicBlock *BB = I->getParent();
// Walk in reverse order, from the instruction just before the terminator back
// to and including I. The early-inc range allows changes to the current
// instruction while iterating.
3595 for (Instruction &Inst : make_early_inc_range(
3596 make_range(std::next(BB->getTerminator()->getReverseIterator()),
3597 std::next(I->getReverseIterator())))) {
// Remaining uses of non-token values are redirected to poison.
3598 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3599 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
3600 MadeIRChange = true;
3601 }
// EH pads and token-typed instructions are left in place.
3602 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3603 continue;
3604 // RemoveDIs: erase debug-info on this instruction manually.
3605 Inst.dropDbgRecords();
// NOTE(review): line 3606 is missing here; presumably it erases Inst - confirm.
3607 MadeIRChange = true;
3608 }
3609
// Let handleUnreachableTerminator process the block terminator; any values it
// reports in Changed are re-queued (they are cast to Instruction).
3610 SmallVector<Value *> Changed;
3611 if (handleUnreachableTerminator(BB->getTerminator(), Changed)) {
3612 MadeIRChange = true;
3613 for (Value *V : Changed)
3614 addToWorklist(cast<Instruction>(V));
3615 }
3616
3617 // Handle potentially dead successors.
3618 for (BasicBlock *Succ : successors(BB))
3619 addDeadEdge(BB, Succ, Worklist);
3620}
3621
// NOTE(review): the signature lines (3622-3623) and line 3631 are missing from
// this listing. The body drains a `Worklist` of basic blocks.
//
// Pops blocks off Worklist until it is empty. A block is skipped unless every
// incoming edge is either recorded in DeadEdges or comes from a predecessor
// that the block itself dominates.
3624 while (!Worklist.empty()) {
3625 BasicBlock *BB = Worklist.pop_back_val();
3626 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
3627 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
3628 }))
3629 continue;
3630
// NOTE(review): the statement that handles a block passing the check above
// (line 3631) is missing from this listing.
3632 }
3633}
3634
// NOTE(review): the first signature line (3635) and lines 3637 and 3646 are
// missing from this listing, so the declaration of `Worklist` and the final
// statement are not visible.
//
// Marks every outgoing edge of BB as dead via addDeadEdge, except edges to
// LiveSucc. Callers in this file pass nullptr as LiveSucc when no successor is
// known to be live.
3636 BasicBlock *LiveSucc) {
3638 for (BasicBlock *Succ : successors(BB)) {
3639 // The live successor isn't dead.
3640 if (Succ == LiveSucc)
3641 continue;
3642
3643 addDeadEdge(BB, Succ, Worklist);
3644 }
3645
3647}
3648
// NOTE(review): the embedded numbering skips 3649, 3651, 3660, 3670, 3675,
// 3693 and 3703, so the signature, the statement under the first `if`, the
// statement under each `if (BPI)`, part of a match() pattern and the start of
// a call are missing from this listing. Comments below describe only what is
// visible.
//
// Canonicalizes a branch instruction `BI`. Each successful canonicalization
// returns early; when nothing applies the branch is registered with `DC` and
// nullptr is returned.
3650 if (BI.isUnconditional())
3652
3653 // Change br (not X), label True, label False to: br X, label False, True
3654 Value *Cond = BI.getCondition();
3655 Value *X;
3656 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3657 // Swap Destinations and condition...
3658 BI.swapSuccessors();
3659 if (BPI)
3661 return replaceOperand(BI, 0, X);
3662 }
3663
3664 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3665 // This is done by inverting the condition and swapping successors:
3666 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3667 Value *Y;
3668 if (isa<SelectInst>(Cond) &&
3669 match(Cond,
3671 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3672 Value *Or = Builder.CreateLogicalOr(NotX, Y);
3673 BI.swapSuccessors();
3674 if (BPI)
3676 return replaceOperand(BI, 0, Or);
3677 }
3678
3679 // If the condition is irrelevant, remove the use so that other
3680 // transforms on the condition become more effective.
// Both successors are the same block here, so the condition is replaced with
// the constant false.
3681 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3682 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3683
3684 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3685 CmpInst::Predicate Pred;
// Only done for a single-use fcmp, because the compare is modified in place.
3686 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3687 !isCanonicalPredicate(Pred)) {
3688 // Swap destinations and condition.
3689 auto *Cmp = cast<CmpInst>(Cond);
3690 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3691 BI.swapSuccessors();
3692 if (BPI)
3694 Worklist.push(Cmp);
3695 return &BI;
3696 }
3697
// An undef condition: no successor is treated as live.
3698 if (isa<UndefValue>(Cond)) {
3699 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
3700 return nullptr;
3701 }
// A constant condition: successor index 0 is selected for a non-zero value and
// index 1 for zero. NOTE(review): the start of the call receiving this
// argument (line 3703) is missing from this listing.
3702 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3704 BI.getSuccessor(!CI->getZExtValue()));
3705 return nullptr;
3706 }
3707
3708 DC.registerBranch(&BI);
3709 return nullptr;
3710}
3711
3712// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3713// we can prove that both (switch C) and (switch X) go to the default when cond
3714// is false/true.
// NOTE(review): the first signature lines (3715-3716) and lines 3727 and 3736
// are missing from this listing, so the declarations of `Pred` and `CR` are
// not visible. The body uses a switch `SI` and a select `Select`.
//
// Returns the non-constant select arm X when the fold is proven safe, nullptr
// otherwise.
3717 bool IsTrueArm) {
// Operand 1 of the select is used as the constant when IsTrueArm is set,
// operand 2 otherwise; X is the other of the two (index 3 - CstOpIdx).
3718 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3719 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
3720 if (!C)
3721 return nullptr;
3722
// The constant arm must be routed to the switch's default destination.
3723 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3724 if (CstBB != SI.getDefaultDest())
3725 return nullptr;
3726 Value *X = Select->getOperand(3 - CstOpIdx);
3728 const APInt *RHSC;
// The select condition must be an icmp of X against a constant.
3729 if (!match(Select->getCondition(),
3730 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
3731 return nullptr;
3732 if (IsTrueArm)
3733 Pred = ICmpInst::getInversePredicate(Pred);
3734
3735 // See whether we can replace the select with X
// Every case value must lie inside the range CR, otherwise give up.
3737 for (auto Case : SI.cases())
3738 if (!CR.contains(Case.getCaseValue()->getValue()))
3739 return nullptr;
3740
3741 return X;
3742}
3743
// NOTE(review): the signature line (3744) and line 3849 are missing from this
// listing. The body operates on a switch instruction `SI`.
//
// Simplifies the condition of a switch and rewrites its case values to match.
// The folds are tried in this order, each returning on success:
//   1. switch (X + C)      -> switch (X), cases adjusted by subtracting C
//   2. switch (C - X)      -> switch (X), cases replaced by C - case
//   3. switch (X << C)     -> switch (X) (masked if the shift may wrap)
//   4. switch (zext/sext X)-> switch (X) with truncated cases
//   5. switch (select ...) -> switch on one select arm
//   6. truncation of the condition based on known leading bits
// Finally, undef or constant conditions mark successors as potentially dead.
3745 Value *Cond = SI.getCondition();
3746 Value *Op0;
3747 ConstantInt *AddRHS;
3748 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3749 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3750 for (auto Case : SI.cases()) {
3751 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3752 assert(isa<ConstantInt>(NewCase) &&
3753 "Result of expression should be constant");
3754 Case.setValue(cast<ConstantInt>(NewCase));
3755 }
3756 return replaceOperand(SI, 0, Op0);
3757 }
3758
3759 ConstantInt *SubLHS;
3760 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
3761 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3762 for (auto Case : SI.cases()) {
3763 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
3764 assert(isa<ConstantInt>(NewCase) &&
3765 "Result of expression should be constant");
3766 Case.setValue(cast<ConstantInt>(NewCase));
3767 }
3768 return replaceOperand(SI, 0, Op0);
3769 }
3770
3771 uint64_t ShiftAmt;
// The shift fold requires an in-range shift amount and that every case value
// has at least ShiftAmt trailing zero bits.
3772 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
3773 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3774 all_of(SI.cases(), [&](const auto &Case) {
3775 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3776 })) {
3777 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3778 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond);
// Without a no-wrap flag a mask instruction is created below, so in that case
// the fold is only done when the shift has a single use.
3779 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3780 Shl->hasOneUse()) {
3781 Value *NewCond = Op0;
3782 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3783 // If the shift may wrap, we need to mask off the shifted bits.
3784 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3785 NewCond = Builder.CreateAnd(
3786 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
3787 }
// Case values are shifted back: arithmetically when the shl is nsw, logically
// otherwise.
3788 for (auto Case : SI.cases()) {
3789 const APInt &CaseVal = Case.getCaseValue()->getValue();
3790 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3791 : CaseVal.lshr(ShiftAmt);
3792 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
3793 }
3794 return replaceOperand(SI, 0, NewCond);
3795 }
3796 }
3797
3798 // Fold switch(zext/sext(X)) into switch(X) if possible.
3799 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
3800 bool IsZExt = isa<ZExtInst>(Cond);
3801 Type *SrcTy = Op0->getType();
3802 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3803
// Every case value must fit in NewWidth bits (unsigned check for zext, signed
// check for sext) so that truncating it loses no information.
3804 if (all_of(SI.cases(), [&](const auto &Case) {
3805 const APInt &CaseVal = Case.getCaseValue()->getValue();
3806 return IsZExt ? CaseVal.isIntN(NewWidth)
3807 : CaseVal.isSignedIntN(NewWidth);
3808 })) {
3809 for (auto &Case : SI.cases()) {
3810 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3811 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3812 }
3813 return replaceOperand(SI, 0, Op0);
3814 }
3815 }
3816
3817 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3818 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
3819 if (Value *V =
3820 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3821 return replaceOperand(SI, 0, V);
3822 if (Value *V =
3823 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3824 return replaceOperand(SI, 0, V);
3825 }
3826
3827 KnownBits Known = computeKnownBits(Cond, 0, &SI);
3828 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3829 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3830
3831 // Compute the number of leading bits we can ignore.
3832 // TODO: A better way to determine this would use ComputeNumSignBits().
// A leading bit can only be dropped if it is known in the condition and has
// the same value in every case constant, hence the running minimum.
3833 for (const auto &C : SI.cases()) {
3834 LeadingKnownZeros =
3835 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
3836 LeadingKnownOnes =
3837 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
3838 }
3839
3840 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3841
3842 // Shrink the condition operand if the new type is smaller than the old type.
3843 // But do not shrink to a non-standard type, because backend can't generate
3844 // good code for that yet.
3845 // TODO: We can make it aggressive again after fixing PR39569.
3846 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3847 shouldChangeType(Known.getBitWidth(), NewWidth)) {
3848 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
// NOTE(review): line 3849 is missing from this listing.
3850 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3851
3852 for (auto Case : SI.cases()) {
3853 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3854 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3855 }
3856 return replaceOperand(SI, 0, NewCond);
3857 }
3858
// An undef condition: no successor is treated as live.
3859 if (isa<UndefValue>(Cond)) {
3860 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
3861 return nullptr;
3862 }
// A constant condition: only the successor of the matching case is live.
3863 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3864 handlePotentiallyDeadSuccessors(SI.getParent(),
3865 SI.findCaseValue(CI)->getCaseSuccessor());
3866 return nullptr;
3867 }
3868
3869 return nullptr;
3870}
3871
// NOTE(review): the return-type line (3872) and lines 3908, 3932 and 3941 are
// missing from this listing, so one statement in the index-0 path, the tail of
// the umul constant and the declaration of `NWR` are not visible.
//
// Folds an extractvalue whose aggregate operand is a with-overflow intrinsic.
// Returns a new instruction replacing EV, or nullptr when EV does not extract
// from such an intrinsic or none of the folds applies.
3873InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3874 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3875 if (!WO)
3876 return nullptr;
3877
3878 Intrinsic::ID OvID = WO->getIntrinsicID();
// C stays nullptr unless the RHS matches m_APIntAllowPoison; it is consulted
// again near the end of the function.
3879 const APInt *C = nullptr;
3880 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
3881 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3882 OvID == Intrinsic::umul_with_overflow)) {
3883 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3884 if (C->isAllOnes())
3885 return BinaryOperator::CreateNeg(WO->getLHS());
3886 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3887 if (C->isPowerOf2()) {
3888 return BinaryOperator::CreateShl(
3889 WO->getLHS(),
3890 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3891 }
3892 }
3893 }
3894
3895 // We're extracting from an overflow intrinsic. See if we're the only user.
3896 // That allows us to simplify multiple result intrinsics to simpler things
3897 // that just get one value.
3898 if (!WO->hasOneUse())
3899 return nullptr;
3900
3901 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3902 // and replace it with a traditional binary instruction.
3903 if (*EV.idx_begin() == 0) {
// Capture the operands before the intrinsic's uses are rewritten.
3904 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3905 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3906 // Replace the old instruction's uses with poison.
3907 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
// NOTE(review): line 3908 is missing from this listing.
3909 return BinaryOperator::Create(BinOp, LHS, RHS);
3910 }
3911
// From here on only the overflow bit (index 1) is being extracted.
3912 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3913
3914 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3915 if (OvID == Intrinsic::usub_with_overflow)
3916 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3917
3918 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3919 // +1 is not possible because we assume signed values.
3920 if (OvID == Intrinsic::smul_with_overflow &&
3921 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3922 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3923
3924 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3925 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3926 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3927 // Only handle even bitwidths for performance reasons.
3928 if (BitWidth % 2 == 0)
3929 return new ICmpInst(
3930 ICmpInst::ICMP_UGT, WO->getLHS(),
3931 ConstantInt::get(WO->getLHS()->getType(),
3933 }
3934
3935 // If only the overflow result is used, and the right hand side is a
3936 // constant (or constant splat), we can remove the intrinsic by directly
3937 // checking for overflow.
3938 if (C) {
3939 // Compute the no-wrap range for LHS given RHS=C, then construct an
3940 // equivalent icmp, potentially using an offset.
3942 WO->getBinaryOp(), *C, WO->getNoWrapKind());
3943
3944 CmpInst::Predicate Pred;
3945 APInt NewRHSC, Offset;
3946 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3947 auto *OpTy = WO->getRHS()->getType();
3948 auto *NewLHS = WO->getLHS();
3949 if (Offset != 0)
3950 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
// The predicate is inverted because the icmp describes the no-wrap range,
// while the extracted bit signals overflow.
3951 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3952 ConstantInt::get(OpTy, NewRHSC));
3953 }
3954
3955 return nullptr;
3956}
3957
// NOTE(review): the signature line (3958) and lines 4044 and 4047 are missing
// from this listing. The body operates on an extractvalue instruction `EV`,
// and the declaration of `NL` used below is not visible.
//
// Simplifies an extractvalue based on what produces its aggregate operand:
// an insertvalue, a with-overflow intrinsic, a single-use simple load, a phi
// or a select. Returns a replacement instruction, or nullptr if nothing
// applies.
3959 Value *Agg = EV.getAggregateOperand();
3960
// An extract without indices is replaced by its aggregate operand.
3961 if (!EV.hasIndices())
3962 return replaceInstUsesWith(EV, Agg);
3963
3964 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3965 SQ.getWithInstruction(&EV)))
3966 return replaceInstUsesWith(EV, V);
3967
3968 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3969 // We're extracting from an insertvalue instruction, compare the indices
// Advance both index lists in lock step until one ends or they diverge.
3970 const unsigned *exti, *exte, *insi, *inse;
3971 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3972 exte = EV.idx_end(), inse = IV->idx_end();
3973 exti != exte && insi != inse;
3974 ++exti, ++insi) {
3975 if (*insi != *exti)
3976 // The insert and extract both reference distinctly different elements.
3977 // This means the extract is not influenced by the insert, and we can
3978 // replace the aggregate operand of the extract with the aggregate
3979 // operand of the insert. i.e., replace
3980 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3981 // %E = extractvalue { i32, { i32 } } %I, 0
3982 // with
3983 // %E = extractvalue { i32, { i32 } } %A, 0
3984 return ExtractValueInst::Create(IV->getAggregateOperand(),
3985 EV.getIndices());
3986 }
3987 if (exti == exte && insi == inse)
3988 // Both iterators are at the end: Index lists are identical. Replace
3989 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3990 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3991 // with "i32 42"
3992 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3993 if (exti == exte) {
3994 // The extract list is a prefix of the insert list. i.e. replace
3995 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3996 // %E = extractvalue { i32, { i32 } } %I, 1
3997 // with
3998 // %X = extractvalue { i32, { i32 } } %A, 1
3999 // %E = insertvalue { i32 } %X, i32 42, 0
4000 // by switching the order of the insert and extract (though the
4001 // insertvalue should be left in, since it may have other uses).
4002 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4003 EV.getIndices());
4004 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4005 ArrayRef(insi, inse));
4006 }
4007 if (insi == inse)
4008 // The insert list is a prefix of the extract list
4009 // We can simply remove the common indices from the extract and make it
4010 // operate on the inserted value instead of the insertvalue result.
4011 // i.e., replace
4012 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4013 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4014 // with
4015 // %E = extractvalue { i32 } { i32 42 }, 0
4016 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4017 ArrayRef(exti, exte));
4018 }
4019
4020 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4021 return R;
4022
4023 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4024 // Bail out if the aggregate contains scalable vector type
4025 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4026 STy && STy->containsScalableVectorType())
4027 return nullptr;
4028
4029 // If the (non-volatile) load only has one use, we can rewrite this to a
4030 // load from a GEP. This reduces the size of the load. If a load is used
4031 // only by extractvalue instructions then this either must have been
4032 // optimized before, or it is a struct with padding, in which case we
4033 // don't want to do the transformation as it loses padding knowledge.
4034 if (L->isSimple() && L->hasOneUse()) {
4035 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4036 SmallVector<Value*, 4> Indices;
4037 // Prefix an i32 0 since we need the first element.
4038 Indices.push_back(Builder.getInt32(0));
4039 for (unsigned Idx : EV.indices())
4040 Indices.push_back(Builder.getInt32(Idx));
4041
4042 // We need to insert these at the location of the old load, not at that of
4043 // the extractvalue.
// NOTE(review): line 4044 is missing from this listing.
4045 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4046 L->getPointerOperand(), Indices);
// NOTE(review): line 4047, which declares `NL`, is missing from this listing.
4048 // Whatever aliasing information we had for the original load must also
4049 // hold for the smaller load, so propagate the annotations.
4050 NL->setAAMetadata(L->getAAMetadata());
4051 // Returning the load directly will cause the main loop to insert it in
4052 // the wrong spot, so use replaceInstUsesWith().
4053 return replaceInstUsesWith(EV, NL);
4054 }
4055 }
4056
4057 if (auto *PN = dyn_cast<PHINode>(Agg))
4058 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4059 return Res;
4060
4061 // Canonicalize extract (select Cond, TV, FV)
4062 // -> select cond, (extract TV), (extract FV)
4063 if (auto *SI = dyn_cast<SelectInst>(Agg))
4064 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4065 return R;
4066
4067 // We could simplify extracts from other values. Note that nested extracts may
4068 // already be simplified implicitly by the above: extract (extract (insert) )
4069 // will be translated into extract ( insert ( extract ) ) first and then just
4070 // the value inserted, if appropriate. Similarly for extracts from single-use
4071 // loads: extract (extract (load)) will be translated to extract (load (gep))
4072 // and if again single-use then via load (gep (gep)) to load (gep).
4073 // However, double extracts from e.g. function arguments or return values
4074 // aren't handled yet.
4075 return nullptr;
4076}
4077
4078/// Return 'true' if the given typeinfo will match anything.
// NOTE(review): every `case` label of this switch (lines 4081-4083, 4087,
// 4089 and 4093-4102) is missing from this listing, so which personalities
// map to which result cannot be read from here. Three groups return false
// unconditionally; the last group treats a null typeinfo as a catch-all.
4079static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4080 switch (Personality) {
4084 // The GCC C EH and Rust personality only exists to support cleanups, so
4085 // it's not clear what the semantics of catch clauses are.
4086 return false;
4088 return false;
4090 // While __gnat_all_others_value will match any Ada exception, it doesn't
4091 // match foreign exceptions (or didn't, before gcc-4.7).
4092 return false;
4103 return TypeInfo->isNullValue();
4104 }
4105 llvm_unreachable("invalid enum");
4106}
4107
4108static bool shorter_filter(const Value *LHS, const Value *RHS) {
4109 return
4110 cast<ArrayType>(LHS->getType())->getNumElements()
4111 <
4112 cast<ArrayType>(RHS->getType())->getNumElements();
4113}
4114
// NOTE(review): the signature line (4115) and lines 4184 and 4289 are missing
// from this listing, so the initializer of the null `TypeInfo` and the
// comparator argument of std::stable_sort are not visible. The body operates
// on a landingpad instruction `LI`.
//
// Simplifies the clause list of a landingpad in three passes:
//   1. drop repeated catch clauses, deduplicate filter elements, and stop at
//      the first clause that catches everything;
//   2. sort each run of consecutive filters by length;
//   3. remove a later filter when an earlier filter is a subset of it.
// Returns a new LandingPadInst if any clause changed, &LI if only the cleanup
// flag changed, and nullptr otherwise.
4116 // The logic here should be correct for any real-world personality function.
4117 // However if that turns out not to be true, the offending logic can always
4118 // be conditioned on the personality function, like the catch-all logic is.
4119 EHPersonality Personality =
4120 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4121
4122 // Simplify the list of clauses, eg by removing repeated catch clauses
4123 // (these are often created by inlining).
4124 bool MakeNewInstruction = false; // If true, recreate using the following:
4125 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4126 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4127
4128 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4129 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4130 bool isLastClause = i + 1 == e;
4131 if (LI.isCatch(i)) {
4132 // A catch clause.
4133 Constant *CatchClause = LI.getClause(i);
4134 Constant *TypeInfo = CatchClause->stripPointerCasts();
4135
4136 // If we already saw this clause, there is no point in having a second
4137 // copy of it.
4138 if (AlreadyCaught.insert(TypeInfo).second) {
4139 // This catch clause was not already seen.
4140 NewClauses.push_back(CatchClause);
4141 } else {
4142 // Repeated catch clause - drop the redundant copy.
4143 MakeNewInstruction = true;
4144 }
4145
4146 // If this is a catch-all then there is no point in keeping any following
4147 // clauses or marking the landingpad as having a cleanup.
4148 if (isCatchAll(Personality, TypeInfo)) {
4149 if (!isLastClause)
4150 MakeNewInstruction = true;
4151 CleanupFlag = false;
4152 break;
4153 }
4154 } else {
4155 // A filter clause. If any of the filter elements were already caught
4156 // then they can be dropped from the filter. It is tempting to try to
4157 // exploit the filter further by saying that any typeinfo that does not
4158 // occur in the filter can't be caught later (and thus can be dropped).
4159 // However this would be wrong, since typeinfos can match without being
4160 // equal (for example if one represents a C++ class, and the other some
4161 // class derived from it).
4162 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4163 Constant *FilterClause = LI.getClause(i);
4164 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4165 unsigned NumTypeInfos = FilterType->getNumElements();
4166
4167 // An empty filter catches everything, so there is no point in keeping any
4168 // following clauses or marking the landingpad as having a cleanup. By
4169 // dealing with this case here the following code is made a bit simpler.
4170 if (!NumTypeInfos) {
4171 NewClauses.push_back(FilterClause);
4172 if (!isLastClause)
4173 MakeNewInstruction = true;
4174 CleanupFlag = false;
4175 break;
4176 }
4177
4178 bool MakeNewFilter = false; // If true, make a new filter.
4179 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4180 if (isa<ConstantAggregateZero>(FilterClause)) {
4181 // Not an empty filter - it contains at least one null typeinfo.
4182 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
// NOTE(review): the initializer of TypeInfo (line 4184) is missing from this
// listing.
4183 Constant *TypeInfo =
4185 // If this typeinfo is a catch-all then the filter can never match.
4186 if (isCatchAll(Personality, TypeInfo)) {
4187 // Throw the filter away.
4188 MakeNewInstruction = true;
4189 continue;
4190 }
4191
4192 // There is no point in having multiple copies of this typeinfo, so
4193 // discard all but the first copy if there is more than one.
4194 NewFilterElts.push_back(TypeInfo);
4195 if (NumTypeInfos > 1)
4196 MakeNewFilter = true;
4197 } else {
4198 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4199 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4200 NewFilterElts.reserve(NumTypeInfos);
4201
4202 // Remove any filter elements that were already caught or that already
4203 // occurred in the filter. While there, see if any of the elements are
4204 // catch-alls. If so, the filter can be discarded.
// NOTE(review): the loop below consults only SeenInFilter, not AlreadyCaught,
// so already-caught typeinfos are kept, as the longer comment inside the loop
// explains.
4205 bool SawCatchAll = false;
4206 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4207 Constant *Elt = Filter->getOperand(j);
4208 Constant *TypeInfo = Elt->stripPointerCasts();
4209 if (isCatchAll(Personality, TypeInfo)) {
4210 // This element is a catch-all. Bail out, noting this fact.
4211 SawCatchAll = true;
4212 break;
4213 }
4214
4215 // Even if we've seen a type in a catch clause, we don't want to
4216 // remove it from the filter. An unexpected type handler may be
4217 // set up for a call site which throws an exception of the same
4218 // type caught. In order for the exception thrown by the unexpected
4219 // handler to propagate correctly, the filter must be correctly
4220 // described for the call site.
4221 //
4222 // Example:
4223 //
4224 // void unexpected() { throw 1;}
4225 // void foo() throw (int) {
4226 // std::set_unexpected(unexpected);
4227 // try {
4228 // throw 2.0;
4229 // } catch (int i) {}
4230 // }
4231
4232 // There is no point in having multiple copies of the same typeinfo in
4233 // a filter, so only add it if we didn't already.
4234 if (SeenInFilter.insert(TypeInfo).second)
4235 NewFilterElts.push_back(cast<Constant>(Elt));
4236 }
4237 // A filter containing a catch-all cannot match anything by definition.
4238 if (SawCatchAll) {
4239 // Throw the filter away.
4240 MakeNewInstruction = true;
4241 continue;
4242 }
4243
4244 // If we dropped something from the filter, make a new one.
4245 if (NewFilterElts.size() < NumTypeInfos)
4246 MakeNewFilter = true;
4247 }
4248 if (MakeNewFilter) {
4249 FilterType = ArrayType::get(FilterType->getElementType(),
4250 NewFilterElts.size());
4251 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4252 MakeNewInstruction = true;
4253 }
4254
4255 NewClauses.push_back(FilterClause);
4256
4257 // If the new filter is empty then it will catch everything so there is
4258 // no point in keeping any following clauses or marking the landingpad
4259 // as having a cleanup. The case of the original filter being empty was
4260 // already handled above.
4261 if (MakeNewFilter && !NewFilterElts.size()) {
4262 assert(MakeNewInstruction && "New filter but not a new instruction!");
4263 CleanupFlag = false;
4264 break;
4265 }
4266 }
4267 }
4268
4269 // If several filters occur in a row then reorder them so that the shortest
4270 // filters come first (those with the smallest number of elements). This is
4271 // advantageous because shorter filters are more likely to match, speeding up
4272 // unwinding, but mostly because it increases the effectiveness of the other
4273 // filter optimizations below.
4274 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4275 unsigned j;
4276 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
// Filters are recognised here by having array type.
4277 for (j = i; j != e; ++j)
4278 if (!isa<ArrayType>(NewClauses[j]->getType()))
4279 break;
4280
4281 // Check whether the filters are already sorted by length. We need to know
4282 // if sorting them is actually going to do anything so that we only make a
4283 // new landingpad instruction if it does.
4284 for (unsigned k = i; k + 1 < j; ++k)
4285 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4286 // Not sorted, so sort the filters now. Doing an unstable sort would be
4287 // correct too but reordering filters pointlessly might confuse users.
// NOTE(review): the comparator argument (line 4289) is missing from this
// listing.
4288 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4290 MakeNewInstruction = true;
4291 break;
4292 }
4293
4294 // Look for the next batch of filters.
4295 i = j + 1;
4296 }
4297
4298 // If typeinfos matched if and only if equal, then the elements of a filter L
4299 // that occurs later than a filter F could be replaced by the intersection of
4300 // the elements of F and L. In reality two typeinfos can match without being
4301 // equal (for example if one represents a C++ class, and the other some class
4302 // derived from it) so it would be wrong to perform this transform in general.
4303 // However the transform is correct and useful if F is a subset of L. In that
4304 // case L can be replaced by F, and thus removed altogether since repeating a
4305 // filter is pointless. So here we look at all pairs of filters F and L where
4306 // L follows F in the list of clauses, and remove L if every element of F is
4307 // an element of L. This can occur when inlining C++ functions with exception
4308 // specifications.
4309 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4310 // Examine each filter in turn.
4311 Value *Filter = NewClauses[i];
4312 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4313 if (!FTy)
4314 // Not a filter - skip it.
4315 continue;
4316 unsigned FElts = FTy->getNumElements();
4317 // Examine each filter following this one. Doing this backwards means that
4318 // we don't have to worry about filters disappearing under us when removed.
4319 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4320 Value *LFilter = NewClauses[j];
4321 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4322 if (!LTy)
4323 // Not a filter - skip it.
4324 continue;
4325 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4326 // an element of LFilter, then discard LFilter.
4327 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4328 // If Filter is empty then it is a subset of LFilter.
4329 if (!FElts) {
4330 // Discard LFilter.
4331 NewClauses.erase(J);
4332 MakeNewInstruction = true;
4333 // Move on to the next filter.
4334 continue;
4335 }
4336 unsigned LElts = LTy->getNumElements();
4337 // If Filter is longer than LFilter then it cannot be a subset of it.
4338 if (FElts > LElts)
4339 // Move on to the next filter.
4340 continue;
4341 // At this point we know that LFilter has at least one element.
4342 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4343 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4344 // already know that Filter is not longer than LFilter).
4345 if (isa<ConstantAggregateZero>(Filter)) {
4346 assert(FElts <= LElts && "Should have handled this case earlier!");
4347 // Discard LFilter.
4348 NewClauses.erase(J);
4349 MakeNewInstruction = true;
4350 }
4351 // Move on to the next filter.
4352 continue;
4353 }
4354 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4355 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4356 // Since Filter is non-empty and contains only zeros, it is a subset of
4357 // LFilter iff LFilter contains a zero.
4358 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4359 for (unsigned l = 0; l != LElts; ++l)
4360 if (LArray->getOperand(l)->isNullValue()) {
4361 // LFilter contains a zero - discard it.
4362 NewClauses.erase(J);
4363 MakeNewInstruction = true;
4364 break;
4365 }
4366 // Move on to the next filter.
4367 continue;
4368 }
4369 // At this point we know that both filters are ConstantArrays. Loop over
4370 // operands to see whether every element of Filter is also an element of
4371 // LFilter. Since filters tend to be short this is probably faster than
4372 // using a method that scales nicely.
4373 ConstantArray *FArray = cast<ConstantArray>(Filter);
// AllFound is reset for each element of Filter and set again only when a
// matching element of LFilter is found; the outer loop stops at the first
// element with no match.
4374 bool AllFound = true;
4375 for (unsigned f = 0; f != FElts; ++f) {
4376 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4377 AllFound = false;
4378 for (unsigned l = 0; l != LElts; ++l) {
4379 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4380 if (LTypeInfo == FTypeInfo) {
4381 AllFound = true;
4382 break;
4383 }
4384 }
4385 if (!AllFound)
4386 break;
4387 }
4388 if (AllFound) {
4389 // Discard LFilter.
4390 NewClauses.erase(J);
4391 MakeNewInstruction = true;
4392 }
4393 // Move on to the next filter.
4394 }
4395 }
4396
4397 // If we changed any of the clauses, replace the old landingpad instruction
4398 // with a new one.
4399 if (MakeNewInstruction) {
4400 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4401 NewClauses.size());
4402 for (Constant *C : NewClauses)
4403 NLI->addClause(C);
4404 // A landing pad with no clauses must have the cleanup flag set. It is
4405 // theoretically possible, though highly unlikely, that we eliminated all
4406 // clauses. If so, force the cleanup flag to true.
4407 if (NewClauses.empty())
4408 CleanupFlag = true;
4409 NLI->setCleanup(CleanupFlag);
4410 return NLI;
4411 }
4412
4413 // Even if none of the clauses changed, we may nonetheless have understood
4414 // that the cleanup flag is pointless. Clear it if so.
4415 if (LI.isCleanup() != CleanupFlag) {
4416 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4417 LI.setCleanup(CleanupFlag);
4418 return &LI;
4419 }
4420
4421 return nullptr;
4422}
4423
// Push a freeze of a single-use instruction down onto that instruction's one
// possibly-poison operand. Returns the freeze's original operand on success
// (either a new freeze was placed on the lone maybe-poison operand, or no
// operand needed one) and nullptr when the transform does not apply.
4424Value *
4426 // Try to push freeze through instructions that propagate but don't produce
4427 // poison as far as possible. If an operand of freeze follows three
4428 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
4429 // guaranteed-non-poison operands then push the freeze through to the one
4430 // operand that is not guaranteed non-poison. The actual transform is as
4431 // follows.
4432 // Op1 = ... ; Op1 can be poison
4433 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and at most one
4434 // ; operand that may be poison
4435 // ... = Freeze(Op0)
4436 // =>
4437 // Op1 = ...
4438 // Op1.fr = Freeze(Op1)
4439 // ... = Inst(Op1.fr, NonPoisonOps...)
4440 auto *OrigOp = OrigFI.getOperand(0);
4441 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
4442
4443 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4444 // potentially reduces their optimization potential, so let's only do this iff
4445 // the OrigOp is only used by the freeze.
// Non-instruction operands and PHI nodes are rejected here as well.
4446 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
4447 return nullptr;
4448
4449 // We can't push the freeze through an instruction which can itself create
4450 // poison. If the only source of new poison is flags, we can simply
4451 // strip them (since we know the only use is the freeze and nothing can
4452 // benefit from them.)
4453 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
4454 /*ConsiderFlagsAndMetadata*/ false))
4455 return nullptr;
4456
4457 // If operand is guaranteed not to be poison, there is no need to add freeze
4458 // to the operand. So we first find the operand that may be poison; if there
4459 // is more than one such operand we give up.
4460 Use *MaybePoisonOperand = nullptr;
4461 for (Use &U : OrigOpInst->operands()) {
4462 if (isa<MetadataAsValue>(U.get()) ||
4464 continue;
4465 if (!MaybePoisonOperand)
4466 MaybePoisonOperand = &U;
4467 else
4468 return nullptr;
4469 }
4470
// Flags and metadata were deliberately ignored by the check above, so strip
// the poison-generating ones now that we are committed to the transform.
4471 OrigOpInst->dropPoisonGeneratingAnnotations();
4472
4473 // If all operands are guaranteed to be non-poison, we can drop freeze.
4474 if (!MaybePoisonOperand)
4475 return OrigOp;
4476
// Otherwise freeze just the one maybe-poison operand, creating the new freeze
// at OrigOpInst's position and rewiring that operand to use it.
4477 Builder.SetInsertPoint(OrigOpInst);
4478 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4479 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
4480
4481 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
4482 return OrigOp;
4483}
4484
4486 PHINode *PN) {
// Fold freeze(PN) away when PN is a recurrence whose backedge values cannot
// introduce new poison: the freeze is moved onto the start value instead.
// Returns the result of replacing FI's uses with PN on success, nullptr if the
// pattern does not match.
4487 // Detect whether this is a recurrence with a start value and some number of
4488 // backedge values. We'll check whether we can push the freeze through the
4489 // backedge values (possibly dropping poison flags along the way) until we
4490 // reach the phi again. In that case, we can move the freeze to the start
4491 // value.
4492 Use *StartU = nullptr;
4494 for (Use &U : PN->incoming_values()) {
// An incoming block dominated by the phi's own block is treated as a backedge.
4495 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
4496 // Add backedge value to worklist.
4497 Worklist.push_back(U.get());
4498 continue;
4499 }
4500
4501 // Don't bother handling multiple start values.
4502 if (StartU)
4503 return nullptr;
4504 StartU = &U;
4505 }
4506
4507 if (!StartU || Worklist.empty())
4508 return nullptr; // Not a recurrence.
4509
4510 Value *StartV = StartU->get();
4511 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
4512 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
4513 // We can't insert freeze if the start value is the result of the
4514 // terminator (e.g. an invoke).
4515 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4516 return nullptr;
4517
// Walk backwards from the backedge values through their operands. Every value
// reached must either be the phi itself, be known non-poison, or be an
// instruction that cannot create poison once its flags are ignored.
4520 while (!Worklist.empty()) {
4521 Value *V = Worklist.pop_back_val();
4522 if (!Visited.insert(V).second)
4523 continue;
4524
4525 if (Visited.size() > 32)
4526 return nullptr; // Limit the total number of values we inspect.
4527
4528 // Assume that PN is non-poison, because it will be after the transform.
4529 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4530 continue;
4531
4532 Instruction *I = dyn_cast<Instruction>(V);
4533 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
4534 /*ConsiderFlagsAndMetadata*/ false))
4535 return nullptr;
4536
// Remember I so its poison-generating flags can be stripped once the whole
// walk has succeeded; nothing is modified before that point.
4537 DropFlags.push_back(I);
4538 append_range(Worklist, I->operands());
4539 }
4540
4541 for (Instruction *I : DropFlags)
4542 I->dropPoisonGeneratingAnnotations();
4543
// Freeze the start value (only if it might be undef/poison) and feed the
// frozen value into the phi in its place.
4544 if (StartNeedsFreeze) {
4546 Value *FrozenStartV = Builder.CreateFreeze(StartV,
4547 StartV->getName() + ".fr");
4548 replaceUse(*StartU, FrozenStartV);
4549 }
4550 return replaceInstUsesWith(FI, PN);
4551}
4552
// Make the other uses of FI's operand use FI instead, after moving FI to just
// behind the operand's definition so that it dominates as many of those uses
// as possible. Returns true if FI was moved or any use was rewritten.
4554 Value *Op = FI.getOperand(0);
4555
// Nothing to do for a constant operand, or when Op has a single use (which is
// FI itself, since FI takes Op as its operand).
4556 if (isa<Constant>(Op) || Op->hasOneUse())
4557 return false;
4558
4559 // Move the freeze directly after the definition of its operand, so that
4560 // it dominates the maximum number of uses. Note that it may not dominate
4561 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4562 // the normal/default destination. This is why the domination check in the
4563 // replacement below is still necessary.
4564 BasicBlock::iterator MoveBefore;
4565 if (isa<Argument>(Op)) {
4566 MoveBefore =
4568 } else {
// Give up if the defining instruction offers no insertion point after it.
4569 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
4570 if (!MoveBeforeOpt)
4571 return false;
4572 MoveBefore = *MoveBeforeOpt;
4573 }
4574
4575 // Don't move to the position of a debug intrinsic.
4576 if (isa<DbgInfoIntrinsic>(MoveBefore))
4577 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4578 // Re-point iterator to come after any debug-info records, if we're
4579 // running in "RemoveDIs" mode
4580 MoveBefore.setHeadBit(false);
4581
4582 bool Changed = false;
// Skip the move when FI already sits at the chosen position.
4583 if (&FI != &*MoveBefore) {
4584 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
4585 Changed = true;
4586 }
4587
// Rewrite only those uses of Op that FI dominates; each rewritten use also
// marks the function as changed.
4588 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
4589 bool Dominates = DT.dominates(&FI, U);
4590 Changed |= Dominates;
4591 return Dominates;
4592 });
4593
4594 return Changed;
4595}
4596
4597// Check if any direct or bitcast user of this value is a shuffle instruction.
// Bitcast users of V are followed recursively, so a shuffle reached through a
// chain of bitcasts also counts. Returns false when no such user exists.
4599 for (auto *U : V->users()) {
4600 if (isa<ShuffleVectorInst>(U))
4601 return true;
4602 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
4603 return true;
4604 }
4605 return false;
4606}
4607
// Visitor for freeze instruction I. Tries, in order: replacing I with a
// simpler value, folding it into a phi operand or a recurrence, folding
// freeze(undef) / freeze(constant with undef elements) to a concrete constant,
// and finally making other uses of the operand use the freeze. Returns the
// replacement (or &I when I was changed in place), nullptr if nothing changed.
4609 Value *Op0 = I.getOperand(0);
4610
4612 return replaceInstUsesWith(I, V);
4613
4614 // freeze (phi const, x) --> phi const, (freeze x)
4615 if (auto *PN = dyn_cast<PHINode>(Op0)) {
4616 if (Instruction *NV = foldOpIntoPhi(I, PN))
4617 return NV;
4618 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
4619 return NV;
4620 }
4621
4623 return replaceInstUsesWith(I, NI);
4624
4625 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4626 // - or: pick -1
4627 // - select's condition: if the true value is constant, choose it by making
4628 // the condition true.
4629 // - default: pick 0
4630 //
4631 // Note that this transform is intentionally done here rather than
4632 // via an analysis in InstSimplify or at individual user sites. That is
4633 // because we must produce the same value for all uses of the freeze -
4634 // it's the reason "freeze" exists!
4635 //
4636 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4637 // duplicating logic for binops at least.
// Each user of I votes for a preferred constant of type Ty. If all users
// agree, that constant is returned; as soon as two users disagree the result
// falls back to the null value.
4638 auto getUndefReplacement = [&I](Type *Ty) {
4639 Constant *BestValue = nullptr;
4640 Constant *NullValue = Constant::getNullValue(Ty);
4641 for (const auto *U : I.users()) {
4642 Constant *C = NullValue;
4643 if (match(U, m_Or(m_Value(), m_Value())))
4645 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4646 C = ConstantInt::getTrue(Ty);
4647
4648 if (!BestValue)
4649 BestValue = C;
4650 else if (BestValue != C)
4651 BestValue = NullValue;
4652 }
4653 assert(BestValue && "Must have at least one use");
4654 return BestValue;
4655 };
4656
4657 if (match(Op0, m_Undef())) {
4658 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4659 // a shuffle. This may improve codegen for shuffles that allow
4660 // unspecified inputs.
4662 return nullptr;
4663 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4664 }
4665
// A constant that merely contains undef/poison elements: the replacement is
// chosen for the scalar element type.
4666 Constant *C;
4667 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4668 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4670 }
4671
4672 // Replace uses of Op with freeze(Op).
4673 if (freezeOtherUses(I))
4674 return &I;
4675
4676 return nullptr;
4677}
4678
4679/// Check for case where the call writes to an otherwise dead alloca. This
4680/// shows up for unused out-params in idiomatic C/C++ code. Note that this
4681/// helper *only* analyzes the write; doesn't check any other legality aspect.
/// Returns true only if I is a call with a known destination location whose
/// underlying object is an alloca, and every transitive user of that alloca
/// (looking through bitcast, GEP and addrspacecast) is the call itself.
4683 auto *CB = dyn_cast<CallBase>(I);
4684 if (!CB)
4685 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4686 // to allow reload along used path as described below. Otherwise, this
4687 // is simply a store to a dead allocation which will be removed.
4688 return false;
4689 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4690 if (!Dest)
4691 return false;
4692 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4693 if (!AI)
4694 // TODO: allow malloc?
4695 return false;
4696 // TODO: allow memory access dominated by move point? Note that since AI
4697 // could have a reference to itself captured by the call, we would need to
4698 // account for cycles in doing so.
4699 SmallVector<const User *> AllocaUsers;
// Queue each user of the given instruction the first time it is seen, so the
// walk below terminates even if users are reachable along several paths.
4701 auto pushUsers = [&](const Instruction &I) {
4702 for (const User *U : I.users()) {
4703 if (Visited.insert(U).second)
4704 AllocaUsers.push_back(U);
4705 }
4706 };
4707 pushUsers(*AI);
4708 while (!AllocaUsers.empty()) {
4709 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
// Pointer-forwarding instructions are transparent: inspect their users too.
4710 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4711 isa<AddrSpaceCastInst>(UserI)) {
4712 pushUsers(*UserI);
4713 continue;
4714 }
4715 if (UserI == CB)
4716 continue;
// Any other user means the alloca is not "otherwise dead".
4717 // TODO: support lifetime.start/end here
4718 return false;
4719 }
4720 return true;
4721}
4722
4723/// Try to move the specified instruction from its current block into the
4724/// beginning of DestBlock, which can only happen if it's safe to move the
4725/// instruction past all of the instructions between it and the end of its
4726/// block.
/// Returns true if I was moved (its debug users are sunk or salvaged as part
/// of the move), false if any legality check failed and nothing was changed.
4728 BasicBlock *DestBlock) {
4729 BasicBlock *SrcBlock = I->getParent();
4730
4731 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4732 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4733 I->isTerminator())
4734 return false;
4735
4736 // Do not sink static or dynamic alloca instructions. Static allocas must
4737 // remain in the entry block, and dynamic allocas must not be sunk in between
4738 // a stacksave / stackrestore pair, which would incorrectly shorten its
4739 // lifetime.
4740 if (isa<AllocaInst>(I))
4741 return false;
4742
4743 // Do not sink into catchswitch blocks.
4744 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4745 return false;
4746
4747 // Do not sink convergent call instructions.
4748 if (auto *CI = dyn_cast<CallInst>(I)) {
4749 if (CI->isConvergent())
4750 return false;
4751 }
4752
4753 // Unless we can prove that the memory write isn't visible except on the
4754 // path we're sinking to, we must bail.
4755 if (I->mayWriteToMemory()) {
4756 if (!SoleWriteToDeadLocal(I, TLI))
4757 return false;
4758 }
4759
4760 // We can only sink load instructions if there is nothing between the load and
4761 // the end of block that could change the value.
4762 if (I->mayReadFromMemory()) {
4763 // We don't want to do any sophisticated alias analysis, so we only check
4764 // the instructions after I in I's parent block if we try to sink to its
4765 // successor block.
4766 if (DestBlock->getUniquePredecessor() != I->getParent())
4767 return false;
4768 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4769 E = I->getParent()->end();
4770 Scan != E; ++Scan)
4771 if (Scan->mayWriteToMemory())
4772 return false;
4773 }
4774
// All legality checks passed. The callback selects droppable uses whose user
// is an instruction outside DestBlock and re-queues that user on the worklist.
// Note that the I declared inside the lambda shadows the outer I: it is the
// using instruction, not the instruction being sunk.
4775 I->dropDroppableUses([&](const Use *U) {
4776 auto *I = dyn_cast<Instruction>(U->getUser());
4777 if (I && I->getParent() != DestBlock) {
4778 Worklist.add(I);
4779 return true;
4780 }
4781 return false;
4782 });
4783 /// FIXME: We could remove droppable uses that are not dominated by
4784 /// the new position.
4785
4786 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4787 I->moveBefore(*DestBlock, InsertPos);
4788 ++NumSunkInst;
4789
4790 // Also sink all related debug uses from the source basic block. Otherwise we
4791 // get debug use before the def. Attempt to salvage debug uses first, to
4792 // maximise the range variables have location for. If we cannot salvage, then
4793 // mark the location undef: we know it was supposed to receive a new location
4794 // here, but that computation has been sunk.
// Debug users come in two flavours (intrinsics and DbgVariableRecords); each
// is handled by its own helper.
4796 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4797 findDbgUsers(DbgUsers, I, &DbgVariableRecords);
4798 if (!DbgUsers.empty())
4799 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4800 if (!DbgVariableRecords.empty())
4801 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4802 DbgVariableRecords);
4803
4804 // PS: there are numerous flaws with this behaviour, not least that right now
4805 // assignments can be re-ordered past other assignments to the same variable
4806 // if they use different Values. Creating more undef assignments can never be
4807 // undone. And salvaging all users outside of this block can un-necessarily
4808 // alter the lifetime of the live-value that the variable refers to.
4809 // Some of these things can be resolved by tolerating debug use-before-defs in
4810 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
4811 // being used for more architectures.
4812
4813 return true;
4814}
4815
4817 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
// Debug-intrinsic half of sinking I into DestBlock: for each variable, the
// last debug user of I in SrcBlock is cloned and the clone is placed at
// InsertPos, while the debug users left behind outside DestBlock are salvaged.
4819 // For all debug values in the destination block, the sunk instruction
4820 // will still be available, so they do not need to be dropped.
4822 for (auto &DbgUser : DbgUsers)
4823 if (DbgUser->getParent() != DestBlock)
4824 DbgUsersToSalvage.push_back(DbgUser);
4825
4826 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4827 // to clone the last appearing debug intrinsic for each given variable.
4829 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4830 if (DVI->getParent() == SrcBlock)
4831 DbgUsersToSink.push_back(DVI);
// The comparator puts later instructions first, i.e. reverse program order.
4832 llvm::sort(DbgUsersToSink,
4833 [](auto *A, auto *B) { return B->comesBefore(A); });
4834
4836 SmallSet<DebugVariable, 4> SunkVariables;
4837 for (auto *User : DbgUsersToSink) {
4838 // A dbg.declare instruction should not be cloned, since there can only be
4839 // one per variable fragment. It should be left in the original place
4840 // because the sunk instruction is not an alloca (otherwise we could not be
4841 // here).
4842 if (isa<DbgDeclareInst>(User))
4843 continue;
4844
4845 DebugVariable DbgUserVariable =
4846 DebugVariable(User->getVariable(), User->getExpression(),
4847 User->getDebugLoc()->getInlinedAt());
4848
// Only the first (i.e. last-appearing) user of each variable is kept.
4849 if (!SunkVariables.insert(DbgUserVariable).second)
4850 continue;
4851
4852 // Leave dbg.assign intrinsics in their original positions and there should
4853 // be no need to insert a clone.
4854 if (isa<DbgAssignIntrinsic>(User))
4855 continue;
4856
4857 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
// NOTE(review): dbg.declare users already hit the `continue` at the top of
// this loop, so the condition below can never be true here - confirm and
// consider removing.
4858 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4859 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4860 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4861 }
4862
4863 // Perform salvaging without the clones, then sink the clones.
4864 if (!DIIClones.empty()) {
4865 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
4866 // The clones are in reverse order of original appearance, reverse again to
4867 // maintain the original order.
4868 for (auto &DIIClone : llvm::reverse(DIIClones)) {
4869 DIIClone->insertBefore(&*InsertPos);
4870 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4871 }
4872 }
4873}
4874
4876 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4877 BasicBlock *DestBlock,
4878 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4879 // Implementation of tryToSinkInstructionDbgValues, but for the
4880 // DbgVariableRecord of variable assignments rather than dbg.values.
// In outline: pick, per variable, the last record in SrcBlock that refers to
// I, clone it, salvage the records left behind outside DestBlock, and insert
// the clones at InsertPos. Nothing is changed if no record ends up cloned.
4881
4882 // Fetch all DbgVariableRecords not already in the destination.
4883 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4884 for (auto &DVR : DbgVariableRecords)
4885 if (DVR->getParent() != DestBlock)
4886 DbgVariableRecordsToSalvage.push_back(DVR);
4887
4888 // Fetch a second collection, of DbgVariableRecords in the source block that
4889 // we're going to sink.
4890 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4891 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4892 if (DVR->getParent() == SrcBlock)
4893 DbgVariableRecordsToSink.push_back(DVR);
4894
4895 // Sort DbgVariableRecords according to their position in the block. This is a
4896 // partial order: DbgVariableRecords attached to different instructions will
4897 // be ordered by the instruction order, but DbgVariableRecords attached to the
4898 // same instruction won't have an order.
// The comparator places records attached to later instructions first; the
// stable sort keeps the incoming relative order of records that share an
// instruction.
4899 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4900 return B->getInstruction()->comesBefore(A->getInstruction());
4901 };
4902 llvm::stable_sort(DbgVariableRecordsToSink, Order);
4903
4904 // If there are two assignments to the same variable attached to the same
4905 // instruction, the ordering between the two assignments is important. Scan
4906 // for this (rare) case and establish which is the last assignment.
4907 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4909 if (DbgVariableRecordsToSink.size() > 1) {
4911 // Count how many assignments to each variable there is per instruction.
4912 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4913 DebugVariable DbgUserVariable =
4914 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4915 DVR->getDebugLoc()->getInlinedAt());
4916 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
4917 }
4918
4919 // If there are any instructions with two assignments, add them to the
4920 // FilterOutMap to record that they need extra filtering.
// The nullptr placeholder is replaced below by the last such assignment.
4922 for (auto It : CountMap) {
4923 if (It.second > 1) {
4924 FilterOutMap[It.first] = nullptr;
4925 DupSet.insert(It.first.first);
4926 }
4927 }
4928
4929 // For all instruction/variable pairs needing extra filtering, find the
4930 // latest assignment.
// Records are scanned back-to-front, so the first match seen for a pair is
// the latest one; pairs that already have an entry are left alone.
4931 for (const Instruction *Inst : DupSet) {
4932 for (DbgVariableRecord &DVR :
4933 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
4934 DebugVariable DbgUserVariable =
4935 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4936 DVR.getDebugLoc()->getInlinedAt());
4937 auto FilterIt =
4938 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
4939 if (FilterIt == FilterOutMap.end())
4940 continue;
4941 if (FilterIt->second != nullptr)
4942 continue;
4943 FilterIt->second = &DVR;
4944 }
4945 }
4946 }
4947
4948 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
4949 // out any duplicate assignments identified above.
4951 SmallSet<DebugVariable, 4> SunkVariables;
4952 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4954 continue;
4955
4956 DebugVariable DbgUserVariable =
4957 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4958 DVR->getDebugLoc()->getInlinedAt());
4959
4960 // For any variable where there were multiple assignments in the same place,
4961 // ignore all but the last assignment.
4962 if (!FilterOutMap.empty()) {
4963 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
4964 auto It = FilterOutMap.find(IVP);
4965
4966 // Filter out.
4967 if (It != FilterOutMap.end() && It->second != DVR)
4968 continue;
4969 }
4970
// Only the first record encountered for each variable is kept; given the sort
// above, that is the one attached to the latest instruction.
4971 if (!SunkVariables.insert(DbgUserVariable).second)
4972 continue;
4973
// Assign-kind records are not cloned.
4974 if (DVR->isDbgAssign())
4975 continue;
4976
4977 DVRClones.emplace_back(DVR->clone());
4978 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4979 }
4980
4981 // Perform salvaging without the clones, then sink the clones.
4982 if (DVRClones.empty())
4983 return;
4984
4985 salvageDebugInfoForDbgValues(*I, {}, DbgVariableRecordsToSalvage);
4986
4987 // The clones are in reverse order of original appearance. Assert that the
4988 // head bit is set on the iterator as we _should_ have received it via
4989 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4990 // we'll repeatedly insert at the head, such as:
4991 // DVR-3 (third insertion goes here)
4992 // DVR-2 (second insertion goes here)
4993 // DVR-1 (first insertion goes here)
4994 // Any-Prior-DVRs
4995 // InsertPtInst
4996 assert(InsertPos.getHeadBit());
4997 for (DbgVariableRecord *DVRClone : DVRClones) {
4998 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4999 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5000 }
5001}
5002
// Main driver loop: drain the worklist, and for each instruction try dead-code
// elimination, sinking into its single user block, and finally visit() to
// combine it. Returns MadeIRChange, i.e. whether the IR was modified.
5004 while (!Worklist.isEmpty()) {
5005 // Walk deferred instructions in reverse order, and push them to the
5006 // worklist, which means they'll end up popped from the worklist in-order.
5007 while (Instruction *I = Worklist.popDeferred()) {
5008 // Check to see if we can DCE the instruction. We do this already here to
5009 // reduce the number of uses and thus allow other folds to trigger.
5010 // Note that eraseInstFromFunction() may push additional instructions on
5011 // the deferred worklist, so this will DCE whole instruction chains.
5014 ++NumDeadInst;
5015 continue;
5016 }
5017
5018 Worklist.push(I);
5019 }
5020
5022 if (I == nullptr) continue; // skip null values.
5023
5024 // Check to see if we can DCE the instruction.
5027 ++NumDeadInst;
5028 continue;
5029 }
5030
5031 if (!DebugCounter::shouldExecute(VisitCounter))
5032 continue;
5033
5034 // See if we can trivially sink this instruction to its user if we can
5035 // prove that the successor is not executed more frequently than our block.
5036 // Return the UserBlock if successful.
// Yields std::nullopt when sinking is disabled, when the non-droppable uses
// span more than one block, when there are too many users, or when the single
// user block is not a suitable destination.
5037 auto getOptionalSinkBlockForInst =
5038 [this](Instruction *I) -> std::optional<BasicBlock *> {
5039 if (!EnableCodeSinking)
5040 return std::nullopt;
5041
5042 BasicBlock *BB = I->getParent();
5043 BasicBlock *UserParent = nullptr;
5044 unsigned NumUsers = 0;
5045
5046 for (Use &U : I->uses()) {
5047 User *User = U.getUser();
5048 if (User->isDroppable())
5049 continue;
// This limit is tested before NumUsers is incremented for the current use.
5050 if (NumUsers > MaxSinkNumUsers)
5051 return std::nullopt;
5052
5053 Instruction *UserInst = cast<Instruction>(User);
5054 // Special handling for Phi nodes - get the block the use occurs in.
5055 BasicBlock *UserBB = UserInst->getParent();
5056 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5057 UserBB = PN->getIncomingBlock(U);
5058 // Bail out if we have uses in different blocks. We don't do any
5059 // sophisticated analysis (i.e finding NearestCommonDominator of these
5060 // use blocks).
5061 if (UserParent && UserParent != UserBB)
5062 return std::nullopt;
5063 UserParent = UserBB;
5064
5065 // Make sure these checks are done only once, naturally we do the checks
5066 // the first time we get the userparent, this will save compile time.
5067 if (NumUsers == 0) {
5068 // Try sinking to another block. If that block is unreachable, then do
5069 // not bother. SimplifyCFG should handle it.
5070 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5071 return std::nullopt;
5072
5073 auto *Term = UserParent->getTerminator();
5074 // See if the user is one of our successors that has only one
5075 // predecessor, so that we don't have to split the critical edge.
5076 // Another option where we can sink is a block that ends with a
5077 // terminator that does not pass control to other block (such as
5078 // return or unreachable or resume). In this case:
5079 // - I dominates the User (by SSA form);
5080 // - the User will be executed at most once.
5081 // So sinking I down to User is always profitable or neutral.
5082 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5083 return std::nullopt;
5084
5085 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5086 }
5087
5088 NumUsers++;
5089 }
5090
5091 // No user or only has droppable users.
5092 if (!UserParent)
5093 return std::nullopt;
5094
5095 return UserParent;
5096 };
5097
5098 auto OptBB = getOptionalSinkBlockForInst(I);
5099 if (OptBB) {
5100 auto *UserParent = *OptBB;
5101 // Okay, the CFG is simple enough, try to sink this instruction.
5102 if (tryToSinkInstruction(I, UserParent)) {
5103 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5104 MadeIRChange = true;
5105 // We'll add uses of the sunk instruction below, but since
5106 // sinking can expose opportunities for its *operands* add
5107 // them to the worklist
5108 for (Use &U : I->operands())
5109 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5110 Worklist.push(OpI);
5111 }
5112 }
5113
5114 // Now that we have an instruction, try combining it to simplify it.
5117 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5118
// OrigI holds the printed form of I before the visit; it is only declared and
// filled in builds without NDEBUG and is used solely for debug output.
5119#ifndef NDEBUG
5120 std::string OrigI;
5121#endif
5122 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5123 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5124
// visit() returns nullptr for "no change", I itself for an in-place
// modification, or a new instruction that should replace I.
5125 if (Instruction *Result = visit(*I)) {
5126 ++NumCombined;
5127 // Should we replace the old instruction with a new one?
5128 if (Result != I) {
5129 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5130 << " New = " << *Result << '\n');
5131
5132 Result->copyMetadata(*I,
5133 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5134 // Everything uses the new instruction now.
5135 I->replaceAllUsesWith(Result);
5136
5137 // Move the name to the new instruction first.
5138 Result->takeName(I);
5139
5140 // Insert the new instruction into the basic block...
5141 BasicBlock *InstParent = I->getParent();
5142 BasicBlock::iterator InsertPos = I->getIterator();
5143
5144 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5145 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5146 // We need to fix up the insertion point.
5147 if (isa<PHINode>(I)) // PHI -> Non-PHI
5148 InsertPos = InstParent->getFirstInsertionPt();
5149 else // Non-PHI -> PHI
5150 InsertPos = InstParent->getFirstNonPHIIt();
5151 }
5152
5153 Result->insertInto(InstParent, InsertPos);
5154
5155 // Push the new instruction and any users onto the worklist.
5157 Worklist.push(Result);
5158
5160 } else {
5161 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5162 << " New = " << *I << '\n');
5163
5164 // If the instruction was modified, it's possible that it is now dead.
5165 // if so, remove it.
5168 } else {
5170 Worklist.push(I);
5171 }
5172 }
5173 MadeIRChange = true;
5174 }
5175 }
5176
5177 Worklist.zap();
5178 return MadeIRChange;
5179}
5180
5181// Track the scopes used by !alias.scope and !noalias. In a function, a
5182// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5183// by both sets. If not, the declaration of the scope can be safely omitted.
5184// The MDNode of the scope can be omitted as well for the instructions that are
5185// part of this function. We do not do that at this point, as this might become
5186// too time consuming to do.
// Each set holds both the scope-list nodes seen on instructions and the
// individual scope nodes those lists contain: one set for !alias.scope
// metadata, the other for !noalias metadata.
5188 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5189 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5190
5191public:
// Record the alias-scope and noalias scopes referenced by instruction I.
5193 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5194 if (!I->hasMetadataOtherThanDebugLoc())
5195 return;
5196
// Insert the list node itself and, the first time a given list is seen, every
// MDNode operand of it. A list that is already in the container is skipped.
5197 auto Track = [](Metadata *ScopeList, auto &Container) {
5198 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5199 if (!MDScopeList || !Container.insert(MDScopeList).second)
5200 return;
5201 for (const auto &MDOperand : MDScopeList->operands())
5202 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5203 Container.insert(MDScope);
5204 };
5205
5206 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5207 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5208 }
5209
// Returns true if Inst is a noalias scope declaration whose scope is missing
// from at least one of the two tracked sets (or whose operand is not an MDNode
// at all); returns false for any other instruction.
5211 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5212 if (!Decl)
5213 return false;
5214
5215 assert(Decl->use_empty() &&
5216 "llvm.experimental.noalias.scope.decl in use ?");
5217 const MDNode *MDSL = Decl->getScopeList();
5218 assert(MDSL->getNumOperands() == 1 &&
5219 "llvm.experimental.noalias.scope should refer to a single scope");
5220 auto &MDOperand = MDSL->getOperand(0);
5221 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5222 return !UsedAliasScopesAndLists.contains(MD) ||
5223 !UsedNoAliasScopesAndLists.contains(MD);
5224
5225 // Not an MDNode ? throw away.
5226 return true;
5227 }
5228};
5229
5230/// Populate the IC worklist from a function, by walking it in reverse
5231/// post-order and adding all reachable code to the worklist.
5232///
5233/// This has a couple of tricks to make the code faster and more powerful. In
5234/// particular, we constant fold and DCE instructions as we go, to avoid adding
5235/// them to the worklist (this significantly speeds up instcombine on code where
5236/// many instructions are dead or constant). Additionally, if we find a branch
5237/// whose condition is a known constant, we only visit the reachable successors.
///
/// Returns true if any IR was modified while building the worklist.
// NOTE(review): the signature (embedded lines 5238-5239) and embedded line
// 5241 are absent from this listing.
5240 bool MadeIRChange = false;
5242 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
// Cache of already-folded constants so each constant operand is folded once.
5243 DenseMap<Constant *, Constant *> FoldedConstants;
5244 AliasScopeTracker SeenAliasScopes;
5245
// Records every edge BB->Succ other than the one to LiveSucc as dead. For each
// edge that is newly recorded, the phi inputs in Succ that come from BB are
// replaced with poison. Passing nullptr as LiveSucc marks all out-edges dead.
5246 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5247 for (BasicBlock *Succ : successors(BB))
5248 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5249 for (PHINode &PN : Succ->phis())
5250 for (Use &U : PN.incoming_values())
5251 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5252 U.set(PoisonValue::get(PN.getType()));
5253 MadeIRChange = true;
5254 }
5255 };
5256
5257 for (BasicBlock *BB : RPOT) {
// A non-entry block is treated as dead when every incoming edge is either
// already recorded as dead or comes from a predecessor that BB dominates.
// Such a block is not added to LiveBlocks and all its out-edges become dead.
5258 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5259 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5260 })) {
5261 HandleOnlyLiveSuccessor(BB, nullptr);
5262 continue;
5263 }
5264 LiveBlocks.insert(BB);
5265
// Early-increment iteration: Inst may be erased inside the loop body.
5266 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5267 // ConstantProp instruction if trivially constant.
5268 if (!Inst.use_empty() &&
5269 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5270 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5271 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5272 << '\n');
5273 Inst.replaceAllUsesWith(C);
5274 ++NumConstProp;
5275 if (isInstructionTriviallyDead(&Inst, &TLI))
5276 Inst.eraseFromParent();
5277 MadeIRChange = true;
// A folded instruction is never queued for the worklist.
5278 continue;
5279 }
5280
5281 // See if we can constant fold its operands.
5282 for (Use &U : Inst.operands()) {
// Only constant vectors and constant expressions are candidates.
5283 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5284 continue;
5285
5286 auto *C = cast<Constant>(U);
// operator[] default-inserts a null entry; null means "not folded yet".
5287 Constant *&FoldRes = FoldedConstants[C];
5288 if (!FoldRes)
5289 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5290
5291 if (FoldRes != C) {
5292 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5293 << "\n Old = " << *C
5294 << "\n New = " << *FoldRes << '\n');
5295 U = FoldRes;
5296 MadeIRChange = true;
5297 }
5298 }
5299
5300 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5301 // these call instructions consumes non-trivial amount of time and
5302 // provides no value for the optimization.
5303 if (!Inst.isDebugOrPseudoInst()) {
5304 InstrsForInstructionWorklist.push_back(&Inst);
5305 SeenAliasScopes.analyse(&Inst);
5306 }
5307 }
5308
5309 // If this is a branch or switch on a constant, mark only the single
5310 // live successor. Otherwise assume all successors are live.
5311 Instruction *TI = BB->getTerminator();
5312 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5313 if (isa<UndefValue>(BI->getCondition())) {
5314 // Branch on undef is UB.
5315 HandleOnlyLiveSuccessor(BB, nullptr);
5316 continue;
5317 }
5318 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
// A true condition keeps successor 0 live, a false one keeps successor 1.
5319 bool CondVal = Cond->getZExtValue();
5320 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5321 continue;
5322 }
5323 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5324 if (isa<UndefValue>(SI->getCondition())) {
5325 // Switch on undef is UB.
5326 HandleOnlyLiveSuccessor(BB, nullptr);
5327 continue;
5328 }
5329 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5330 HandleOnlyLiveSuccessor(BB,
5331 SI->findCaseValue(Cond)->getCaseSuccessor());
5332 continue;
5333 }
5334 }
5335 }
5336
5337 // Remove instructions inside unreachable blocks. This prevents the
5338 // instcombine code from having to deal with some bad special cases, and
5339 // reduces use counts of instructions.
5340 for (BasicBlock &BB : F) {
5341 if (LiveBlocks.count(&BB))
5342 continue;
5343
5344 unsigned NumDeadInstInBB;
5345 unsigned NumDeadDbgInstInBB;
// NOTE(review): the right-hand side of this assignment (embedded line 5347)
// is absent from this listing.
5346 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
5348
// Debug instructions count toward "IR changed" but not toward NumDeadInst.
5349 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5350 NumDeadInst += NumDeadInstInBB;
5351 }
5352
5353 // Once we've found all of the instructions to add to instcombine's worklist,
5354 // add them in reverse order. This way instcombine will visit from the top
5355 // of the function down. This jives well with the way that it adds all uses
5356 // of instructions to the worklist after doing a transformation, thus avoiding
5357 // some N^2 behavior in pathological cases.
5358 Worklist.reserve(InstrsForInstructionWorklist.size());
5359 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5360 // DCE instruction if trivially dead. As we iterate in reverse program
5361 // order here, we will clean up whole chains of dead instructions.
5362 if (isInstructionTriviallyDead(Inst, &TLI) ||
5363 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5364 ++NumDeadInst;
5365 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5366 salvageDebugInfo(*Inst);
5367 Inst->eraseFromParent();
5368 MadeIRChange = true;
5369 continue;
5370 }
5371
5372 Worklist.push(Inst);
5373 }
5374
5375 return MadeIRChange;
5376}
5377
5383 const InstCombineOptions &Opts) {
// Driver loop: repeatedly prepares the worklist and runs the combiner on F
// until an iteration makes no change or the iteration limit is hit.
// Returns true if the function's IR was modified.
5384 auto &DL = F.getDataLayout();
5385
5386 /// Builder - This is an IRBuilder that automatically inserts new
5387 /// instructions into the worklist when they are created.
// NOTE(review): the builder declaration line (embedded line 5388) is absent
// from this listing; only its constructor arguments are visible below.
5389 F.getContext(), TargetFolder(DL),
5390 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5391 Worklist.add(I);
// Newly created assumes are also registered with the assumption cache.
5392 if (auto *Assume = dyn_cast<AssumeInst>(I))
5393 AC.registerAssumption(Assume);
5394 }));
5395
5397
5398 // Lower dbg.declare intrinsics otherwise their value may be clobbered
5399 // by instcombiner.
5400 bool MadeIRChange = false;
// NOTE(review): embedded line 5401 is absent here; presumably it guards the
// next statement (e.g. on the ShouldLowerDbgDeclare option) - confirm.
5402 MadeIRChange = LowerDbgDeclare(F);
5403
5404 // Iterate while there is work to do.
5405 unsigned Iteration = 0;
5406 while (true) {
5407 ++Iteration;
5408
// Without fixpoint verification, stop as soon as the limit is exceeded.
// With verification enabled, one extra iteration is allowed to run.
5409 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5410 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5411 << " on " << F.getName()
5412 << " reached; stopping without verifying fixpoint\n");
5413 break;
5414 }
5415
5416 ++NumWorklistIterations;
5417 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5418 << F.getName() << "\n");
5419
// A fresh combiner instance is constructed for every iteration.
5420 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5421 ORE, BFI, BPI, PSI, DL, LI);
5423 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5424 MadeChangeInThisIteration |= IC.run();
5425 if (!MadeChangeInThisIteration)
5426 break;
5427
5428 MadeIRChange = true;
// Reached only when VerifyFixpoint is set and the extra iteration still
// changed something. NOTE(review): the call these arguments belong to
// (embedded line 5430) is absent; presumably a fatal-error report - confirm.
5429 if (Iteration > Opts.MaxIterations) {
5431 "Instruction Combining did not reach a fixpoint after " +
5432 Twine(Opts.MaxIterations) + " iterations",
5433 /*GenCrashDiag=*/false);
5434 }
5435 }
5436
// Bucket the number of iterations into the statistics counters.
5437 if (Iteration == 1)
5438 ++NumOneIteration;
5439 else if (Iteration == 2)
5440 ++NumTwoIterations;
5441 else if (Iteration == 3)
5442 ++NumThreeIterations;
5443 else
5444 ++NumFourOrMoreIterations;
5445
5446 return MadeIRChange;
5447}
5448
5450
5452 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
// Writes the pass name via the PassInfoMixin base, followed by the options as
// "<max-iterations=N;[no-]use-loop-info;[no-]verify-fixpoint>".
5453 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5454 OS, MapClassName2PassName);
5455 OS << '<';
5456 OS << "max-iterations=" << Options.MaxIterations << ";";
// Disabled boolean options are printed with a "no-" prefix.
5457 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5458 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5459 OS << '>';
5460}
5461
// Gathers the analyses needed by the combiner from the analysis manager and
// runs combineInstructionsOverFunction on F.
// NOTE(review): the signature and the declarations of ORE, BPI, Worklist and
// PA (embedded lines 5463, 5467, 5482, 5490-5491) are absent from this listing.
5464 auto &AC = AM.getResult<AssumptionAnalysis>(F);
5465 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
5466 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5468 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5469
5470 // TODO: Only use LoopInfo when the option is set. This requires that the
5471 // callers in the pass pipeline explicitly set the option.
// A cached LoopInfo is used if present; it is computed only when
// Options.UseLoopInfo is set, and stays null otherwise.
5472 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
5473 if (!LI && Options.UseLoopInfo)
5474 LI = &AM.getResult<LoopAnalysis>(F);
5475
5476 auto *AA = &AM.getResult<AAManager>(F);
5477 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
// Only a cached profile summary is consulted, so PSI may be null.
5478 ProfileSummaryInfo *PSI =
5479 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
// Block frequencies are requested only when a profile summary is available.
5480 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5481 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
5483
5484 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5485 BFI, BPI, PSI, LI, Options))
5486 // No changes, all analyses are preserved.
5487 return PreservedAnalyses::all();
5488
5489 // Mark all the analyses that instcombine updates as preserved.
5492 return PA;
5493}
5494
5496 AU.setPreservesCFG();
5509}
5510
// Legacy pass manager entry point: collects analyses through getAnalysis and
// forwards to combineInstructionsOverFunction. Returns false without doing any
// work when skipFunction(F) says the function should be skipped.
// NOTE(review): the signature (embedded line 5511) is absent from this listing.
5512 if (skipFunction(F))
5513 return false;
5514
5515 // Required analyses.
5516 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5517 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5518 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5519 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5520 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5521 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5522
5523 // Optional analyses.
// LoopInfo is used only if another pass has already made it available.
5524 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5525 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
// PSI is the address of a reference result, so the null check below is
// purely defensive.
5526 ProfileSummaryInfo *PSI =
5527 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Block frequencies are requested only when a profile summary is available.
5528 BlockFrequencyInfo *BFI =
5529 (PSI && PSI->hasProfileSummary()) ?
5530 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5531 nullptr;
5532 BranchProbabilityInfo *BPI = nullptr;
5533 if (auto *WrapperPass =
5534 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5535 BPI = &WrapperPass->getBPI();
5536
// NOTE(review): the final argument of this call (embedded line 5539) is
// absent from this listing.
5537 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5538 BFI, BPI, PSI, LI,
5540}
5541
5543
5546}
5547
5549 "Combine redundant instructions", false, false)
5561
5562// Initialization Routines
5565}
5566
5568 return new InstructionCombiningPass();
5569}
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
#define NL
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Hexagon Vector Combine
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
iv Induction Variable Users
Definition: IVUsers.cpp:48
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI)
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Constant * constantFoldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2)
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static bool IsSelect(MachineInstr &MI)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:403
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1860
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1898
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:807
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:314
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1130
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Class to represent array types.
Definition: DerivedTypes.h:371
uint64_t getNumElements() const
Definition: DerivedTypes.h:383
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
Type * getElementType() const
Definition: DerivedTypes.h:384
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
Definition: Attributes.cpp:438
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:203
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:507
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:414
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:248
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:372
const Instruction & front() const
Definition: BasicBlock.h:461
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:569
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:465
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:167
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:428
size_t size() const
Definition: BasicBlock.h:459
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:331
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1546
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:2015
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
ConstantArray - Constant Array Declarations.
Definition: Constants.h:424
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:767
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2606
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2593
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2599
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2653
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2587
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:864
This class represents a range of values.
Definition: ConstantRange.h:47
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition: Constants.h:508
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1399
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
static Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
Definition: Constants.cpp:768
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
const Constant * stripPointerCasts() const
Definition: Constant.h:218
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:432
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
SmallVector< APInt > getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const
Get GEP indices to access Offset inside ElemTy.
Definition: DataLayout.cpp:998
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
Definition: DataLayout.cpp:774
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:905
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value * > Indices) const
Returns the offset from the beginning of the type for the specified indices.
Definition: DataLayout.cpp:920
This is the common base class for debug info intrinsics for variables.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
iterator end()
Definition: DenseMap.h:84
void registerBranch(BranchInst *BI)
Add a branch condition to the cache.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
Represents flags for the getelementptr instruction/expression.
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:411
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:938
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:961
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Definition: IRBuilder.cpp:922
Value * CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1693
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2521
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2038
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2540
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1981
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1879
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1871
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:230
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2371
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2402
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1795
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2201
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1461
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1524
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2356
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1687
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:513
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:499
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:74
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
InstCombinePass(InstCombineOptions Opts={})
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
bool prepareWorklist(Function &F, ReversePostOrderTraversal< BasicBlock * > &RPOT)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Constant * getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth, Instruction *CxtI)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void tryToSinkInstructionDbgValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableIntrinsic * > &DbgUsers)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Definition: InstCombiner.h:157
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
Definition: InstCombiner.h:232
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
Definition: InstCombiner.h:139
TargetLibraryInfo & TLI
Definition: InstCombiner.h:73
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:366
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
Definition: InstCombiner.h:191
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
Definition: InstCombiner.h:418
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
BranchProbabilityInfo * BPI
Definition: InstCombiner.h:79
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DomConditionCache DC
Definition: InstCombiner.h:81
const bool MinimizeSize
Definition: InstCombiner.h:67
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:336
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
DominatorTree & DT
Definition: InstCombiner.h:74
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:284
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
Definition: InstCombiner.h:90
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
Definition: InstCombiner.h:213
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition: InstCombine.h:71
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void add(Instruction *I)
Add instruction to the worklist.
void push(Instruction *I)
Push the instruction onto the worklist stack.
void zap()
Check that the worklist is empty and nuke the backing store for the map.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:323
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
bool isTerminator() const
Definition: Instruction.h:277
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:328
bool isShift() const
Definition: Instruction.h:281
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isIntDivRem() const
Definition: Instruction.h:280
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Definition: Instructions.h:174
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:571
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
This is the common base class for memset/memcpy/memmove.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition: Metadata.h:62
This class represents min/max intrinsics.
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:688
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:77
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:110
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:104
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1433
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
This class represents a cast from signed integer to floating point.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
This class represents a cast from unsigned integer to floating point.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
Value * getOperand(unsigned i) const
Definition: User.h:169
bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition: User.cpp:115
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:852
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isZero() const
Definition: TypeSize.h:156
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:135
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNoFPClassCompatibleType(Type *Ty)
Returns true if this is a type legal for the 'nofpclass' attribute.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:160
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
Definition: PatternMatch.h:186
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations (logical or arithmetic).
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
Definition: PatternMatch.h:560
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, FCmpInst, FCmpInst::Predicate > m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
Definition: PatternMatch.h:305
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
Definition: PatternMatch.h:791
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
FunctionPass * createInstructionCombiningPass()
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2805
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic * > Insns, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:2244
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1671
auto successors(const MachineBasicBlock *BB)
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
gep_type_iterator gep_type_end(const User *GEP)
Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition: Local.cpp:2787
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1916
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities.
Definition: Local.cpp:1691
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2715
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Or
Bitwise or logical OR of integers.
DWARFExpression::Operation Op
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
void initializeInstructionCombiningPassPass(PassRegistry &)
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:281
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:323
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:240
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
SimplifyQuery getWithInstruction(const Instruction *I) const
SimplifyQuery getWithoutUndef() const