InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
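//
// Illustrative sketch (not part of the original header): under
// canonicalizations 1 and 6 above, IR such as
//     %a = mul i32 8, %X
// is rewritten with the constant on the RHS and the power-of-two multiply
// turned into a shift:
//     %a = shl i32 %X, 3
//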
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
138 "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
141static cl::opt<unsigned>
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
156InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
157  // Handle target specific intrinsics
158  if (II.getCalledFunction()->isTargetIntrinsic()) {
159    return TTI.instCombineIntrinsic(*this, II);
160  }
161 return std::nullopt;
162}
163
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
169 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
183 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
194  return llvm::emitGEPOffset(&Builder, DL, GEP);
195}
196
197/// Legal integers and common types are considered desirable. This is used to
198/// avoid creating instructions with types that may not be supported well by
199/// the backend.
200/// NOTE: This treats i8, i16 and i32 specially because they are common
201/// types in frontend languages.
202bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
203 switch (BitWidth) {
204 case 8:
205 case 16:
206 case 32:
207 return true;
208 default:
209 return DL.isLegalInteger(BitWidth);
210 }
211}
212
213/// Return true if it is desirable to convert an integer computation from a
214/// given bit width to a new bit width.
215/// We don't want to convert from a legal or desirable type (like i8) to an
216/// illegal type or from a smaller to a larger illegal type. A width of '1'
217/// is always treated as a desirable type because i1 is a fundamental type in
218/// IR, and there are many specialized optimizations for i1 types.
219/// Common/desirable widths are equally treated as legal to convert to, in
220/// order to open up more combining opportunities.
221bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
222 unsigned ToWidth) const {
223 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
224 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
225
226 // Convert to desirable widths even if they are not legal types.
227 // Only shrink types, to prevent infinite loops.
228 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
229 return true;
230
231  // If this is a legal or desirable integer from type, and the result would be
232 // an illegal type, don't do the transformation.
233 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
234 return false;
235
236 // Otherwise, if both are illegal, do not increase the size of the result. We
237 // do allow things like i160 -> i64, but not i64 -> i160.
238 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
239 return false;
240
241 return true;
242}
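// Illustrative sketch (assuming a typical data layout where i32 is legal and
// i33 is not): shouldChangeType(33, 32) returns true because we are shrinking
// to the desirable width i32, whereas shouldChangeType(32, 33) returns false
// because it would widen a legal type into an illegal one.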
243
244/// Return true if it is desirable to convert a computation from 'From' to 'To'.
245/// We don't want to convert from a legal to an illegal type or from a smaller
246/// to a larger illegal type. i1 is always treated as a legal type because it is
247/// a fundamental type in IR, and there are many specialized optimizations for
248/// i1 types.
249bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
250 // TODO: This could be extended to allow vectors. Datalayout changes might be
251 // needed to properly support that.
252 if (!From->isIntegerTy() || !To->isIntegerTy())
253 return false;
254
255 unsigned FromWidth = From->getPrimitiveSizeInBits();
256 unsigned ToWidth = To->getPrimitiveSizeInBits();
257 return shouldChangeType(FromWidth, ToWidth);
258}
259
260// Return true if No Signed Wrap should be maintained for I.
261// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
262// where both B and C should be ConstantInts, results in a constant that does
263// not overflow. This function only handles the Add and Sub opcodes. For
264// all other opcodes, the function conservatively returns false.
265static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
266  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
267 if (!OBO || !OBO->hasNoSignedWrap())
268 return false;
269
270  // We reason about Add and Sub only.
271 Instruction::BinaryOps Opcode = I.getOpcode();
272 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
273 return false;
274
275 const APInt *BVal, *CVal;
276 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
277 return false;
278
279 bool Overflow = false;
280 if (Opcode == Instruction::Add)
281 (void)BVal->sadd_ov(*CVal, Overflow);
282 else
283 (void)BVal->ssub_ov(*CVal, Overflow);
284
285 return !Overflow;
286}
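// Illustrative sketch: when reassociating (X +nsw 1) +nsw 2, B = 1 and C = 2;
// 1 + 2 does not overflow in a signed sense, so the nsw flag may be kept on
// the combined add. With B = INT_MAX and C = 1 the helper returns false and
// the flag is dropped.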
287
288static bool hasNoUnsignedWrap(BinaryOperator &I) {
289  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
290 return OBO && OBO->hasNoUnsignedWrap();
291}
292
293static bool hasNoSignedWrap(BinaryOperator &I) {
294  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
295 return OBO && OBO->hasNoSignedWrap();
296}
297
298/// Conservatively clears subclassOptionalData after a reassociation or
299/// commutation. Fast-math flags are the exception: where applicable they are
300/// preserved, since they remain valid across the rewrite.
302 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
303 if (!FPMO) {
304 I.clearSubclassOptionalData();
305 return;
306 }
307
308 FastMathFlags FMF = I.getFastMathFlags();
309 I.clearSubclassOptionalData();
310 I.setFastMathFlags(FMF);
311}
312
313/// Combine constant operands of associative operations either before or after a
314/// cast to eliminate one of the associative operations:
315/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
316/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
317static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
318                                   InstCombinerImpl &IC) {
319 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
320 if (!Cast || !Cast->hasOneUse())
321 return false;
322
323 // TODO: Enhance logic for other casts and remove this check.
324 auto CastOpcode = Cast->getOpcode();
325 if (CastOpcode != Instruction::ZExt)
326 return false;
327
328 // TODO: Enhance logic for other BinOps and remove this check.
329 if (!BinOp1->isBitwiseLogicOp())
330 return false;
331
332 auto AssocOpcode = BinOp1->getOpcode();
333 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
334 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
335 return false;
336
337 Constant *C1, *C2;
338 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
339 !match(BinOp2->getOperand(1), m_Constant(C2)))
340 return false;
341
342 // TODO: This assumes a zext cast.
343 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
344 // to the destination type might lose bits.
345
346 // Fold the constants together in the destination type:
347 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
348 const DataLayout &DL = IC.getDataLayout();
349 Type *DestTy = C1->getType();
350 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
351 if (!CastC2)
352 return false;
353 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
354 if (!FoldedC)
355 return false;
356
357 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
358 IC.replaceOperand(*BinOp1, 1, FoldedC);
360 Cast->dropPoisonGeneratingFlags();
361 return true;
362}
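// Illustrative sketch of the fold above: with a zext cast and a bitwise 'and',
//     %t = and i8 %X, 7
//     %z = zext i8 %t to i32
//     %r = and i32 %z, 15
// the two constants are folded in the destination type (15 & zext(7) == 7),
// giving
//     %r = and i32 (zext i8 %X to i32), 7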
363
364// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
365// inttoptr ( ptrtoint (x) ) --> x
366Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
367 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
368 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
369 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
370 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
371 Type *CastTy = IntToPtr->getDestTy();
372 if (PtrToInt &&
373 CastTy->getPointerAddressSpace() ==
374 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
375 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
376 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
377 return PtrToInt->getOperand(0);
378 }
379 return nullptr;
380}
381
382/// This performs a few simplifications for operators that are associative or
383/// commutative:
384///
385/// Commutative operators:
386///
387/// 1. Order operands such that they are listed from right (least complex) to
388/// left (most complex). This puts constants before unary operators before
389/// binary operators.
390///
391/// Associative operators:
392///
393/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
394/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
395///
396/// Associative and commutative operators:
397///
398/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
399/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
400/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
401/// if C1 and C2 are constants.
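///
/// Illustrative sketch of transform 2: for "(X & 255) & 15" the inner query
/// "255 & 15" simplifies to 15, so the expression is rewritten as "X & 15".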
402bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
403  Instruction::BinaryOps Opcode = I.getOpcode();
404 bool Changed = false;
405
406 do {
407 // Order operands such that they are listed from right (least complex) to
408 // left (most complex). This puts constants before unary operators before
409 // binary operators.
410 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
411 getComplexity(I.getOperand(1)))
412 Changed = !I.swapOperands();
413
414 if (I.isCommutative()) {
415 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
416 replaceOperand(I, 0, Pair->first);
417 replaceOperand(I, 1, Pair->second);
418 Changed = true;
419 }
420 }
421
422 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
423 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
424
425 if (I.isAssociative()) {
426 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
427 if (Op0 && Op0->getOpcode() == Opcode) {
428 Value *A = Op0->getOperand(0);
429 Value *B = Op0->getOperand(1);
430 Value *C = I.getOperand(1);
431
432 // Does "B op C" simplify?
433 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
434 // It simplifies to V. Form "A op V".
435 replaceOperand(I, 0, A);
436 replaceOperand(I, 1, V);
437 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
438 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
439
440 // Conservatively clear all optional flags since they may not be
441 // preserved by the reassociation. Reset nsw/nuw based on the above
442 // analysis.
444
445 // Note: this is only valid because SimplifyBinOp doesn't look at
446 // the operands to Op0.
447 if (IsNUW)
448 I.setHasNoUnsignedWrap(true);
449
450 if (IsNSW)
451 I.setHasNoSignedWrap(true);
452
453 Changed = true;
454 ++NumReassoc;
455 continue;
456 }
457 }
458
459 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
460 if (Op1 && Op1->getOpcode() == Opcode) {
461 Value *A = I.getOperand(0);
462 Value *B = Op1->getOperand(0);
463 Value *C = Op1->getOperand(1);
464
465 // Does "A op B" simplify?
466 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
467 // It simplifies to V. Form "V op C".
468 replaceOperand(I, 0, V);
469 replaceOperand(I, 1, C);
470 // Conservatively clear the optional flags, since they may not be
471 // preserved by the reassociation.
473 Changed = true;
474 ++NumReassoc;
475 continue;
476 }
477 }
478 }
479
480 if (I.isAssociative() && I.isCommutative()) {
481 if (simplifyAssocCastAssoc(&I, *this)) {
482 Changed = true;
483 ++NumReassoc;
484 continue;
485 }
486
487 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
488 if (Op0 && Op0->getOpcode() == Opcode) {
489 Value *A = Op0->getOperand(0);
490 Value *B = Op0->getOperand(1);
491 Value *C = I.getOperand(1);
492
493 // Does "C op A" simplify?
494 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
495 // It simplifies to V. Form "V op B".
496 replaceOperand(I, 0, V);
497 replaceOperand(I, 1, B);
498 // Conservatively clear the optional flags, since they may not be
499 // preserved by the reassociation.
501 Changed = true;
502 ++NumReassoc;
503 continue;
504 }
505 }
506
507 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
508 if (Op1 && Op1->getOpcode() == Opcode) {
509 Value *A = I.getOperand(0);
510 Value *B = Op1->getOperand(0);
511 Value *C = Op1->getOperand(1);
512
513 // Does "C op A" simplify?
514 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
515 // It simplifies to V. Form "B op V".
516 replaceOperand(I, 0, B);
517 replaceOperand(I, 1, V);
518 // Conservatively clear the optional flags, since they may not be
519 // preserved by the reassociation.
521 Changed = true;
522 ++NumReassoc;
523 continue;
524 }
525 }
526
527 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
528 // if C1 and C2 are constants.
529 Value *A, *B;
530 Constant *C1, *C2, *CRes;
531 if (Op0 && Op1 &&
532 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
533 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
534 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
535 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
536 bool IsNUW = hasNoUnsignedWrap(I) &&
537 hasNoUnsignedWrap(*Op0) &&
538 hasNoUnsignedWrap(*Op1);
539 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
540 BinaryOperator::CreateNUW(Opcode, A, B) :
541 BinaryOperator::Create(Opcode, A, B);
542
543 if (isa<FPMathOperator>(NewBO)) {
544 FastMathFlags Flags = I.getFastMathFlags() &
545 Op0->getFastMathFlags() &
546 Op1->getFastMathFlags();
547 NewBO->setFastMathFlags(Flags);
548 }
549 InsertNewInstWith(NewBO, I.getIterator());
550 NewBO->takeName(Op1);
551 replaceOperand(I, 0, NewBO);
552 replaceOperand(I, 1, CRes);
553 // Conservatively clear the optional flags, since they may not be
554 // preserved by the reassociation.
556 if (IsNUW)
557 I.setHasNoUnsignedWrap(true);
558
559 Changed = true;
560 continue;
561 }
562 }
563
564 // No further simplifications.
565 return Changed;
566 } while (true);
567}
568
569/// Return whether "X LOp (Y ROp Z)" is always equal to
570/// "(X LOp Y) ROp (X LOp Z)".
573 // X & (Y | Z) <--> (X & Y) | (X & Z)
574 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
575 if (LOp == Instruction::And)
576 return ROp == Instruction::Or || ROp == Instruction::Xor;
577
578 // X | (Y & Z) <--> (X | Y) & (X | Z)
579 if (LOp == Instruction::Or)
580 return ROp == Instruction::And;
581
582 // X * (Y + Z) <--> (X * Y) + (X * Z)
583 // X * (Y - Z) <--> (X * Y) - (X * Z)
584 if (LOp == Instruction::Mul)
585 return ROp == Instruction::Add || ROp == Instruction::Sub;
586
587 return false;
588}
589
590/// Return whether "(X LOp Y) ROp Z" is always equal to
591/// "(X ROp Z) LOp (Y ROp Z)".
595 return leftDistributesOverRight(ROp, LOp);
596
597 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
599
600 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
601 // but this requires knowing that the addition does not overflow and other
602 // such subtleties.
603}
604
605/// This function returns the identity value for the given opcode, which can be
606/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
608 if (isa<Constant>(V))
609 return nullptr;
610
611 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
612}
613
614/// This function predicates factorization using distributive laws. By default,
615/// it just returns the 'Op' inputs. But for special-cases like
616/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
617/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
618/// allow more factorization opportunities.
621 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
622 assert(Op && "Expected a binary operator");
623 LHS = Op->getOperand(0);
624 RHS = Op->getOperand(1);
625 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
626 Constant *C;
627 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
628 // X << C --> X * (1 << C)
629 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
630 return Instruction::Mul;
631 }
632 // TODO: We can add other conversions e.g. shr => div etc.
633 }
634 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
635 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
637 // lshr nneg C, X --> ashr nneg C, X
638 return Instruction::AShr;
639 }
640 }
641 return Op->getOpcode();
642}
643
644/// This tries to simplify binary operations by factorizing out common terms
645/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
646static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
647                               InstCombiner::BuilderTy &Builder,
648                               Instruction::BinaryOps InnerOpcode, Value *A,
649 Value *B, Value *C, Value *D) {
650 assert(A && B && C && D && "All values must be provided");
651
652 Value *V = nullptr;
653 Value *RetVal = nullptr;
654 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
655 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
656
657 // Does "X op' Y" always equal "Y op' X"?
658 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
659
660 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
661 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
662 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
663 // commutative case, "(A op' B) op (C op' A)"?
664 if (A == C || (InnerCommutative && A == D)) {
665 if (A != C)
666 std::swap(C, D);
667 // Consider forming "A op' (B op D)".
668 // If "B op D" simplifies then it can be formed with no cost.
669 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
670
671 // If "B op D" doesn't simplify then only go on if one of the existing
672 // operations "A op' B" and "C op' D" will be zapped as no longer used.
673 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
674 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
675 if (V)
676 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
677 }
678 }
679
680 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
681 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
682 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
683 // commutative case, "(A op' B) op (B op' D)"?
684 if (B == D || (InnerCommutative && B == C)) {
685 if (B != D)
686 std::swap(C, D);
687 // Consider forming "(A op C) op' B".
688 // If "A op C" simplifies then it can be formed with no cost.
689 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
690
691 // If "A op C" doesn't simplify then only go on if one of the existing
692 // operations "A op' B" and "C op' D" will be zapped as no longer used.
693 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
694 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
695 if (V)
696 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
697 }
698 }
699
700 if (!RetVal)
701 return nullptr;
702
703 ++NumFactor;
704 RetVal->takeName(&I);
705
706 // Try to add no-overflow flags to the final value.
707 if (isa<OverflowingBinaryOperator>(RetVal)) {
708 bool HasNSW = false;
709 bool HasNUW = false;
710 if (isa<OverflowingBinaryOperator>(&I)) {
711 HasNSW = I.hasNoSignedWrap();
712 HasNUW = I.hasNoUnsignedWrap();
713 }
714 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
715 HasNSW &= LOBO->hasNoSignedWrap();
716 HasNUW &= LOBO->hasNoUnsignedWrap();
717 }
718
719 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
720 HasNSW &= ROBO->hasNoSignedWrap();
721 HasNUW &= ROBO->hasNoUnsignedWrap();
722 }
723
724 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
725 // We can propagate 'nsw' if we know that
726 // %Y = mul nsw i16 %X, C
727 // %Z = add nsw i16 %Y, %X
728 // =>
729 // %Z = mul nsw i16 %X, C+1
730 //
731 // iff C+1 isn't INT_MIN
732 const APInt *CInt;
733 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
734 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
735
736 // nuw can be propagated with any constant or nuw value.
737 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
738 }
739 }
740 return RetVal;
741}
742
743// If `I` has one Const operand and the other matches `(ctpop (not x))`,
744// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
745// This is only useful if the new subtract can fold, so we only handle the
746// following cases:
747// 1) (add/sub/disjoint_or C, (ctpop (not x)))
748//    -> (add/sub/disjoint_or C', (ctpop x))
749// 2) (cmp pred C, (ctpop (not x)))
750//    -> (cmp pred C', (ctpop x))
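//
// Illustrative sketch on i8, where ctpop(~x) == 8 - ctpop(x):
//     add i8 5, (ctpop (not %x))   becomes   sub i8 13, (ctpop %x)
// i.e. the constant absorbs the bit width and the 'not' disappears.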
752 unsigned Opc = I->getOpcode();
753 unsigned ConstIdx = 1;
754 switch (Opc) {
755 default:
756 return nullptr;
757 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
758  // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
759 // is constant.
760 case Instruction::Sub:
761 ConstIdx = 0;
762 break;
763 case Instruction::ICmp:
764  // Signed predicates aren't correct in some edge cases (e.g. for i2 types); as
765  // well, since (ctpop x) is known [0, log2(BitWidth(x))], almost all signed
766  // comparisons against it are simplified to unsigned ones.
767 if (cast<ICmpInst>(I)->isSigned())
768 return nullptr;
769 break;
770 case Instruction::Or:
771 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
772 return nullptr;
773 [[fallthrough]];
774 case Instruction::Add:
775 break;
776 }
777
778 Value *Op;
779 // Find ctpop.
780 if (!match(I->getOperand(1 - ConstIdx),
781 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
782 return nullptr;
783
784 Constant *C;
785 // Check other operand is ImmConstant.
786 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
787 return nullptr;
788
789 Type *Ty = Op->getType();
790 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
791 // Need extra check for icmp. Note if this check is true, it generally means
792 // the icmp will simplify to true/false.
793 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality() &&
794 !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue())
795 return nullptr;
796
797 // Check we can invert `(not x)` for free.
798 bool Consumes = false;
799 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
800 return nullptr;
801 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
802 assert(NotOp != nullptr &&
803 "Desync between isFreeToInvert and getFreelyInverted");
804
805 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
806
807 Value *R = nullptr;
808
809 // Do the transformation here to avoid potentially introducing an infinite
810 // loop.
811 switch (Opc) {
812 case Instruction::Sub:
813 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
814 break;
815 case Instruction::Or:
816 case Instruction::Add:
817 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
818 break;
819 case Instruction::ICmp:
820 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
821 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
822 break;
823 default:
824 llvm_unreachable("Unhandled Opcode");
825 }
826 assert(R != nullptr);
827 return replaceInstUsesWith(*I, R);
828}
829
830// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
831// IFF
832// 1) the logic_shifts match
833// 2) BinOp1 is `and`, or
834//    both binops distribute over the shift and either BinOp2 is `and` or
835//    (logic_shift (inv_logic_shift C1, C), C) == C1
836//
837// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
838//
839// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
840// IFF
841// 1) the logic_shifts match
842// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
843//
844// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
845//
846// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
847// IFF
848// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
849// 2) Binop2 is `not`
850//
851// -> (arithmetic_shift Binop1((not X), Y), Amt)
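//
// Illustrative sketch of the second pattern with matching 'and'/'lshr':
//     and (and (lshr %X, 4), Mask), (lshr %Y, 4)
// becomes
//     and (lshr (and %X, %Y), 4), Mask
// so the two shifts collapse into one.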
852
854 const DataLayout &DL = I.getModule()->getDataLayout();
855 auto IsValidBinOpc = [](unsigned Opc) {
856 switch (Opc) {
857 default:
858 return false;
859 case Instruction::And:
860 case Instruction::Or:
861 case Instruction::Xor:
862 case Instruction::Add:
863 // Skip Sub as we only match constant masks which will canonicalize to use
864 // add.
865 return true;
866 }
867 };
868
869 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
870 // constraints.
871 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
872 unsigned ShOpc) {
873 assert(ShOpc != Instruction::AShr);
874 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
875 ShOpc == Instruction::Shl;
876 };
877
878 auto GetInvShift = [](unsigned ShOpc) {
879 assert(ShOpc != Instruction::AShr);
880 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
881 };
882
883 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
884 unsigned ShOpc, Constant *CMask,
885 Constant *CShift) {
886 // If the BinOp1 is `and` we don't need to check the mask.
887 if (BinOpc1 == Instruction::And)
888 return true;
889
890    // For all other possible transforms we need a completely distributable
891    // binop/shift pair (anything but `add` + `lshr`).
892 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
893 return false;
894
895 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
896 // vecs, otherwise the mask will be simplified and the following check will
897 // handle it).
898 if (BinOpc2 == Instruction::And)
899 return true;
900
901 // Otherwise, need mask that meets the below requirement.
902 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
903 Constant *MaskInvShift =
904 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
905 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
906 CMask;
907 };
908
909 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
910 Constant *CMask, *CShift;
911 Value *X, *Y, *ShiftedX, *Mask, *Shift;
912 if (!match(I.getOperand(ShOpnum),
913 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
914 return nullptr;
915 if (!match(I.getOperand(1 - ShOpnum),
916 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
917 return nullptr;
918
919 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
920 return nullptr;
921
922 // Make sure we are matching instruction shifts and not ConstantExpr
923 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
924 auto *IX = dyn_cast<Instruction>(ShiftedX);
925 if (!IY || !IX)
926 return nullptr;
927
928 // LHS and RHS need same shift opcode
929 unsigned ShOpc = IY->getOpcode();
930 if (ShOpc != IX->getOpcode())
931 return nullptr;
932
933 // Make sure binop is real instruction and not ConstantExpr
934 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
935 if (!BO2)
936 return nullptr;
937
938 unsigned BinOpc = BO2->getOpcode();
939 // Make sure we have valid binops.
940 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
941 return nullptr;
942
943 if (ShOpc == Instruction::AShr) {
944 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
945 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
946 Value *NotX = Builder.CreateNot(X);
947 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
949 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
950 }
951
952 return nullptr;
953 }
954
955    // If BinOp1 == BinOp2 and it's a bitwise op (or `shl` with `add`), then just
956    // distribute to drop the shift, irrespective of the constants.
957 if (BinOpc == I.getOpcode() &&
958 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
959 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
960 Value *NewBinOp1 = Builder.CreateBinOp(
961 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
962 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
963 }
964
965 // Otherwise we can only distribute by constant shifting the mask, so
966 // ensure we have constants.
967 if (!match(Shift, m_ImmConstant(CShift)))
968 return nullptr;
969 if (!match(Mask, m_ImmConstant(CMask)))
970 return nullptr;
971
972 // Check if we can distribute the binops.
973 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
974 return nullptr;
975
976 Constant *NewCMask =
977 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
978 Value *NewBinOp2 = Builder.CreateBinOp(
979 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
980 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
981 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
982 NewBinOp1, CShift);
983 };
984
985 if (Instruction *R = MatchBinOp(0))
986 return R;
987 return MatchBinOp(1);
988}
989
990// (Binop (zext C), (select C, T, F))
991// -> (select C, (binop 1, T), (binop 0, F))
992//
993// (Binop (sext C), (select C, T, F))
994// -> (select C, (binop -1, T), (binop 0, F))
995//
996// Attempt to simplify binary operations into a select with folded args, when
997// one operand of the binop is a select instruction and the other operand is a
998// zext/sext of the select condition.
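//
// Illustrative sketch with 'add' and a zext:
//     add (zext i1 %c to i32), (select i1 %c, i32 %t, i32 %f)
// becomes
//     select i1 %c, i32 (add %t, 1), i32 (add %f, 0)
// (the add of 0 is subsequently simplified away).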
1001 // TODO: this simplification may be extended to any speculatable instruction,
1002 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1003 Instruction::BinaryOps Opc = I.getOpcode();
1004 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1005 Value *A, *CondVal, *TrueVal, *FalseVal;
1006 Value *CastOp;
1007
1008 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1009 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1010 A->getType()->getScalarSizeInBits() == 1 &&
1011 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1012 m_Value(FalseVal)));
1013 };
1014
1015  // Make sure one side of the binop is a select instruction, and the other is
1016  // a zero/sign extension operating on an i1.
1017 if (MatchSelectAndCast(LHS, RHS))
1018 CastOp = LHS;
1019 else if (MatchSelectAndCast(RHS, LHS))
1020 CastOp = RHS;
1021 else
1022 return nullptr;
1023
1024 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1025 bool IsCastOpRHS = (CastOp == RHS);
1026 bool IsZExt = isa<ZExtInst>(CastOp);
1027 Constant *C;
1028
1029 if (IsTrueArm) {
1030 C = Constant::getNullValue(V->getType());
1031 } else if (IsZExt) {
1032 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1033 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1034 } else {
1035 C = Constant::getAllOnesValue(V->getType());
1036 }
1037
1038 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1039 : Builder.CreateBinOp(Opc, C, V);
1040 };
1041
1042 // If the value used in the zext/sext is the select condition, or the negated
1043 // of the select condition, the binop can be simplified.
1044 if (CondVal == A) {
1045 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1046 return SelectInst::Create(CondVal, NewTrueVal,
1047 NewFoldedConst(true, FalseVal));
1048 }
1049
1050 if (match(A, m_Not(m_Specific(CondVal)))) {
1051 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1052 return SelectInst::Create(CondVal, NewTrueVal,
1053 NewFoldedConst(false, FalseVal));
1054 }
1055
1056 return nullptr;
1057}
1058
1060 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1061 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1062 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1063 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1064 Value *A, *B, *C, *D;
1065 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1066
1067 if (Op0)
1068 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1069 if (Op1)
1070 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1071
1072 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1073 // a common term.
1074 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1075 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1076 return V;
1077
1078 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1079 // term.
1080 if (Op0)
1081 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1082 if (Value *V =
1083 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1084 return V;
1085
1086 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1087 // term.
1088 if (Op1)
1089 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1090 if (Value *V =
1091 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1092 return V;
1093
1094 return nullptr;
1095}
1096
1097/// This tries to simplify binary operations which some other binary operation
1098/// distributes over, either by factorizing out common terms
1099/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or by expanding out if this results in
1100/// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1101/// Returns the simplified value, or null if it didn't simplify.
1103 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1104 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1105 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1106 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1107
1108 // Factorization.
1109 if (Value *R = tryFactorizationFolds(I))
1110 return R;
1111
1112 // Expansion.
1113 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1114 // The instruction has the form "(A op' B) op C". See if expanding it out
1115 // to "(A op C) op' (B op C)" results in simplifications.
1116 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1117 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1118
1119 // Disable the use of undef because it's not safe to distribute undef.
1120 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1121 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1122 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1123
1124 // Do "A op C" and "B op C" both simplify?
1125 if (L && R) {
1126 // They do! Return "L op' R".
1127 ++NumExpand;
1128 C = Builder.CreateBinOp(InnerOpcode, L, R);
1129 C->takeName(&I);
1130 return C;
1131 }
1132
1133 // Does "A op C" simplify to the identity value for the inner opcode?
1134 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1135 // They do! Return "B op C".
1136 ++NumExpand;
1137 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1138 C->takeName(&I);
1139 return C;
1140 }
1141
1142 // Does "B op C" simplify to the identity value for the inner opcode?
1143 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1144 // They do! Return "A op C".
1145 ++NumExpand;
1146 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1147 C->takeName(&I);
1148 return C;
1149 }
1150 }
1151
1152 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1153 // The instruction has the form "A op (B op' C)". See if expanding it out
1154 // to "(A op B) op' (A op C)" results in simplifications.
1155 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1156 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1157
1158 // Disable the use of undef because it's not safe to distribute undef.
1159 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1160 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1161 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1162
1163 // Do "A op B" and "A op C" both simplify?
1164 if (L && R) {
1165 // They do! Return "L op' R".
1166 ++NumExpand;
1167 A = Builder.CreateBinOp(InnerOpcode, L, R);
1168 A->takeName(&I);
1169 return A;
1170 }
1171
1172 // Does "A op B" simplify to the identity value for the inner opcode?
1173 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1174 // They do! Return "A op C".
1175 ++NumExpand;
1176 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1177 A->takeName(&I);
1178 return A;
1179 }
1180
1181 // Does "A op C" simplify to the identity value for the inner opcode?
1182 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1183 // They do! Return "A op B".
1184 ++NumExpand;
1185 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1186 A->takeName(&I);
1187 return A;
1188 }
1189 }
1190
1192}
1193
1194static std::optional<std::pair<Value *, Value *>>
1196 if (LHS->getParent() != RHS->getParent())
1197 return std::nullopt;
1198
1199 if (LHS->getNumIncomingValues() < 2)
1200 return std::nullopt;
1201
1202 if (!equal(LHS->blocks(), RHS->blocks()))
1203 return std::nullopt;
1204
1205 Value *L0 = LHS->getIncomingValue(0);
1206 Value *R0 = RHS->getIncomingValue(0);
1207
1208 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1209 Value *L1 = LHS->getIncomingValue(I);
1210 Value *R1 = RHS->getIncomingValue(I);
1211
1212 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1213 continue;
1214
1215 return std::nullopt;
1216 }
1217
1218 return std::optional(std::pair(L0, R0));
1219}
1220
1221std::optional<std::pair<Value *, Value *>>
1222InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1223 Instruction *LHSInst = dyn_cast<Instruction>(LHS);
1224 Instruction *RHSInst = dyn_cast<Instruction>(RHS);
1225 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1226 return std::nullopt;
1227 switch (LHSInst->getOpcode()) {
1228 case Instruction::PHI:
1229 return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS));
1230 case Instruction::Select: {
1231 Value *Cond = LHSInst->getOperand(0);
1232 Value *TrueVal = LHSInst->getOperand(1);
1233 Value *FalseVal = LHSInst->getOperand(2);
1234 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1235 FalseVal == RHSInst->getOperand(1))
1236 return std::pair(TrueVal, FalseVal);
1237 return std::nullopt;
1238 }
1239 case Instruction::Call: {
1240 // Match min(a, b) and max(a, b)
1241 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1242 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1243 if (LHSMinMax && RHSMinMax &&
1244 LHSMinMax->getPredicate() ==
1246 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1247 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1248 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1249 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1250 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1251 return std::nullopt;
1252 }
1253 default:
1254 return std::nullopt;
1255 }
1256}
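// Illustrative sketch: for a commutative user such as
//     add (smin %a, %b), (smax %a, %b)
// matchSymmetricPair returns the pair (%a, %b), so the operands of the add can
// simply be replaced with %a and %b.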
1257
1259 Value *LHS,
1260 Value *RHS) {
1261 Value *A, *B, *C, *D, *E, *F;
1262 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1263 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1264 if (!LHSIsSelect && !RHSIsSelect)
1265 return nullptr;
1266
1267 FastMathFlags FMF;
1269 if (isa<FPMathOperator>(&I)) {
1270 FMF = I.getFastMathFlags();
1272 }
1273
1274 Instruction::BinaryOps Opcode = I.getOpcode();
1276
1277 Value *Cond, *True = nullptr, *False = nullptr;
1278
1279 // Special-case for add/negate combination. Replace the zero in the negation
1280 // with the trailing add operand:
1281 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1282 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1283 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1284 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1285 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1286 return nullptr;
1287
1288 Value *N;
1289 if (True && match(FVal, m_Neg(m_Value(N)))) {
1290 Value *Sub = Builder.CreateSub(Z, N);
1291 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1292 }
1293 if (False && match(TVal, m_Neg(m_Value(N)))) {
1294 Value *Sub = Builder.CreateSub(Z, N);
1295 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1296 }
1297 return nullptr;
1298 };
1299
1300 if (LHSIsSelect && RHSIsSelect && A == D) {
1301 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1302 Cond = A;
1303 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1304 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1305
1306 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1307 if (False && !True)
1308 True = Builder.CreateBinOp(Opcode, B, E);
1309 else if (True && !False)
1310 False = Builder.CreateBinOp(Opcode, C, F);
1311 }
1312 } else if (LHSIsSelect && LHS->hasOneUse()) {
1313 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1314 Cond = A;
1315 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1316 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1317 if (Value *NewSel = foldAddNegate(B, C, RHS))
1318 return NewSel;
1319 } else if (RHSIsSelect && RHS->hasOneUse()) {
1320 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1321 Cond = D;
1322 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1323 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1324 if (Value *NewSel = foldAddNegate(E, F, LHS))
1325 return NewSel;
1326 }
1327
1328 if (!True || !False)
1329 return nullptr;
1330
1331 Value *SI = Builder.CreateSelect(Cond, True, False);
1332 SI->takeName(&I);
1333 return SI;
1334}
1335
1336/// Freely adapt every user of V as if V were changed to !V.
1337/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1339 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1340 for (User *U : make_early_inc_range(I->users())) {
1341 if (U == IgnoredUser)
1342 continue; // Don't consider this user.
1343 switch (cast<Instruction>(U)->getOpcode()) {
1344 case Instruction::Select: {
1345 auto *SI = cast<SelectInst>(U);
1346 SI->swapValues();
1347 SI->swapProfMetadata();
1348 break;
1349 }
1350 case Instruction::Br: {
1351 BranchInst *BI = cast<BranchInst>(U);
1352 BI->swapSuccessors(); // swaps prof metadata too
1353 if (BPI)
1355 break;
1356 }
1357 case Instruction::Xor:
1358 replaceInstUsesWith(cast<Instruction>(*U), I);
1359 // Add to worklist for DCE.
1360 addToWorklist(cast<Instruction>(U));
1361 break;
1362 default:
1363 llvm_unreachable("Got unexpected user - out of sync with "
1364 "canFreelyInvertAllUsersOf() ?");
1365 }
1366 }
1367}
1368
1369/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1370/// constant zero (which is the 'negate' form).
1371Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1372 Value *NegV;
1373 if (match(V, m_Neg(m_Value(NegV))))
1374 return NegV;
1375
1376 // Constants can be considered to be negated values if they can be folded.
1377 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1378 return ConstantExpr::getNeg(C);
1379
1380 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1381 if (C->getType()->getElementType()->isIntegerTy())
1382 return ConstantExpr::getNeg(C);
1383
1384 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1385 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1386 Constant *Elt = CV->getAggregateElement(i);
1387 if (!Elt)
1388 return nullptr;
1389
1390 if (isa<UndefValue>(Elt))
1391 continue;
1392
1393 if (!isa<ConstantInt>(Elt))
1394 return nullptr;
1395 }
1396 return ConstantExpr::getNeg(CV);
1397 }
1398
1399 // Negate integer vector splats.
1400 if (auto *CV = dyn_cast<Constant>(V))
1401 if (CV->getType()->isVectorTy() &&
1402 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1403 return ConstantExpr::getNeg(CV);
1404
1405 return nullptr;
1406}
1407
1408// Try to fold:
1409// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1410// -> ({s|u}itofp (int_binop x, y))
1411// 2) (fp_binop ({s|u}itofp x), FpC)
1412// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1413//
1414// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1415Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1416 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1418
1419 Type *FPTy = BO.getType();
1420 Type *IntTy = IntOps[0]->getType();
1421
1422 unsigned IntSz = IntTy->getScalarSizeInBits();
1423  // This is the maximum number of bits in use by the integer for which the
1424  // int -> fp casts are exact.
1425 unsigned MaxRepresentableBits =
1427
1428  // Preserve the known number of leading bits. This can allow us to do trivial
1429  // nsw/nuw checks later on.
1430 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1431
1432  // NB: This only comes up if OpsFromSigned is true, so there is no need to
1433  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1434 auto IsNonZero = [&](unsigned OpNo) -> bool {
1435 if (OpsKnown[OpNo].hasKnownBits() &&
1436 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1437 return true;
1438 return isKnownNonZero(IntOps[OpNo], SQ);
1439 };
1440
1441 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1442    // NB: This matches the impl in ValueTracking; we just try to use cached
1443 // knownbits here. If we ever start supporting WithCache for
1444 // `isKnownNonNegative`, change this to an explicit call.
1445 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1446 };
1447
1448 // Check if we know for certain that ({s|u}itofp op) is exact.
1449 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1450 // Can we treat this operand as the desired sign?
1451 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1452 !IsNonNeg(OpNo))
1453 return false;
1454
1455    // If fp precision >= bitwidth(op) then it's exact.
1456 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1457 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1458 // handled specially. We can't, however, increase the bound arbitrarily for
1459 // `sitofp` as for larger sizes, it won't sign extend.
1460 if (MaxRepresentableBits < IntSz) {
1461      // Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
1462 // numSignBits(op).
1463 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1464 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1465 if (OpsFromSigned)
1466 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1467 // Finally for unsigned check that fp precision >= bitwidth(op) -
1468 // numLeadingZeros(op).
1469 else {
1470 NumUsedLeadingBits[OpNo] =
1471 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1472 }
1473 }
1474 // NB: We could also check if op is known to be a power of 2 or zero (which
1475    // will always be representable). It's unlikely, however, that if we are
1476    // unable to bound op in any way, we will be able to pass the overflow checks
1477 // later on.
1478
1479 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1480 return false;
1481 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1482 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1483 IsNonZero(OpNo);
1484 };
1485
1486 // If we have a constant rhs, see if we can losslessly convert it to an int.
1487 if (Op1FpC != nullptr) {
1488 // Signed + Mul req non-zero
1489 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1490 !match(Op1FpC, m_NonZeroFP()))
1491 return nullptr;
1492
1494 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1495 IntTy, DL);
1496 if (Op1IntC == nullptr)
1497 return nullptr;
1498 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1499 : Instruction::UIToFP,
1500 Op1IntC, FPTy, DL) != Op1FpC)
1501 return nullptr;
1502
1503 // First try to keep sign of cast the same.
1504 IntOps[1] = Op1IntC;
1505 }
1506
1507 // Ensure lhs/rhs integer types match.
1508 if (IntTy != IntOps[1]->getType())
1509 return nullptr;
1510
1511 if (Op1FpC == nullptr) {
1512 if (!IsValidPromotion(1))
1513 return nullptr;
1514 }
1515 if (!IsValidPromotion(0))
1516 return nullptr;
1517
1518  // Finally, we check that the integer version of the binop will not overflow.
1519  Instruction::BinaryOps IntOpc;
1520 // Because of the precision check, we can often rule out overflows.
1521 bool NeedsOverflowCheck = true;
1522 // Try to conservatively rule out overflow based on the already done precision
1523 // checks.
1524 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1525 unsigned OverflowMaxCurBits =
1526 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1527 bool OutputSigned = OpsFromSigned;
1528 switch (BO.getOpcode()) {
1529 case Instruction::FAdd:
1530 IntOpc = Instruction::Add;
1531 OverflowMaxOutputBits += OverflowMaxCurBits;
1532 break;
1533 case Instruction::FSub:
1534 IntOpc = Instruction::Sub;
1535 OverflowMaxOutputBits += OverflowMaxCurBits;
1536 break;
1537 case Instruction::FMul:
1538 IntOpc = Instruction::Mul;
1539 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1540 break;
1541 default:
1542 llvm_unreachable("Unsupported binop");
1543 }
1544 // The precision check may have already ruled out overflow.
1545 if (OverflowMaxOutputBits < IntSz) {
1546 NeedsOverflowCheck = false;
1547    // For sub, unsigned overflow can be bounded to an in-range signed value
1548    // (this is what allows us to avoid the overflow check for sub).
1549 if (IntOpc == Instruction::Sub)
1550 OutputSigned = true;
1551 }
1552
1553 // Precision check did not rule out overflow, so need to check.
1554 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1555 // `IntOps[...]` arguments to `KnownOps[...]`.
1556 if (NeedsOverflowCheck &&
1557 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1558 return nullptr;
1559
1560 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1561 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1562 IntBO->setHasNoSignedWrap(OutputSigned);
1563 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1564 }
1565 if (OutputSigned)
1566 return new SIToFPInst(IntBinOp, FPTy);
1567 return new UIToFPInst(IntBinOp, FPTy);
1568}
1569
1570// Try to fold:
1571// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1572// -> ({s|u}itofp (int_binop x, y))
1573// 2) (fp_binop ({s|u}itofp x), FpC)
1574// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
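//
// Illustrative sketch: if %x and %y are i8 values and the analysis proves the
// integer add cannot wrap, then
//     fadd float (uitofp i8 %x to float), (uitofp i8 %y to float)
// can be rewritten as
//     uitofp (add nuw i8 %x, %y) to float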
1575Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1576 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1577 Constant *Op1FpC = nullptr;
1578 // Check for:
1579 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1580 // 2) (binop ({s|u}itofp x), FpC)
1581 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1582 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1583 return nullptr;
1584
1585 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1586 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1587 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1588 return nullptr;
1589
1590 // Cache KnownBits a bit to potentially save some analysis.
1591 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1592
1593 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1594 // different constraints depending on the sign of the cast.
1595 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1596 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1597 IntOps, Op1FpC, OpsKnown))
1598 return R;
1599 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1600 Op1FpC, OpsKnown);
1601}
1602
1603/// A binop with a constant operand and a sign-extended boolean operand may be
1604/// converted into a select of constants by applying the binary operation to
1605/// the constant with the two possible values of the extended boolean (0 or -1).
1606Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1607 // TODO: Handle non-commutative binop (constant is operand 0).
1608 // TODO: Handle zext.
1609 // TODO: Peek through 'not' of cast.
1610 Value *BO0 = BO.getOperand(0);
1611 Value *BO1 = BO.getOperand(1);
1612 Value *X;
1613 Constant *C;
1614 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1615 !X->getType()->isIntOrIntVectorTy(1))
1616 return nullptr;
1617
1618 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1621 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1622 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1623 return SelectInst::Create(X, TVal, FVal);
1624}
1625
1627 SelectInst *SI,
1628 bool IsTrueArm) {
1629 SmallVector<Constant *> ConstOps;
1630 for (Value *Op : I.operands()) {
1631 CmpInst::Predicate Pred;
1632 Constant *C = nullptr;
1633 if (Op == SI) {
1634 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1635 : SI->getFalseValue());
1636 } else if (match(SI->getCondition(),
1637 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1638 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1639 isGuaranteedNotToBeUndefOrPoison(C)) {
1640 // Pass
1641 } else {
1642 C = dyn_cast<Constant>(Op);
1643 }
1644 if (C == nullptr)
1645 return nullptr;
1646
1647 ConstOps.push_back(C);
1648 }
1649
1650 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1651}
1652
1653static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1654 Value *NewOp, InstCombiner &IC) {
1655 Instruction *Clone = I.clone();
1656 Clone->replaceUsesOfWith(SI, NewOp);
1657 Clone->dropUBImplyingAttrsAndMetadata();
1658 IC.InsertNewInstBefore(Clone, SI->getIterator());
1659 return Clone;
1660}
1661
1662Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1663 bool FoldWithMultiUse) {
1664 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1665 if (!SI->hasOneUse() && !FoldWithMultiUse)
1666 return nullptr;
1667
1668 Value *TV = SI->getTrueValue();
1669 Value *FV = SI->getFalseValue();
1670 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1671 return nullptr;
1672
1673 // Bool selects with constant operands can be folded to logical ops.
1674 if (SI->getType()->isIntOrIntVectorTy(1))
1675 return nullptr;
1676
1677 // Test if a FCmpInst instruction is used exclusively by a select as
1678 // part of a minimum or maximum operation. If so, refrain from doing
1679 // any other folding. This helps out other analyses which understand
1680 // non-obfuscated minimum and maximum idioms. And in this case, at
1681 // least one of the comparison operands has at least one user besides
1682 // the compare (the select), which would often largely negate the
1683 // benefit of folding anyway.
1684 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1685 if (CI->hasOneUse()) {
1686 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1687 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1688 return nullptr;
1689 }
1690 }
1691
1692 // Make sure that one of the select arms constant folds successfully.
1693 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1694 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1695 if (!NewTV && !NewFV)
1696 return nullptr;
1697
1698 // Create an instruction for the arm that did not fold.
1699 if (!NewTV)
1700 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1701 if (!NewFV)
1702 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1703 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1704}
1705
1706static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1707 Value *InValue, BasicBlock *InBB,
1708 const DataLayout &DL,
1709 const SimplifyQuery SQ) {
1710 // NB: It is a precondition of this transform that the operands be
1711 // phi translatable! This is usually trivially satisfied by limiting it
1712 // to constant ops, and for selects we do a more sophisticated check.
1713 SmallVector<Value *> Ops;
1714 for (Value *Op : I.operands()) {
1715 if (Op == PN)
1716 Ops.push_back(InValue);
1717 else
1718 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1719 }
1720
1721 // Don't consider the simplification successful if we get back a constant
1722 // expression. That's just an instruction in hiding.
1723 // Also reject the case where we simplify back to the phi node. We wouldn't
1724 // be able to remove it in that case.
1725 Value *NewVal = simplifyInstructionWithOperands(
1726 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1727 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1728 return NewVal;
1729
1730 // Check if incoming PHI value can be replaced with constant
1731 // based on implied condition.
1732 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1733 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1734 if (TerminatorBI && TerminatorBI->isConditional() &&
1735 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1736 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1737 std::optional<bool> ImpliedCond =
1738 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1739 Ops[0], Ops[1], DL, LHSIsTrue);
1740 if (ImpliedCond)
1741 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1742 }
1743
1744 return nullptr;
1745}
1746
1747Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1748 unsigned NumPHIValues = PN->getNumIncomingValues();
1749 if (NumPHIValues == 0)
1750 return nullptr;
1751
1752 // We normally only transform phis with a single use. However, if a PHI has
1753 // multiple uses and they are all the same operation, we can fold *all* of the
1754 // uses into the PHI.
1755 if (!PN->hasOneUse()) {
1756 // Walk the use list for the instruction, comparing them to I.
1757 for (User *U : PN->users()) {
1758 Instruction *UI = cast<Instruction>(U);
1759 if (UI != &I && !I.isIdenticalTo(UI))
1760 return nullptr;
1761 }
1762 // Otherwise, we can replace *all* users with the new PHI we form.
1763 }
1764
1765 // Check to see whether the instruction can be folded into each phi operand.
1766 // If there is one operand that does not fold, remember the BB it is in.
1767 // If there is more than one or if *it* is a PHI, bail out.
1768 SmallVector<Value *> NewPhiValues;
1769 BasicBlock *NonSimplifiedBB = nullptr;
1770 Value *NonSimplifiedInVal = nullptr;
1771 for (unsigned i = 0; i != NumPHIValues; ++i) {
1772 Value *InVal = PN->getIncomingValue(i);
1773 BasicBlock *InBB = PN->getIncomingBlock(i);
1774
1775 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1776 NewPhiValues.push_back(NewVal);
1777 continue;
1778 }
1779
1780 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1781
1782 NonSimplifiedBB = InBB;
1783 NonSimplifiedInVal = InVal;
1784 NewPhiValues.push_back(nullptr);
1785
1786 // If the InVal is an invoke at the end of the pred block, then we can't
1787 // insert a computation after it without breaking the edge.
1788 if (isa<InvokeInst>(InVal))
1789 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1790 return nullptr;
1791
1792 // If the incoming non-constant value is reachable from the phi's block,
1793 // we'll push the operation across a loop backedge. This could result in
1794 // an infinite combine loop, and is generally non-profitable (especially
1795 // if the operation was originally outside the loop).
1796 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1797 LI))
1798 return nullptr;
1799 }
1800
1801 // If there is exactly one non-simplified value, we can insert a copy of the
1802 // operation in that block. However, if this is a critical edge, we would be
1803 // inserting the computation on some other paths (e.g. inside a loop). Only
1804 // do this if the pred block is unconditionally branching into the phi block.
1805 // Also, make sure that the pred block is not dead code.
1806 if (NonSimplifiedBB != nullptr) {
1807 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1808 if (!BI || !BI->isUnconditional() ||
1809 !DT.isReachableFromEntry(NonSimplifiedBB))
1810 return nullptr;
1811 }
1812
1813 // Okay, we can do the transformation: create the new PHI node.
1814 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1815 InsertNewInstBefore(NewPN, PN->getIterator());
1816 NewPN->takeName(PN);
1817 NewPN->setDebugLoc(PN->getDebugLoc());
1818
1819 // If we are going to have to insert a new computation, do so right before the
1820 // predecessor's terminator.
1821 Instruction *Clone = nullptr;
1822 if (NonSimplifiedBB) {
1823 Clone = I.clone();
1824 for (Use &U : Clone->operands()) {
1825 if (U == PN)
1826 U = NonSimplifiedInVal;
1827 else
1828 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1829 }
1830 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1831 }
1832
1833 for (unsigned i = 0; i != NumPHIValues; ++i) {
1834 if (NewPhiValues[i])
1835 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1836 else
1837 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1838 }
1839
1840 for (User *U : make_early_inc_range(PN->users())) {
1841 Instruction *User = cast<Instruction>(U);
1842 if (User == &I) continue;
1843 replaceInstUsesWith(*User, NewPN);
1844 eraseInstFromFunction(*User);
1845 }
1846
1847 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1848 const_cast<PHINode &>(*NewPN),
1849 const_cast<PHINode &>(*PN), DT);
1850 return replaceInstUsesWith(I, NewPN);
1851}
1852
1853Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1854 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1855 // we are guarding against replicating the binop in >1 predecessor.
1856 // This could miss matching a phi with 2 constant incoming values.
1857 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1858 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1859 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1860 Phi0->getNumOperands() != Phi1->getNumOperands())
1861 return nullptr;
1862
1863 // TODO: Remove the restriction for binop being in the same block as the phis.
1864 if (BO.getParent() != Phi0->getParent() ||
1865 BO.getParent() != Phi1->getParent())
1866 return nullptr;
1867
1868 // Fold if, for every incoming block, at least one of phi0's or phi1's
1869 // incoming values is the identity constant of the binary operator; the
1870 // result for that block is then simply the other phi's incoming value.
1871 // For example:
1872 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1873 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1874 // %add = add i32 %phi0, %phi1
1875 // ==>
1876 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1877 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1878 /*AllowRHSConstant*/ false);
1879 if (C) {
1880 SmallVector<Value *, 4> NewIncomingValues;
1881 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1882 auto &Phi0Use = std::get<0>(T);
1883 auto &Phi1Use = std::get<1>(T);
1884 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1885 return false;
1886 Value *Phi0UseV = Phi0Use.get();
1887 Value *Phi1UseV = Phi1Use.get();
1888 if (Phi0UseV == C)
1889 NewIncomingValues.push_back(Phi1UseV);
1890 else if (Phi1UseV == C)
1891 NewIncomingValues.push_back(Phi0UseV);
1892 else
1893 return false;
1894 return true;
1895 };
1896
1897 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1898 CanFoldIncomingValuePair)) {
1899 PHINode *NewPhi =
1900 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1901 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1902 "The number of collected incoming values should equal the number "
1903 "of the original PHINode operands!");
1904 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1905 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1906 return NewPhi;
1907 }
1908 }
1909
1910 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1911 return nullptr;
1912
1913 // Match a pair of incoming constants for one of the predecessor blocks.
1914 BasicBlock *ConstBB, *OtherBB;
1915 Constant *C0, *C1;
1916 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1917 ConstBB = Phi0->getIncomingBlock(0);
1918 OtherBB = Phi0->getIncomingBlock(1);
1919 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1920 ConstBB = Phi0->getIncomingBlock(1);
1921 OtherBB = Phi0->getIncomingBlock(0);
1922 } else {
1923 return nullptr;
1924 }
1925 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1926 return nullptr;
1927
1928 // The block that we are hoisting to must reach here unconditionally.
1929 // Otherwise, we could be speculatively executing an expensive or
1930 // non-speculative op.
1931 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1932 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1933 !DT.isReachableFromEntry(OtherBB))
1934 return nullptr;
1935
1936 // TODO: This check could be tightened to only apply to binops (div/rem) that
1937 // are not safe to speculatively execute. But that could allow hoisting
1938 // potentially expensive instructions (fdiv for example).
1939 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1940 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1941 return nullptr;
1942
1943 // Fold constants for the predecessor block with constant incoming values.
1944 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1945 if (!NewC)
1946 return nullptr;
1947
1948 // Make a new binop in the predecessor block with the non-constant incoming
1949 // values.
1950 Builder.SetInsertPoint(PredBlockBranch);
1951 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1952 Phi0->getIncomingValueForBlock(OtherBB),
1953 Phi1->getIncomingValueForBlock(OtherBB));
1954 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1955 NotFoldedNewBO->copyIRFlags(&BO);
1956
1957 // Replace the binop with a phi of the new values. The old phis are dead.
1958 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1959 NewPhi->addIncoming(NewBO, OtherBB);
1960 NewPhi->addIncoming(NewC, ConstBB);
1961 return NewPhi;
1962}
1963
1964Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1965 if (!isa<Constant>(I.getOperand(1)))
1966 return nullptr;
1967
1968 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1969 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1970 return NewSel;
1971 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1972 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1973 return NewPhi;
1974 }
1975 return nullptr;
1976}
1977
1978static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1979 // If this GEP has only 0 indices, it is the same pointer as
1980 // Src. If Src is not a trivial GEP too, don't combine
1981 // the indices.
1982 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1983 !Src.hasOneUse())
1984 return false;
1985 return true;
1986}
1987
1988Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1989 if (!isa<VectorType>(Inst.getType()))
1990 return nullptr;
1991
1992 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1993 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1994 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1995 cast<VectorType>(Inst.getType())->getElementCount());
1996 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1997 cast<VectorType>(Inst.getType())->getElementCount());
1998
1999 // If both operands of the binop are vector concatenations, then perform the
2000 // narrow binop on each pair of the source operands followed by concatenation
2001 // of the results.
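//
// Illustrative example (not from the original source):
//   %lhs = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %rhs = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r   = add <4 x i32> %lhs, %rhs
// -->
//   %lo  = add <2 x i32> %a, %c
//   %hi  = add <2 x i32> %b, %d
//   %r   = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>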
2002 Value *L0, *L1, *R0, *R1;
2003 ArrayRef<int> Mask;
2004 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2005 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2006 LHS->hasOneUse() && RHS->hasOneUse() &&
2007 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2008 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2009 // This transform does not have the speculative execution constraint as
2010 // below because the shuffle is a concatenation. The new binops are
2011 // operating on exactly the same elements as the existing binop.
2012 // TODO: We could ease the mask requirement to allow different undef lanes,
2013 // but that requires an analysis of the binop-with-undef output value.
2014 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2015 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2016 BO->copyIRFlags(&Inst);
2017 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2018 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2019 BO->copyIRFlags(&Inst);
2020 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2021 }
2022
2023 auto createBinOpReverse = [&](Value *X, Value *Y) {
2024 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2025 if (auto *BO = dyn_cast<BinaryOperator>(V))
2026 BO->copyIRFlags(&Inst);
2027 Module *M = Inst.getModule();
2028 Function *F = Intrinsic::getDeclaration(
2029 M, Intrinsic::experimental_vector_reverse, V->getType());
2030 return CallInst::Create(F, V);
2031 };
2032
2033 // NOTE: Reverse shuffles don't require the speculative execution protection
2034 // below because they don't affect which lanes take part in the computation.
2035
2036 Value *V1, *V2;
2037 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2038 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2039 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2040 (LHS->hasOneUse() || RHS->hasOneUse() ||
2041 (LHS == RHS && LHS->hasNUses(2))))
2042 return createBinOpReverse(V1, V2);
2043
2044 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2045 if (LHS->hasOneUse() && isSplatValue(RHS))
2046 return createBinOpReverse(V1, RHS);
2047 }
2048 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2049 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2050 return createBinOpReverse(LHS, V2);
2051
2052 // It may not be safe to reorder shuffles and things like div, urem, etc.
2053 // because we may trap when executing those ops on unknown vector elements.
2054 // See PR20059.
2055 if (!isSafeToSpeculativelyExecute(&Inst))
2056 return nullptr;
2057
2058 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2059 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2060 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2061 BO->copyIRFlags(&Inst);
2062 return new ShuffleVectorInst(XY, M);
2063 };
2064
2065 // If both arguments of the binary operation are shuffles that use the same
2066 // mask and shuffle within a single vector, move the shuffle after the binop.
2067 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2068 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2069 V1->getType() == V2->getType() &&
2070 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2071 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2072 return createBinOpShuffle(V1, V2, Mask);
2073 }
2074
2075 // If both arguments of a commutative binop are select-shuffles that use the
2076 // same mask with commuted operands, the shuffles are unnecessary.
2077 if (Inst.isCommutative() &&
2078 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2079 match(RHS,
2080 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2081 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2082 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2083 // TODO: Allow shuffles that contain undefs in the mask?
2084 // That is legal, but it reduces undef knowledge.
2085 // TODO: Allow arbitrary shuffles by shuffling after binop?
2086 // That might be legal, but we have to deal with poison.
2087 if (LShuf->isSelect() &&
2088 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2089 RShuf->isSelect() &&
2090 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2091 // Example:
2092 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2093 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2094 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2095 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2096 NewBO->copyIRFlags(&Inst);
2097 return NewBO;
2098 }
2099 }
2100
2101 // If one argument is a shuffle within one vector and the other is a constant,
2102 // try moving the shuffle after the binary operation. This canonicalization
2103 // intends to move shuffles closer to other shuffles and binops closer to
2104 // other binops, so they can be folded. It may also enable demanded elements
2105 // transforms.
2106 Constant *C;
2107 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
2108 if (InstVTy &&
2109 match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
2110 m_Mask(Mask))),
2111 m_ImmConstant(C))) &&
2112 cast<FixedVectorType>(V1->getType())->getNumElements() <=
2113 InstVTy->getNumElements()) {
2114 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2115 "Shuffle should not change scalar type");
2116
2117 // Find constant NewC that has property:
2118 // shuffle(NewC, ShMask) = C
2119 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2120 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2121 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2122 bool ConstOp1 = isa<Constant>(RHS);
2123 ArrayRef<int> ShMask = Mask;
2124 unsigned SrcVecNumElts =
2125 cast<FixedVectorType>(V1->getType())->getNumElements();
2126 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2127 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2128 bool MayChange = true;
2129 unsigned NumElts = InstVTy->getNumElements();
2130 for (unsigned I = 0; I < NumElts; ++I) {
2131 Constant *CElt = C->getAggregateElement(I);
2132 if (ShMask[I] >= 0) {
2133 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2134 Constant *NewCElt = NewVecC[ShMask[I]];
2135 // Bail out if:
2136 // 1. The constant vector contains a constant expression.
2137 // 2. The shuffle needs an element of the constant vector that can't
2138 // be mapped to a new constant vector.
2139 // 3. This is a widening shuffle that copies elements of V1 into the
2140 // extended elements (extending with poison is allowed).
2141 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2142 I >= SrcVecNumElts) {
2143 MayChange = false;
2144 break;
2145 }
2146 NewVecC[ShMask[I]] = CElt;
2147 }
2148 // If this is a widening shuffle, we must be able to extend with poison
2149 // elements. If the original binop does not produce a poison in the high
2150 // lanes, then this transform is not safe.
2151 // Similarly for poison lanes due to the shuffle mask, we can only
2152 // transform binops that preserve poison.
2153 // TODO: We could shuffle those non-poison constant values into the
2154 // result by using a constant vector (rather than a poison vector)
2155 // as operand 1 of the new binop, but that might be too aggressive
2156 // for target-independent shuffle creation.
2157 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2158 Constant *MaybePoison =
2159 ConstOp1
2160 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
2161 : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
2162 if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
2163 MayChange = false;
2164 break;
2165 }
2166 }
2167 }
2168 if (MayChange) {
2169 Constant *NewC = ConstantVector::get(NewVecC);
2170 // It may not be safe to execute a binop on a vector with poison elements
2171 // because the entire instruction can be folded to undef or create poison
2172 // that did not exist in the original code.
2173 // TODO: The shift case should not be necessary.
2174 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2175 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2176
2177 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2178 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2179 Value *NewLHS = ConstOp1 ? V1 : NewC;
2180 Value *NewRHS = ConstOp1 ? NewC : V1;
2181 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2182 }
2183 }
2184
2185 // Try to reassociate to sink a splat shuffle after a binary operation.
2186 if (Inst.isAssociative() && Inst.isCommutative()) {
2187 // Canonicalize shuffle operand as LHS.
2188 if (isa<ShuffleVectorInst>(RHS))
2189 std::swap(LHS, RHS);
2190
2191 Value *X;
2192 ArrayRef<int> MaskC;
2193 int SplatIndex;
2194 Value *Y, *OtherOp;
2195 if (!match(LHS,
2196 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2197 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2198 X->getType() != Inst.getType() ||
2199 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2200 return nullptr;
2201
2202 // FIXME: This may not be safe if the analysis allows undef elements. By
2203 // moving 'Y' before the splat shuffle, we are implicitly assuming
2204 // that it is not undef/poison at the splat index.
2205 if (isSplatValue(OtherOp, SplatIndex)) {
2206 std::swap(Y, OtherOp);
2207 } else if (!isSplatValue(Y, SplatIndex)) {
2208 return nullptr;
2209 }
2210
2211 // X and Y are splatted values, so perform the binary operation on those
2212 // values followed by a splat followed by the 2nd binary operation:
2213 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2214 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2215 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2216 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2217 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2218
2219 // Intersect FMF on both new binops. Other (poison-generating) flags are
2220 // dropped to be safe.
2221 if (isa<FPMathOperator>(R)) {
2222 R->copyFastMathFlags(&Inst);
2223 R->andIRFlags(RHS);
2224 }
2225 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2226 NewInstBO->copyIRFlags(R);
2227 return R;
2228 }
2229
2230 return nullptr;
2231}
2232
2233/// Try to narrow the width of a binop if at least 1 operand is an extend of
2234/// a value. This requires a potentially expensive known bits check to make
2235/// sure the narrow op does not overflow.
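///
/// Illustrative example (not from the original source; applies only when the
/// narrow operation is proven not to overflow):
///   %xw = zext i8 %x to i32
///   %yw = zext i8 %y to i32
///   %r  = add i32 %xw, %yw
/// -->
///   %n  = add nuw i8 %x, %y
///   %r  = zext i8 %n to i32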
2236Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2237 // We need at least one extended operand.
2238 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2239
2240 // If this is a sub, we swap the operands since we always want an extension
2241 // on the RHS. The LHS can be an extension or a constant.
2242 if (BO.getOpcode() == Instruction::Sub)
2243 std::swap(Op0, Op1);
2244
2245 Value *X;
2246 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2247 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2248 return nullptr;
2249
2250 // If both operands are the same extension from the same source type and we
2251 // can eliminate at least one (hasOneUse), this might work.
2252 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2253 Value *Y;
2254 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2255 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2256 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2257 // If that did not match, see if we have a suitable constant operand.
2258 // Truncating and extending must produce the same constant.
2259 Constant *WideC;
2260 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2261 return nullptr;
2262 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
2263 if (!NarrowC)
2264 return nullptr;
2265 Y = NarrowC;
2266 }
2267
2268 // Swap back now that we found our operands.
2269 if (BO.getOpcode() == Instruction::Sub)
2270 std::swap(X, Y);
2271
2272 // Both operands have narrow versions. Last step: the math must not overflow
2273 // in the narrow width.
2274 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2275 return nullptr;
2276
2277 // bo (ext X), (ext Y) --> ext (bo X, Y)
2278 // bo (ext X), C --> ext (bo X, C')
2279 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2280 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2281 if (IsSext)
2282 NewBinOp->setHasNoSignedWrap();
2283 else
2284 NewBinOp->setHasNoUnsignedWrap();
2285 }
2286 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2287}
2288
2289static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2290 // At least one GEP must be inbounds.
2291 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2292 return false;
2293
2294 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2295 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2296}
2297
2298/// Thread a GEP operation with constant indices through the constant true/false
2299/// arms of a select.
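///
/// Illustrative example (not from the original source; @a and @b are
/// hypothetical globals):
///   %p = select i1 %c, ptr @a, ptr @b
///   %g = getelementptr inbounds i32, ptr %p, i64 2
/// -->
///   %g = select i1 %c, ptr getelementptr inbounds (i32, ptr @a, i64 2),
///                      ptr getelementptr inbounds (i32, ptr @b, i64 2)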
2300static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2301 InstCombiner::BuilderTy &Builder) {
2302 if (!GEP.hasAllConstantIndices())
2303 return nullptr;
2304
2305 Instruction *Sel;
2306 Value *Cond;
2307 Constant *TrueC, *FalseC;
2308 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2309 !match(Sel,
2310 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2311 return nullptr;
2312
2313 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2314 // Propagate 'inbounds' and metadata from existing instructions.
2315 // Note: using IRBuilder to create the constants for efficiency.
2316 SmallVector<Value *, 4> IndexC(GEP.indices());
2317 bool IsInBounds = GEP.isInBounds();
2318 Type *Ty = GEP.getSourceElementType();
2319 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
2320 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
2321 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2322}
2323
2324Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2325 GEPOperator *Src) {
2326 // Combine Indices - If the source pointer to this getelementptr instruction
2327 // is a getelementptr instruction with matching element type, combine the
2328 // indices of the two getelementptr instructions into a single instruction.
2329 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2330 return nullptr;
2331
2332 // For constant GEPs, use a more general offset-based folding approach.
2333 Type *PtrTy = Src->getType()->getScalarType();
2334 if (GEP.hasAllConstantIndices() &&
2335 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2336 // Split Src into a variable part and a constant suffix.
2337 gep_type_iterator GTI = gep_type_begin(*Src);
2338 Type *BaseType = GTI.getIndexedType();
2339 bool IsFirstType = true;
2340 unsigned NumVarIndices = 0;
2341 for (auto Pair : enumerate(Src->indices())) {
2342 if (!isa<ConstantInt>(Pair.value())) {
2343 BaseType = GTI.getIndexedType();
2344 IsFirstType = false;
2345 NumVarIndices = Pair.index() + 1;
2346 }
2347 ++GTI;
2348 }
2349
2350 // Determine the offset for the constant suffix of Src.
2352 if (NumVarIndices != Src->getNumIndices()) {
2353 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2354 if (BaseType->isScalableTy())
2355 return nullptr;
2356
2357 SmallVector<Value *> ConstantIndices;
2358 if (!IsFirstType)
2359 ConstantIndices.push_back(
2360 Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2361 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2362 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2363 }
2364
2365 // Add the offset for GEP (which is fully constant).
2366 if (!GEP.accumulateConstantOffset(DL, Offset))
2367 return nullptr;
2368
2369 APInt OffsetOld = Offset;
2370 // Convert the total offset back into indices.
2371 SmallVector<APInt> ConstIndices =
2372 DL.getGEPIndicesForOffset(BaseType, Offset);
2373 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2374 // If both GEP are constant-indexed, and cannot be merged in either way,
2375 // convert them to a GEP of i8.
2376 if (Src->hasAllConstantIndices())
2377 return replaceInstUsesWith(
2378 GEP, Builder.CreateGEP(
2379 Builder.getInt8Ty(), Src->getOperand(0),
2380 Builder.getInt(OffsetOld), "",
2381 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2382 return nullptr;
2383 }
2384
2385 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2386 SmallVector<Value *> Indices;
2387 append_range(Indices, drop_end(Src->indices(),
2388 Src->getNumIndices() - NumVarIndices));
2389 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2390 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2391 // Even if the total offset is inbounds, we may end up representing it
2392 // by first performing a larger negative offset, and then a smaller
2393 // positive one. The large negative offset might go out of bounds. Only
2394 // preserve inbounds if all signs are the same.
2395 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2396 }
2397
2398 return replaceInstUsesWith(
2399 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2400 Indices, "", IsInBounds));
2401 }
2402
2403 if (Src->getResultElementType() != GEP.getSourceElementType())
2404 return nullptr;
2405
2406 SmallVector<Value*, 8> Indices;
2407
2408 // Find out whether the last index in the source GEP is a sequential idx.
2409 bool EndsWithSequential = false;
2410 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2411 I != E; ++I)
2412 EndsWithSequential = I.isSequential();
2413
2414 // Can we combine the two pointer arithmetics offsets?
2415 if (EndsWithSequential) {
2416 // Replace: gep (gep %P, long B), long A, ...
2417 // With: T = long A+B; gep %P, T, ...
2418 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2419 Value *GO1 = GEP.getOperand(1);
2420
2421 // If they aren't the same type, then the input hasn't been processed
2422 // by the loop above yet (which canonicalizes sequential index types to
2423 // intptr_t). Just avoid transforming this until the input has been
2424 // normalized.
2425 if (SO1->getType() != GO1->getType())
2426 return nullptr;
2427
2428 Value *Sum =
2429 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2430 // Only do the combine when we are sure the cost after the
2431 // merge is never more than that before the merge.
2432 if (Sum == nullptr)
2433 return nullptr;
2434
2435 // Update the GEP in place if possible.
2436 if (Src->getNumOperands() == 2) {
2437 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2438 replaceOperand(GEP, 0, Src->getOperand(0));
2439 replaceOperand(GEP, 1, Sum);
2440 return &GEP;
2441 }
2442 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2443 Indices.push_back(Sum);
2444 Indices.append(GEP.op_begin()+2, GEP.op_end());
2445 } else if (isa<Constant>(*GEP.idx_begin()) &&
2446 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2447 Src->getNumOperands() != 1) {
2448 // Otherwise we can do the fold if the first index of the GEP is a zero
2449 Indices.append(Src->op_begin()+1, Src->op_end());
2450 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2451 }
2452
2453 if (!Indices.empty())
2454 return replaceInstUsesWith(
2455 GEP, Builder.CreateGEP(
2456 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2457 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2458
2459 return nullptr;
2460}
2461
2463 BuilderTy *Builder,
2464 bool &DoesConsume, unsigned Depth) {
2465 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2466 // ~(~(X)) -> X.
2467 Value *A, *B;
2468 if (match(V, m_Not(m_Value(A)))) {
2469 DoesConsume = true;
2470 return A;
2471 }
2472
2473 Constant *C;
2474 // Constants can be considered to be not'ed values.
2475 if (match(V, m_ImmConstant(C)))
2476 return ConstantExpr::getNot(C);
2477
2478 if (Depth++ >= MaxAnalysisRecursionDepth)
2479 return nullptr;
2480
2481 // The rest of the cases require that we invert all uses so don't bother
2482 // doing the analysis if we know we can't use the result.
2483 if (!WillInvertAllUses)
2484 return nullptr;
2485
2486 // Compares can be inverted if all of their uses are being modified to use
2487 // the ~V.
2488 if (auto *I = dyn_cast<CmpInst>(V)) {
2489 if (Builder != nullptr)
2490 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2491 I->getOperand(1));
2492 return NonNull;
2493 }
2494
2495 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2496 // `(-1 - B) - A` if we are willing to invert all of the uses.
2497 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2498 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2499 DoesConsume, Depth))
2500 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2501 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2502 DoesConsume, Depth))
2503 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2504 return nullptr;
2505 }
2506
2507 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2508 // into `A ^ B` if we are willing to invert all of the uses.
2509 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2510 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2511 DoesConsume, Depth))
2512 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2513 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2514 DoesConsume, Depth))
2515 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2516 return nullptr;
2517 }
2518
2519 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2520 // `A + (-1 - B)` if we are willing to invert all of the uses.
2521 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2522 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2523 DoesConsume, Depth))
2524 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2525 return nullptr;
2526 }
2527
2528 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2529 // into `A s>> B` if we are willing to invert all of the uses.
2530 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2531 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2532 DoesConsume, Depth))
2533 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2534 return nullptr;
2535 }
2536
2537 Value *Cond;
2538 // LogicOps are special in that we canonicalize them at the cost of an
2539 // instruction.
2540 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2541 !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
2542 // Selects/min/max with invertible operands are freely invertible
2543 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2544 bool LocalDoesConsume = DoesConsume;
2545 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2546 LocalDoesConsume, Depth))
2547 return nullptr;
2548 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2549 LocalDoesConsume, Depth)) {
2550 DoesConsume = LocalDoesConsume;
2551 if (Builder != nullptr) {
2552 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2553 DoesConsume, Depth);
2554 assert(NotB != nullptr &&
2555 "Unable to build inverted value for known freely invertable op");
2556 if (auto *II = dyn_cast<IntrinsicInst>(V))
2557 return Builder->CreateBinaryIntrinsic(
2558 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2559 return Builder->CreateSelect(Cond, NotA, NotB);
2560 }
2561 return NonNull;
2562 }
2563 }
2564
2565 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2566 bool LocalDoesConsume = DoesConsume;
2568 for (Use &U : PN->operands()) {
2569 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2570 Value *NewIncomingVal = getFreelyInvertedImpl(
2571 U.get(), /*WillInvertAllUses=*/false,
2572 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2573 if (NewIncomingVal == nullptr)
2574 return nullptr;
2575 // Make sure that we can safely erase the original PHI node.
2576 if (NewIncomingVal == V)
2577 return nullptr;
2578 if (Builder != nullptr)
2579 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2580 }
2581
2582 DoesConsume = LocalDoesConsume;
2583 if (Builder != nullptr) {
2586 PHINode *NewPN =
2587 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2588 for (auto [Val, Pred] : IncomingValues)
2589 NewPN->addIncoming(Val, Pred);
2590 return NewPN;
2591 }
2592 return NonNull;
2593 }
2594
2595 if (match(V, m_SExtLike(m_Value(A)))) {
2596 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2597 DoesConsume, Depth))
2598 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2599 return nullptr;
2600 }
2601
2602 if (match(V, m_Trunc(m_Value(A)))) {
2603 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2604 DoesConsume, Depth))
2605 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2606 return nullptr;
2607 }
2608
2609 // De Morgan's Laws:
2610 // (~(A | B)) -> (~A & ~B)
2611 // (~(A & B)) -> (~A | ~B)
2612 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2613 bool IsLogical, Value *A,
2614 Value *B) -> Value * {
2615 bool LocalDoesConsume = DoesConsume;
2616 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2617 LocalDoesConsume, Depth))
2618 return nullptr;
2619 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2620 LocalDoesConsume, Depth)) {
2621 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2622 LocalDoesConsume, Depth);
2623 DoesConsume = LocalDoesConsume;
2624 if (IsLogical)
2625 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2626 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2627 }
2628
2629 return nullptr;
2630 };
2631
2632 if (match(V, m_Or(m_Value(A), m_Value(B))))
2633 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2634 B);
2635
2636 if (match(V, m_And(m_Value(A), m_Value(B))))
2637 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2638 B);
2639
2640 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2641 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2642 B);
2643
2644 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2645 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2646 B);
2647
2648 return nullptr;
2649}
2650
2651Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2652 Value *PtrOp = GEP.getOperand(0);
2653 SmallVector<Value *, 8> Indices(GEP.indices());
2654 Type *GEPType = GEP.getType();
2655 Type *GEPEltType = GEP.getSourceElementType();
2656 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2657 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2658 SQ.getWithInstruction(&GEP)))
2659 return replaceInstUsesWith(GEP, V);
2660
2661 // For vector geps, use the generic demanded vector support.
2662 // Skip if GEP return type is scalable. The number of elements is unknown at
2663 // compile-time.
2664 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2665 auto VWidth = GEPFVTy->getNumElements();
2666 APInt PoisonElts(VWidth, 0);
2667 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2668 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2669 PoisonElts)) {
2670 if (V != &GEP)
2671 return replaceInstUsesWith(GEP, V);
2672 return &GEP;
2673 }
2674
2675 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2676 // possible (decide on canonical form for pointer broadcast), 3) exploit
2677 // undef elements to decrease demanded bits
2678 }
2679
2680 // Eliminate unneeded casts for indices, and replace indices which displace
2681 // by multiples of a zero size type with zero.
2682 bool MadeChange = false;
2683
2684 // Index width may not be the same width as pointer width.
2685 // Data layout chooses the right type based on supported integer types.
2686 Type *NewScalarIndexTy =
2687 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2688
2689 gep_type_iterator GTI = gep_type_begin(GEP);
2690 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2691 ++I, ++GTI) {
2692 // Skip indices into struct types.
2693 if (GTI.isStruct())
2694 continue;
2695
2696 Type *IndexTy = (*I)->getType();
2697 Type *NewIndexType =
2698 IndexTy->isVectorTy()
2699 ? VectorType::get(NewScalarIndexTy,
2700 cast<VectorType>(IndexTy)->getElementCount())
2701 : NewScalarIndexTy;
2702
2703 // If the element type has zero size then any index over it is equivalent
2704 // to an index of zero, so replace it with zero if it is not zero already.
2705 Type *EltTy = GTI.getIndexedType();
2706 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2707 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2708 *I = Constant::getNullValue(NewIndexType);
2709 MadeChange = true;
2710 }
2711
2712 if (IndexTy != NewIndexType) {
2713 // If we are using a wider index than needed for this platform, shrink
2714 // it to what we need. If narrower, sign-extend it to what we need.
2715 // This explicit cast can make subsequent optimizations more obvious.
2716 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2717 MadeChange = true;
2718 }
2719 }
2720 if (MadeChange)
2721 return &GEP;
2722
2723 // Canonicalize constant GEPs to i8 type.
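// Illustrative example (not from the original source, assuming a 4-byte i32):
//   getelementptr inbounds i32, ptr %p, i64 3
// -->
//   getelementptr inbounds i8, ptr %p, i64 12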
2724 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2725 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
2726 if (GEP.accumulateConstantOffset(DL, Offset))
2727 return replaceInstUsesWith(
2729 GEP.isInBounds()));
2730 }
2731
2732 // Check to see if the inputs to the PHI node are getelementptr instructions.
2733 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2734 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2735 if (!Op1)
2736 return nullptr;
2737
2738 // Don't fold a GEP into itself through a PHI node. This can only happen
2739 // through the back-edge of a loop. Folding a GEP into itself means that
2740 // the value of the previous iteration needs to be stored in the meantime,
2741 // thus requiring an additional register variable to be live, but not
2742 // actually achieving anything (the GEP still needs to be executed once per
2743 // loop iteration).
2744 if (Op1 == &GEP)
2745 return nullptr;
2746
2747 int DI = -1;
2748
2749 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2750 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2751 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2752 Op1->getSourceElementType() != Op2->getSourceElementType())
2753 return nullptr;
2754
2755 // As for Op1 above, don't try to fold a GEP into itself.
2756 if (Op2 == &GEP)
2757 return nullptr;
2758
2759 // Keep track of the type as we walk the GEP.
2760 Type *CurTy = nullptr;
2761
2762 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2763 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2764 return nullptr;
2765
2766 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2767 if (DI == -1) {
2768 // We have not seen any differences yet in the GEPs feeding the
2769 // PHI, so we record this one if it is allowed to be a
2770 // variable.
2771
2772 // The first two arguments can vary for any GEP; the rest have to be
2773 // static for struct slots.
2774 if (J > 1) {
2775 assert(CurTy && "No current type?");
2776 if (CurTy->isStructTy())
2777 return nullptr;
2778 }
2779
2780 DI = J;
2781 } else {
2782 // The GEP is different by more than one input. While this could be
2783 // extended to support GEPs that vary by more than one variable, it
2784 // doesn't make sense since it greatly increases the complexity and
2785 // would result in an R+R+R addressing mode, which no backend
2786 // directly supports and would need to be broken into several
2787 // simpler instructions anyway.
2788 return nullptr;
2789 }
2790 }
2791
2792 // Sink down a layer of the type for the next iteration.
2793 if (J > 0) {
2794 if (J == 1) {
2795 CurTy = Op1->getSourceElementType();
2796 } else {
2797 CurTy =
2798 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2799 }
2800 }
2801 }
2802 }
2803
2804 // If not all GEPs are identical we'll have to create a new PHI node.
2805 // Check that the old PHI node has only one use so that it will get
2806 // removed.
2807 if (DI != -1 && !PN->hasOneUse())
2808 return nullptr;
2809
2810 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2811 if (DI == -1) {
2812 // All the GEPs feeding the PHI are identical. Clone one down into our
2813 // BB so that it can be merged with the current GEP.
2814 } else {
2815 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2816 // into the current block so it can be merged, and create a new PHI to
2817 // set that index.
2818 PHINode *NewPN;
2819 {
2820 IRBuilderBase::InsertPointGuard Guard(Builder);
2821 Builder.SetInsertPoint(PN);
2822 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2823 PN->getNumOperands());
2824 }
2825
2826 for (auto &I : PN->operands())
2827 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2828 PN->getIncomingBlock(I));
2829
2830 NewGEP->setOperand(DI, NewPN);
2831 }
2832
2833 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2834 return replaceOperand(GEP, 0, NewGEP);
2835 }
2836
2837 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2838 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2839 return I;
2840
2841 // Skip if GEP source element type is scalable. The type alloc size is unknown
2842 // at compile-time.
2843 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2844 unsigned AS = GEP.getPointerAddressSpace();
2845 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2846 DL.getIndexSizeInBits(AS)) {
2847 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2848
2849 if (TyAllocSize == 1) {
2850 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2851 // but only if the result pointer is only used as if it were an integer,
2852 // or both point to the same underlying object (otherwise provenance is
2853 // not necessarily retained).
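// Illustrative example (not from the original source):
//   %xi = ptrtoint ptr %x to i64
//   %yi = ptrtoint ptr %y to i64
//   %d  = sub i64 %yi, %xi
//   %g  = getelementptr i8, ptr %x, i64 %d
// Uses of %g may then be replaced by %y directly.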
2854 Value *X = GEP.getPointerOperand();
2855 Value *Y;
2856 if (match(GEP.getOperand(1),
2857 m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2858 GEPType == Y->getType()) {
2859 bool HasSameUnderlyingObject =
2860 getUnderlyingObject(X) == getUnderlyingObject(Y);
2861 bool Changed = false;
2862 GEP.replaceUsesWithIf(Y, [&](Use &U) {
2863 bool ShouldReplace = HasSameUnderlyingObject ||
2864 isa<ICmpInst>(U.getUser()) ||
2865 isa<PtrToIntInst>(U.getUser());
2866 Changed |= ShouldReplace;
2867 return ShouldReplace;
2868 });
2869 return Changed ? &GEP : nullptr;
2870 }
2871 } else {
2872 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
2873 Value *V;
2874 if ((has_single_bit(TyAllocSize) &&
2875 match(GEP.getOperand(1),
2876 m_Exact(m_Shr(m_Value(V),
2877 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
2878 match(GEP.getOperand(1),
2879 m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
2880 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2881 Builder.getInt8Ty(), GEP.getPointerOperand(), V);
2882 NewGEP->setIsInBounds(GEP.isInBounds());
2883 return NewGEP;
2884 }
2885 }
2886 }
2887 }
2888 // We do not handle pointer-vector geps here.
2889 if (GEPType->isVectorTy())
2890 return nullptr;
2891
2892 if (GEP.getNumIndices() == 1) {
2893 // Try to replace ADD + GEP with GEP + GEP.
2894 Value *Idx1, *Idx2;
2895 if (match(GEP.getOperand(1),
2896 m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
2897 // %idx = add i64 %idx1, %idx2
2898 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2899 // as:
2900 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2901 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2902 auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
2903 GEP.getPointerOperand(), Idx1);
2904 return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
2905 Idx2);
2906 }
2907 ConstantInt *C;
2908 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
2909 m_Value(Idx1), m_ConstantInt(C))))))) {
2910 // %add = add nsw i32 %idx1, idx2
2911 // %sidx = sext i32 %add to i64
2912 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2913 // as:
2914 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2915 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2916 auto *NewPtr = Builder.CreateGEP(
2917 GEP.getResultElementType(), GEP.getPointerOperand(),
2918 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()));
2919 return GetElementPtrInst::Create(
2920 GEP.getResultElementType(), NewPtr,
2921 Builder.CreateSExt(C, GEP.getOperand(1)->getType()));
2922 }
2923 }
2924
2925 if (!GEP.isInBounds()) {
2926 unsigned IdxWidth =
2927 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2928 APInt BasePtrOffset(IdxWidth, 0);
2929 Value *UnderlyingPtrOp =
2930 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2931 BasePtrOffset);
2932 bool CanBeNull, CanBeFreed;
2933 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
2934 DL, CanBeNull, CanBeFreed);
2935 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
2936 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2937 BasePtrOffset.isNonNegative()) {
2938 APInt AllocSize(IdxWidth, DerefBytes);
2939 if (BasePtrOffset.ule(AllocSize)) {
2940 return GetElementPtrInst::CreateInBounds(
2941 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2942 }
2943 }
2944 }
2945 }
2946
2947 if (Instruction *R = foldSelectGEP(GEP, Builder))
2948 return R;
2949
2950 return nullptr;
2951}
2952
2953static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2954 Instruction *AI) {
2955 if (isa<ConstantPointerNull>(V))
2956 return true;
2957 if (auto *LI = dyn_cast<LoadInst>(V))
2958 return isa<GlobalVariable>(LI->getPointerOperand());
2959 // Two distinct allocations will never be equal.
2960 return isAllocLikeFn(V, &TLI) && V != AI;
2961}
2962
2963/// Given a call CB which uses an address UsedV, return true if we can prove the
2964/// call's only possible effect is storing to UsedV.
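///
/// For example (illustrative): a 'memset(%p, 0, 64)' libcall whose return
/// value is unused writes only to %p, so MemoryLocation::getForDest reports
/// %p as its sole destination and the call is removable once %p is dead.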
2965static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2966 const TargetLibraryInfo &TLI) {
2967 if (!CB.use_empty())
2968 // TODO: add recursion if returned attribute is present
2969 return false;
2970
2971 if (CB.isTerminator())
2972 // TODO: remove implementation restriction
2973 return false;
2974
2975 if (!CB.willReturn() || !CB.doesNotThrow())
2976 return false;
2977
2978 // If the only possible side effect of the call is writing to the alloca,
2979 // and the result isn't used, we can safely remove any reads implied by the
2980 // call including those which might read the alloca itself.
2981 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2982 return Dest && Dest->Ptr == UsedV;
2983}
2984
2985static bool isAllocSiteRemovable(Instruction *AI,
2986 SmallVectorImpl<WeakTrackingVH> &Users,
2987 const TargetLibraryInfo &TLI) {
2988 SmallVector<Instruction*, 4> Worklist;
2989 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2990 Worklist.push_back(AI);
2991
2992 do {
2993 Instruction *PI = Worklist.pop_back_val();
2994 for (User *U : PI->users()) {
2995 Instruction *I = cast<Instruction>(U);
2996 switch (I->getOpcode()) {
2997 default:
2998 // Give up the moment we see something we can't handle.
2999 return false;
3000
3001 case Instruction::AddrSpaceCast:
3002 case Instruction::BitCast:
3003 case Instruction::GetElementPtr:
3004 Users.emplace_back(I);
3005 Worklist.push_back(I);
3006 continue;
3007
3008 case Instruction::ICmp: {
3009 ICmpInst *ICI = cast<ICmpInst>(I);
3010 // We can fold eq/ne comparisons with null to false/true, respectively.
3011 // We also fold comparisons in some conditions provided the alloc has
3012 // not escaped (see isNeverEqualToUnescapedAlloc).
3013 if (!ICI->isEquality())
3014 return false;
3015 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3016 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3017 return false;
3018
3019 // Do not fold compares to aligned_alloc calls, as they may have to
3020 // return null in case the required alignment cannot be satisfied,
3021 // unless we can prove that both alignment and size are valid.
3022 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3023 // Check if alignment and size of a call to aligned_alloc is valid,
3024 // that is alignment is a power-of-2 and the size is a multiple of the
3025 // alignment.
3026 const APInt *Alignment;
3027 const APInt *Size;
3028 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3029 match(CB->getArgOperand(1), m_APInt(Size)) &&
3030 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3031 };
3032 auto *CB = dyn_cast<CallBase>(AI);
3033 LibFunc TheLibFunc;
3034 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3035 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3036 !AlignmentAndSizeKnownValid(CB))
3037 return false;
3038 Users.emplace_back(I);
3039 continue;
3040 }
3041
3042 case Instruction::Call:
3043 // Ignore no-op and store intrinsics.
3044 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3045 switch (II->getIntrinsicID()) {
3046 default:
3047 return false;
3048
3049 case Intrinsic::memmove:
3050 case Intrinsic::memcpy:
3051 case Intrinsic::memset: {
3052 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3053 if (MI->isVolatile() || MI->getRawDest() != PI)
3054 return false;
3055 [[fallthrough]];
3056 }
3057 case Intrinsic::assume:
3058 case Intrinsic::invariant_start:
3059 case Intrinsic::invariant_end:
3060 case Intrinsic::lifetime_start:
3061 case Intrinsic::lifetime_end:
3062 case Intrinsic::objectsize:
3063 Users.emplace_back(I);
3064 continue;
3065 case Intrinsic::launder_invariant_group:
3066 case Intrinsic::strip_invariant_group:
3067 Users.emplace_back(I);
3068 Worklist.push_back(I);
3069 continue;
3070 }
3071 }
3072
3073 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3074 Users.emplace_back(I);
3075 continue;
3076 }
3077
3078 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3079 getAllocationFamily(I, &TLI) == Family) {
3080 assert(Family);
3081 Users.emplace_back(I);
3082 continue;
3083 }
3084
3085 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
3086 getAllocationFamily(I, &TLI) == Family) {
3087 assert(Family);
3088 Users.emplace_back(I);
3089 Worklist.push_back(I);
3090 continue;
3091 }
3092
3093 return false;
3094
3095 case Instruction::Store: {
3096 StoreInst *SI = cast<StoreInst>(I);
3097 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3098 return false;
3099 Users.emplace_back(I);
3100 continue;
3101 }
3102 }
3103 llvm_unreachable("missing a return?");
3104 }
3105 } while (!Worklist.empty());
3106 return true;
3107}
3108
3109Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3110 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3111
3112 // If we have a malloc call which is only used in any number of comparisons to
3113 // null and free calls, delete the calls and replace the comparisons with true
3114 // or false as appropriate.
3115
3116 // This is based on the principle that we can substitute our own allocation
3117 // function (which will never return null) rather than knowledge of the
3118 // specific function being called. In some sense this can change the permitted
3119 // outputs of a program (when we convert a malloc to an alloca, the fact that
3120 // the allocation is now on the stack is potentially visible, for example),
3121 // but we believe it does so in a permissible manner.
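//
// Illustrative example (not from the original source):
//   %p = call ptr @malloc(i64 4)
//   %c = icmp eq ptr %p, null
//   call void @free(ptr %p)
// The malloc and free calls are deleted and %c is replaced with 'false'.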
3122 SmallVector<WeakTrackingVH, 64> Users;
3123
3124 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3125 // before each store.
3126 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
3127 SmallVector<DbgVariableRecord *, 8> DVRs;
3128 std::unique_ptr<DIBuilder> DIB;
3129 if (isa<AllocaInst>(MI)) {
3130 findDbgUsers(DVIs, &MI, &DVRs);
3131 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3132 }
3133
3134 if (isAllocSiteRemovable(&MI, Users, TLI)) {
3135 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3136 // Lowering all @llvm.objectsize calls first because they may
3137 // use a bitcast/GEP of the alloca we are removing.
3138 if (!Users[i])
3139 continue;
3140
3141 Instruction *I = cast<Instruction>(&*Users[i]);
3142
3143 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3144 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3145 SmallVector<Instruction *> InsertedInstructions;
3146 Value *Result = lowerObjectSizeCall(
3147 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3148 for (Instruction *Inserted : InsertedInstructions)
3149 Worklist.add(Inserted);
3150 replaceInstUsesWith(*I, Result);
3151 eraseInstFromFunction(*I);
3152 Users[i] = nullptr; // Skip examining in the next loop.
3153 }
3154 }
3155 }
3156 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3157 if (!Users[i])
3158 continue;
3159
3160 Instruction *I = cast<Instruction>(&*Users[i]);
3161
3162 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3163 replaceInstUsesWith(*C,
3164 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3165 C->isFalseWhenEqual()));
3166 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3167 for (auto *DVI : DVIs)
3168 if (DVI->isAddressOfVariable())
3169 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
3170 for (auto *DVR : DVRs)
3171 if (DVR->isAddressOfVariable())
3172 ConvertDebugDeclareToDebugValue(DVR, SI, *DIB);
3173 } else {
3174 // Casts, GEP, or anything else: we're about to delete this instruction,
3175 // so it cannot have any valid uses.
3176 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
3177 }
3178 eraseInstFromFunction(*I);
3179 }
3180
3181 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3182 // Replace invoke with a NOP intrinsic to maintain the original CFG
3183 Module *M = II->getModule();
3184 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3185 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
3186 std::nullopt, "", II->getParent());
3187 }
3188
3189 // Remove debug intrinsics which describe the value contained within the
3190 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3191 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3192 //
3193 // ```
3194 // define void @foo(i32 %0) {
3195 // %a = alloca i32 ; Deleted.
3196 // store i32 %0, i32* %a
3197 // dbg.value(i32 %0, "arg0") ; Not deleted.
3198 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3199 // call void @trivially_inlinable_no_op(i32* %a)
3200 // ret void
3201 // }
3202 // ```
3203 //
3204 // This may not be required if we stop describing the contents of allocas
3205 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3206 // the LowerDbgDeclare utility.
3207 //
3208 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3209 // "arg0" dbg.value may be stale after the call. However, failing to remove
3210 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3211 //
3212 // FIXME: the Assignment Tracking project has now likely made this
3213 // redundant (and it's sometimes harmful).
3214 for (auto *DVI : DVIs)
3215 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3216 DVI->eraseFromParent();
3217 for (auto *DVR : DVRs)
3218 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3219 DVR->eraseFromParent();
3220
3221 return eraseInstFromFunction(MI);
3222 }
3223 return nullptr;
3224}
3225
3226/// Move the call to free before a NULL test.
3227///
3228 /// Check if this free is accessed after its argument has been tested
3229/// against NULL (property 0).
3230/// If yes, it is legal to move this call in its predecessor block.
3231///
3232/// The move is performed only if the block containing the call to free
3233/// will be removed, i.e.:
3234/// 1. it has only one predecessor P, and P has two successors
3235/// 2. it contains the call, noops, and an unconditional branch
3236/// 3. its successor is the same as its predecessor's successor
3237///
3238 /// Profitability is not a concern here; this function should be called
3239 /// only if the caller already knows this transformation would be
3240 /// profitable (e.g., for code size).
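///
/// As an illustrative sketch (not part of the original source), the shape of
/// CFG this targets is:
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %cont, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %cont
/// After the transform, the call to free is hoisted into pred so that
/// SimplifyCFG can later delete free.bb and the conditional branch.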
3241static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3242 const DataLayout &DL) {
3243 Value *Op = FI.getArgOperand(0);
3244 BasicBlock *FreeInstrBB = FI.getParent();
3245 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3246
3247 // Validate part of constraint #1: Only one predecessor
3248 // FIXME: We could handle more than one predecessor, but in that case we
3249 // would duplicate the call to free in each predecessor and it may
3250 // not be profitable even for code size.
3251 if (!PredBB)
3252 return nullptr;
3253
3254 // Validate constraint #2: Does this block contain only the call to
3255 // free, noops, and an unconditional branch?
3256 BasicBlock *SuccBB;
3257 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3258 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3259 return nullptr;
3260
3261 // If there are only 2 instructions in the block, at this point,
3262 // they are the call to free and the unconditional branch.
3263 // If there are more than 2 instructions, check that the extra ones are
3264 // noops, i.e., they won't hurt the performance of the generated code.
3265 if (FreeInstrBB->size() != 2) {
3266 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3267 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3268 continue;
3269 auto *Cast = dyn_cast<CastInst>(&Inst);
3270 if (!Cast || !Cast->isNoopCast(DL))
3271 return nullptr;
3272 }
3273 }
3274 // Validate the rest of constraint #1 by matching on the pred branch.
3275 Instruction *TI = PredBB->getTerminator();
3276 BasicBlock *TrueBB, *FalseBB;
3277 ICmpInst::Predicate Pred;
3278 if (!match(TI, m_Br(m_ICmp(Pred,
3279 m_CombineOr(m_Specific(Op),
3280 m_Specific(Op->stripPointerCasts())),
3281 m_Zero()),
3282 TrueBB, FalseBB)))
3283 return nullptr;
3284 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3285 return nullptr;
3286
3287 // Validate constraint #3: Ensure the null case just falls through.
3288 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3289 return nullptr;
3290 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3291 "Broken CFG: missing edge from predecessor to successor");
3292
3293 // At this point, we know that everything in FreeInstrBB can be moved
3294 // before TI.
3295 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3296 if (&Instr == FreeInstrBBTerminator)
3297 break;
3298 Instr.moveBeforePreserving(TI);
3299 }
3300 assert(FreeInstrBB->size() == 1 &&
3301 "Only the branch instruction should remain");
3302
3303 // Now that we've moved the call to free before the NULL check, we have to
3304 // remove any attributes on its parameter that imply it's non-null, because
3305 // those attributes might have only been valid because of the NULL check, and
3306 // we can get miscompiles if we keep them. This is conservative if non-null is
3307 // also implied by something other than the NULL check, but it's guaranteed to
3308 // be correct, and the conservativeness won't matter in practice, since the
3309 // attributes are irrelevant for the call to free itself and the pointer
3310 // shouldn't be used after the call.
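 // For example (an illustrative sketch, not from the original source): a
 // parameter that was 'nonnull dereferenceable(8)' is rewritten below to carry
 // 'dereferenceable_or_null(8)' instead, since the pointer may legitimately be
 // null at the hoisted call site.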
3311 AttributeList Attrs = FI.getAttributes();
3312 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3313 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3314 if (Dereferenceable.isValid()) {
3315 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3316 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3317 Attribute::Dereferenceable);
3318 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3319 }
3320 FI.setAttributes(Attrs);
3321
3322 return &FI;
3323}
3324
3325Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3326 // free undef -> unreachable.
3327 if (isa<UndefValue>(Op)) {
3328 // Leave a marker since we can't modify the CFG here.
3329 CreateNonTerminatorUnreachable(&FI);
3330 return eraseInstFromFunction(FI);
3331 }
3332
3333 // If we have 'free null' delete the instruction. This can happen in stl code
3334 // when lots of inlining happens.
3335 if (isa<ConstantPointerNull>(Op))
3336 return eraseInstFromFunction(FI);
3337
3338 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3339 // realloc() entirely.
3340 CallInst *CI = dyn_cast<CallInst>(Op);
3341 if (CI && CI->hasOneUse())
3342 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3343 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3344
3345 // If we optimize for code size, try to move the call to free before the null
3346 // test so that SimplifyCFG can remove the empty block and dead code
3347 // elimination can remove the branch. I.e., this helps to turn something like:
3348 // if (foo) free(foo);
3349 // into
3350 // free(foo);
3351 //
3352 // Note that we can only do this for 'free' and not for any flavor of
3353 // 'operator delete'; there is no 'operator delete' symbol for which we are
3354 // permitted to invent a call, even if we're passing in a null pointer.
3355 if (MinimizeSize) {
3356 LibFunc Func;
3357 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3358 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3359 return I;
3360 }
3361
3362 return nullptr;
3363}
3364
3365Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3366 Value *RetVal = RI.getReturnValue();
3367 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
3368 return nullptr;
3369
3370 Function *F = RI.getFunction();
3371 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3372 if (ReturnClass == fcNone)
3373 return nullptr;
3374
3375 KnownFPClass KnownClass;
3376 Value *Simplified =
3377 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
3378 if (!Simplified)
3379 return nullptr;
3380
3381 return ReturnInst::Create(RI.getContext(), Simplified);
3382}
3383
3384// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3385bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
3386 // Try to remove the previous instruction if it must lead to unreachable.
3387 // This includes instructions like stores and "llvm.assume" that may not get
3388 // removed by simple dead code elimination.
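 // An illustrative sketch (not part of the original source):
 //   store i32 1, ptr %p
 //   call void @llvm.assume(i1 true)
 //   unreachable
 // Both instructions before the unreachable can be erased here, because
 // execution is guaranteed to reach the unreachable anyway.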
3389 bool Changed = false;
3390 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3391 // While we theoretically can erase EH, that would result in a block that
3392 // used to start with an EH pad no longer starting with one, which is invalid.
3393 // To make it valid, we'd need to fixup predecessors to no longer refer to
3394 // this block, but that changes CFG, which is not allowed in InstCombine.
3395 if (Prev->isEHPad())
3396 break; // Can not drop any more instructions. We're done here.
3397
3397
3398 if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3399 break; // Can not drop any more instructions. We're done here.
3400 // Otherwise, this instruction can be freely erased,
3401 // even if it is not side-effect free.
3402
3403 // A value may still have uses before we process it here (for example, in
3404 // another unreachable block), so convert those to poison.
3405 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3406 eraseInstFromFunction(*Prev);
3407 Changed = true;
3408 }
3409 return Changed;
3410}
3411
3412Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3413 removeInstructionsBeforeUnreachable(I);
3414 return nullptr;
3415}
3416
3417Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3418 assert(BI.isUnconditional() && "Only for unconditional branches.");
3419
3420 // If this store is the second-to-last instruction in the basic block
3421 // (excluding debug info and bitcasts of pointers) and if the block ends with
3422 // an unconditional branch, try to move the store to the successor block.
3423
3424 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3425 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3426 return BBI->isDebugOrPseudoInst() ||
3427 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3428 };
3429
3430 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3431 do {
3432 if (BBI != FirstInstr)
3433 --BBI;
3434 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3435
3436 return dyn_cast<StoreInst>(BBI);
3437 };
3438
3439 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3440 if (mergeStoreIntoSuccessor(*SI))
3441 return &BI;
3442
3443 return nullptr;
3444}
3445
3446void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
3447 SmallVectorImpl<BasicBlock *> &Worklist) {
3448 if (!DeadEdges.insert({From, To}).second)
3449 return;
3450
3451 // Replace phi node operands in successor with poison.
3452 for (PHINode &PN : To->phis())
3453 for (Use &U : PN.incoming_values())
3454 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
3455 replaceUse(U, PoisonValue::get(PN.getType()));
3456 addToWorklist(&PN);
3457 MadeIRChange = true;
3458 }
3459
3460 Worklist.push_back(To);
3461}
3462
3463 // Under the assumption that I is unreachable, remove it and the following
3464// instructions. Changes are reported directly to MadeIRChange.
3465void InstCombinerImpl::handleUnreachableFrom(
3466 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
3467 BasicBlock *BB = I->getParent();
3468 for (Instruction &Inst : make_early_inc_range(
3469 make_range(std::next(BB->getTerminator()->getReverseIterator()),
3470 std::next(I->getReverseIterator())))) {
3471 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3472 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
3473 MadeIRChange = true;
3474 }
3475 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3476 continue;
3477 // RemoveDIs: erase debug-info on this instruction manually.
3478 Inst.dropDbgRecords();
3480 MadeIRChange = true;
3481 }
3482
3483 SmallVector<Value *> Changed;
3484 if (handleUnreachableTerminator(BB->getTerminator(), Changed)) {
3485 MadeIRChange = true;
3486 for (Value *V : Changed)
3487 addToWorklist(cast<Instruction>(V));
3488 }
3489
3490 // Handle potentially dead successors.
3491 for (BasicBlock *Succ : successors(BB))
3492 addDeadEdge(BB, Succ, Worklist);
3493}
3494
3497 while (!Worklist.empty()) {
3498 BasicBlock *BB = Worklist.pop_back_val();
3499 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
3500 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
3501 }))
3502 continue;
3503
3504 handleUnreachableFrom(&BB->front(), Worklist);
3505 }
3506}
3507
3508void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
3509 BasicBlock *LiveSucc) {
3510 SmallVector<BasicBlock *> Worklist;
3511 for (BasicBlock *Succ : successors(BB)) {
3512 // The live successor isn't dead.
3513 if (Succ == LiveSucc)
3514 continue;
3515
3516 addDeadEdge(BB, Succ, Worklist);
3517 }
3518
3520}
3521
3522Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3523 if (BI.isUnconditional())
3524 return visitUnconditionalBranchInst(BI);
3525
3526 // Change br (not X), label True, label False to: br X, label False, True
3527 Value *Cond = BI.getCondition();
3528 Value *X;
3529 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3530 // Swap Destinations and condition...
3531 BI.swapSuccessors();
3532 if (BPI)
3534 return replaceOperand(BI, 0, X);
3535 }
3536
3537 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3538 // This is done by inverting the condition and swapping successors:
3539 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3540 Value *Y;
3541 if (isa<SelectInst>(Cond) &&
3542 match(Cond,
3544 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3545 Value *Or = Builder.CreateLogicalOr(NotX, Y);
3546 BI.swapSuccessors();
3547 if (BPI)
3549 return replaceOperand(BI, 0, Or);
3550 }
3551
3552 // If the condition is irrelevant, remove the use so that other
3553 // transforms on the condition become more effective.
3554 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3555 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3556
3557 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3558 CmpInst::Predicate Pred;
3559 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3560 !isCanonicalPredicate(Pred)) {
3561 // Swap destinations and condition.
3562 auto *Cmp = cast<CmpInst>(Cond);
3563 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3564 BI.swapSuccessors();
3565 if (BPI)
3567 Worklist.push(Cmp);
3568 return &BI;
3569 }
3570
3571 if (isa<UndefValue>(Cond)) {
3572 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
3573 return nullptr;
3574 }
3575 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3576 handlePotentiallyDeadSuccessors(BI.getParent(),
3577 BI.getSuccessor(!CI->getZExtValue()));
3578 return nullptr;
3579 }
3580
3581 DC.registerBranch(&BI);
3582 return nullptr;
3583}
3584
3585// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3586// we can prove that both (switch C) and (switch X) go to the default when cond
3587// is false/true.
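//
// An illustrative sketch (not part of the original source):
//   %sel = select i1 (icmp ult i32 %x, 10), i32 %x, i32 99
//   switch i32 %sel, ...
// If constant 99 lands in the default block and every case value lies in
// [0, 10), the switch can instead be performed directly on %x.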
3588static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
3589 SelectInst *Select,
3590 bool IsTrueArm) {
3591 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3592 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
3593 if (!C)
3594 return nullptr;
3595
3596 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3597 if (CstBB != SI.getDefaultDest())
3598 return nullptr;
3599 Value *X = Select->getOperand(3 - CstOpIdx);
3600 ICmpInst::Predicate Pred;
3601 const APInt *RHSC;
3602 if (!match(Select->getCondition(),
3603 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
3604 return nullptr;
3605 if (IsTrueArm)
3606 Pred = ICmpInst::getInversePredicate(Pred);
3607
3608 // See whether we can replace the select with X
3609 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
3610 for (auto Case : SI.cases())
3611 if (!CR.contains(Case.getCaseValue()->getValue()))
3612 return nullptr;
3613
3614 return X;
3615}
3616
3617Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3618 Value *Cond = SI.getCondition();
3619 Value *Op0;
3620 ConstantInt *AddRHS;
3621 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3622 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3623 for (auto Case : SI.cases()) {
3624 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3625 assert(isa<ConstantInt>(NewCase) &&
3626 "Result of expression should be constant");
3627 Case.setValue(cast<ConstantInt>(NewCase));
3628 }
3629 return replaceOperand(SI, 0, Op0);
3630 }
3631
3632 ConstantInt *SubLHS;
3633 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
3634 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3635 for (auto Case : SI.cases()) {
3636 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
3637 assert(isa<ConstantInt>(NewCase) &&
3638 "Result of expression should be constant");
3639 Case.setValue(cast<ConstantInt>(NewCase));
3640 }
3641 return replaceOperand(SI, 0, Op0);
3642 }
3643
3644 uint64_t ShiftAmt;
3645 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
3646 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3647 all_of(SI.cases(), [&](const auto &Case) {
3648 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3649 })) {
3650 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3651 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond);
3652 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3653 Shl->hasOneUse()) {
3654 Value *NewCond = Op0;
3655 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3656 // If the shift may wrap, we need to mask off the shifted bits.
3657 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3658 NewCond = Builder.CreateAnd(
3659 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
3660 }
3661 for (auto Case : SI.cases()) {
3662 const APInt &CaseVal = Case.getCaseValue()->getValue();
3663 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3664 : CaseVal.lshr(ShiftAmt);
3665 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
3666 }
3667 return replaceOperand(SI, 0, NewCond);
3668 }
3669 }
3670
3671 // Fold switch(zext/sext(X)) into switch(X) if possible.
3672 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
3673 bool IsZExt = isa<ZExtInst>(Cond);
3674 Type *SrcTy = Op0->getType();
3675 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3676
3677 if (all_of(SI.cases(), [&](const auto &Case) {
3678 const APInt &CaseVal = Case.getCaseValue()->getValue();
3679 return IsZExt ? CaseVal.isIntN(NewWidth)
3680 : CaseVal.isSignedIntN(NewWidth);
3681 })) {
3682 for (auto &Case : SI.cases()) {
3683 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3684 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3685 }
3686 return replaceOperand(SI, 0, Op0);
3687 }
3688 }
3689
3690 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3691 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
3692 if (Value *V =
3693 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3694 return replaceOperand(SI, 0, V);
3695 if (Value *V =
3696 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3697 return replaceOperand(SI, 0, V);
3698 }
3699
3700 KnownBits Known = computeKnownBits(Cond, 0, &SI);
3701 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3702 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3703
3704 // Compute the number of leading bits we can ignore.
3705 // TODO: A better way to determine this would use ComputeNumSignBits().
3706 for (const auto &C : SI.cases()) {
3707 LeadingKnownZeros =
3708 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
3709 LeadingKnownOnes =
3710 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
3711 }
3712
3713 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3714
3715 // Shrink the condition operand if the new type is smaller than the old type.
3716 // But do not shrink to a non-standard type, because backend can't generate
3717 // good code for that yet.
3718 // TODO: We can make it aggressive again after fixing PR39569.
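 // For example (an illustrative sketch, not from the original source): a
 // switch on an i32 condition whose cases and known leading bits all fit in
 // 16 bits can instead be performed on a truncated i16 condition.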
3719 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3720 shouldChangeType(Known.getBitWidth(), NewWidth)) {
3721 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3723 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3724
3725 for (auto Case : SI.cases()) {
3726 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3727 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3728 }
3729 return replaceOperand(SI, 0, NewCond);
3730 }
3731
3732 if (isa<UndefValue>(Cond)) {
3733 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
3734 return nullptr;
3735 }
3736 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3737 handlePotentiallyDeadSuccessors(SI.getParent(),
3738 SI.findCaseValue(CI)->getCaseSuccessor());
3739 return nullptr;
3740 }
3741
3742 return nullptr;
3743}
3744
3745Instruction *
3746InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3747 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3748 if (!WO)
3749 return nullptr;
3750
3751 Intrinsic::ID OvID = WO->getIntrinsicID();
3752 const APInt *C = nullptr;
3753 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
3754 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3755 OvID == Intrinsic::umul_with_overflow)) {
3756 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3757 if (C->isAllOnes())
3758 return BinaryOperator::CreateNeg(WO->getLHS());
3759 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3760 if (C->isPowerOf2()) {
3761 return BinaryOperator::CreateShl(
3762 WO->getLHS(),
3763 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3764 }
3765 }
3766 }
3767
3768 // We're extracting from an overflow intrinsic. See if we're the only user.
3769 // That allows us to simplify multiple result intrinsics to simpler things
3770 // that just get one value.
3771 if (!WO->hasOneUse())
3772 return nullptr;
3773
3774 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3775 // and replace it with a traditional binary instruction.
3776 if (*EV.idx_begin() == 0) {
3777 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3778 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3779 // Replace the old instruction's uses with poison.
3780 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3781 eraseInstFromFunction(*WO);
3782 return BinaryOperator::Create(BinOp, LHS, RHS);
3783 }
3784
3785 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3786
3787 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3788 if (OvID == Intrinsic::usub_with_overflow)
3789 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3790
3791 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3792 // +1 is not possible because we assume signed values.
3793 if (OvID == Intrinsic::smul_with_overflow &&
3794 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3795 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3796
3797 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
3798 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3799 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3800 // Only handle even bitwidths for performance reasons.
3801 if (BitWidth % 2 == 0)
3802 return new ICmpInst(
3803 ICmpInst::ICMP_UGT, WO->getLHS(),
3804 ConstantInt::get(WO->getLHS()->getType(),
3805 APInt::getLowBitsSet(BitWidth, BitWidth / 2)));
3806 }
3807
3808 // If only the overflow result is used, and the right hand side is a
3809 // constant (or constant splat), we can remove the intrinsic by directly
3810 // checking for overflow.
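 // For example (an illustrative sketch, not from the original source):
 // extracting only the overflow bit of llvm.sadd.with.overflow(i32 %x, i32 1)
 // can be rewritten as an equality test of %x against INT32_MAX, the only
 // input that overflows.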
3811 if (C) {
3812 // Compute the no-wrap range for LHS given RHS=C, then construct an
3813 // equivalent icmp, potentially using an offset.
3814 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3815 WO->getBinaryOp(), *C, WO->getNoWrapKind());
3816
3817 CmpInst::Predicate Pred;
3818 APInt NewRHSC, Offset;
3819 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3820 auto *OpTy = WO->getRHS()->getType();
3821 auto *NewLHS = WO->getLHS();
3822 if (Offset != 0)
3823 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3824 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3825 ConstantInt::get(OpTy, NewRHSC));
3826 }
3827
3828 return nullptr;
3829}
3830
3831Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3832 Value *Agg = EV.getAggregateOperand();
3833
3834 if (!EV.hasIndices())
3835 return replaceInstUsesWith(EV, Agg);
3836
3837 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3838 SQ.getWithInstruction(&EV)))
3839 return replaceInstUsesWith(EV, V);
3840
3841 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3842 // We're extracting from an insertvalue instruction, compare the indices
3843 const unsigned *exti, *exte, *insi, *inse;
3844 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3845 exte = EV.idx_end(), inse = IV->idx_end();
3846 exti != exte && insi != inse;
3847 ++exti, ++insi) {
3848 if (*insi != *exti)
3849 // The insert and extract both reference distinctly different elements.
3850 // This means the extract is not influenced by the insert, and we can
3851 // replace the aggregate operand of the extract with the aggregate
3852 // operand of the insert. i.e., replace
3853 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3854 // %E = extractvalue { i32, { i32 } } %I, 0
3855 // with
3856 // %E = extractvalue { i32, { i32 } } %A, 0
3857 return ExtractValueInst::Create(IV->getAggregateOperand(),
3858 EV.getIndices());
3859 }
3860 if (exti == exte && insi == inse)
3861 // Both iterators are at the end: Index lists are identical. Replace
3862 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3863 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3864 // with "i32 42"
3865 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3866 if (exti == exte) {
3867 // The extract list is a prefix of the insert list. i.e. replace
3868 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3869 // %E = extractvalue { i32, { i32 } } %I, 1
3870 // with
3871 // %X = extractvalue { i32, { i32 } } %A, 1
3872 // %E = insertvalue { i32 } %X, i32 42, 0
3873 // by switching the order of the insert and extract (though the
3874 // insertvalue should be left in, since it may have other uses).
3875 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3876 EV.getIndices());
3877 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3878 ArrayRef(insi, inse));
3879 }
3880 if (insi == inse)
3881 // The insert list is a prefix of the extract list
3882 // We can simply remove the common indices from the extract and make it
3883 // operate on the inserted value instead of the insertvalue result.
3884 // i.e., replace
3885 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3886 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3887 // with
3888 // %E = extractvalue { i32 } { i32 42 }, 0
3889 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3890 ArrayRef(exti, exte));
3891 }
3892
3893 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3894 return R;
3895
3896 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3897 // Bail out if the aggregate contains scalable vector type
3898 if (auto *STy = dyn_cast<StructType>(Agg->getType());
3899 STy && STy->containsScalableVectorType())
3900 return nullptr;
3901
3902 // If the (non-volatile) load only has one use, we can rewrite this to a
3903 // load from a GEP. This reduces the size of the load. If a load is used
3904 // only by extractvalue instructions then this either must have been
3905 // optimized before, or it is a struct with padding, in which case we
3906 // don't want to do the transformation as it loses padding knowledge.
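 // For example (an illustrative sketch, not from the original source):
 //   %agg = load { i32, i32 }, ptr %p
 //   %v = extractvalue { i32, i32 } %agg, 1
 // becomes a load of 'getelementptr { i32, i32 }, ptr %p, i32 0, i32 1'.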
3907 if (L->isSimple() && L->hasOneUse()) {
3908 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3909 SmallVector<Value*, 4> Indices;
3910 // Prefix an i32 0 since we need the first element.
3911 Indices.push_back(Builder.getInt32(0));
3912 for (unsigned Idx : EV.indices())
3913 Indices.push_back(Builder.getInt32(Idx));
3914
3915 // We need to insert these at the location of the old load, not at that of
3916 // the extractvalue.
3917 Builder.SetInsertPoint(L);
3918 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3919 L->getPointerOperand(), Indices);
3920 LoadInst *NL = Builder.CreateLoad(EV.getType(), GEP);
3921 // Whatever aliasing information we had for the original load must also
3922 // hold for the smaller load, so propagate the annotations.
3923 NL->setAAMetadata(L->getAAMetadata());
3924 // Returning the load directly will cause the main loop to insert it in
3925 // the wrong spot, so use replaceInstUsesWith().
3926 return replaceInstUsesWith(EV, NL);
3927 }
3928 }
3929
3930 if (auto *PN = dyn_cast<PHINode>(Agg))
3931 if (Instruction *Res = foldOpIntoPhi(EV, PN))
3932 return Res;
3933
3934 // Canonicalize extract (select Cond, TV, FV)
3935 // -> select cond, (extract TV), (extract FV)
3936 if (auto *SI = dyn_cast<SelectInst>(Agg))
3937 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
3938 return R;
3939
3940 // We could simplify extracts from other values. Note that nested extracts may
3941 // already be simplified implicitly by the above: extract (extract (insert) )
3942 // will be translated into extract ( insert ( extract ) ) first and then just
3943 // the value inserted, if appropriate. Similarly for extracts from single-use
3944 // loads: extract (extract (load)) will be translated to extract (load (gep))
3945 // and if again single-use then via load (gep (gep)) to load (gep).
3946 // However, double extracts from e.g. function arguments or return values
3947 // aren't handled yet.
3948 return nullptr;
3949}
3950
3951/// Return 'true' if the given typeinfo will match anything.
3952static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3953 switch (Personality) {
3957 // The GCC C EH and Rust personalities only exist to support cleanups, so
3958 // it's not clear what the semantics of catch clauses are.
3959 return false;
3961 return false;
3963 // While __gnat_all_others_value will match any Ada exception, it doesn't
3964 // match foreign exceptions (or didn't, before gcc-4.7).
3965 return false;
3975 return TypeInfo->isNullValue();
3976 }
3977 llvm_unreachable("invalid enum");
3978}
3979
3980static bool shorter_filter(const Value *LHS, const Value *RHS) {
3981 return
3982 cast<ArrayType>(LHS->getType())->getNumElements()
3983 <
3984 cast<ArrayType>(RHS->getType())->getNumElements();
3985}
3986
3987Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3988 // The logic here should be correct for any real-world personality function.
3989 // However if that turns out not to be true, the offending logic can always
3990 // be conditioned on the personality function, like the catch-all logic is.
3991 EHPersonality Personality =
3992 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3993
3994 // Simplify the list of clauses, eg by removing repeated catch clauses
3995 // (these are often created by inlining).
3996 bool MakeNewInstruction = false; // If true, recreate using the following:
3997 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3998 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3999
4000 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4001 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4002 bool isLastClause = i + 1 == e;
4003 if (LI.isCatch(i)) {
4004 // A catch clause.
4005 Constant *CatchClause = LI.getClause(i);
4006 Constant *TypeInfo = CatchClause->stripPointerCasts();
4007
4008 // If we already saw this clause, there is no point in having a second
4009 // copy of it.
4010 if (AlreadyCaught.insert(TypeInfo).second) {
4011 // This catch clause was not already seen.
4012 NewClauses.push_back(CatchClause);
4013 } else {
4014 // Repeated catch clause - drop the redundant copy.
4015 MakeNewInstruction = true;
4016 }
4017
4018 // If this is a catch-all then there is no point in keeping any following
4019 // clauses or marking the landingpad as having a cleanup.
4020 if (isCatchAll(Personality, TypeInfo)) {
4021 if (!isLastClause)
4022 MakeNewInstruction = true;
4023 CleanupFlag = false;
4024 break;
4025 }
4026 } else {
4027 // A filter clause. If any of the filter elements were already caught
4028 // then they can be dropped from the filter. It is tempting to try to
4029 // exploit the filter further by saying that any typeinfo that does not
4030 // occur in the filter can't be caught later (and thus can be dropped).
4031 // However this would be wrong, since typeinfos can match without being
4032 // equal (for example if one represents a C++ class, and the other some
4033 // class derived from it).
4034 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4035 Constant *FilterClause = LI.getClause(i);
4036 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4037 unsigned NumTypeInfos = FilterType->getNumElements();
4038
4039 // An empty filter catches everything, so there is no point in keeping any
4040 // following clauses or marking the landingpad as having a cleanup. By
4041 // dealing with this case here the following code is made a bit simpler.
4042 if (!NumTypeInfos) {
4043 NewClauses.push_back(FilterClause);
4044 if (!isLastClause)
4045 MakeNewInstruction = true;
4046 CleanupFlag = false;
4047 break;
4048 }
4049
4050 bool MakeNewFilter = false; // If true, make a new filter.
4051 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4052 if (isa<ConstantAggregateZero>(FilterClause)) {
4053 // Not an empty filter - it contains at least one null typeinfo.
4054 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4055 Constant *TypeInfo =
4056 Constant::getNullValue(FilterType->getElementType());
4057 // If this typeinfo is a catch-all then the filter can never match.
4058 if (isCatchAll(Personality, TypeInfo)) {
4059 // Throw the filter away.
4060 MakeNewInstruction = true;
4061 continue;
4062 }
4063
4064 // There is no point in having multiple copies of this typeinfo, so
4065 // discard all but the first copy if there is more than one.
4066 NewFilterElts.push_back(TypeInfo);
4067 if (NumTypeInfos > 1)
4068 MakeNewFilter = true;
4069 } else {
4070 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4071 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4072 NewFilterElts.reserve(NumTypeInfos);
4073
4074 // Remove any filter elements that were already caught or that already
4075 // occurred in the filter. While there, see if any of the elements are
4076 // catch-alls. If so, the filter can be discarded.
4077 bool SawCatchAll = false;
4078 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4079 Constant *Elt = Filter->getOperand(j);
4080 Constant *TypeInfo = Elt->stripPointerCasts();
4081 if (isCatchAll(Personality, TypeInfo)) {
4082 // This element is a catch-all. Bail out, noting this fact.
4083 SawCatchAll = true;
4084 break;
4085 }
4086
4087 // Even if we've seen a type in a catch clause, we don't want to
4088 // remove it from the filter. An unexpected type handler may be
4089 // set up for a call site which throws an exception of the same
4090 // type caught. In order for the exception thrown by the unexpected
4091 // handler to propagate correctly, the filter must be correctly
4092 // described for the call site.
4093 //
4094 // Example:
4095 //
4096 // void unexpected() { throw 1;}
4097 // void foo() throw (int) {
4098 // std::set_unexpected(unexpected);
4099 // try {
4100 // throw 2.0;
4101 // } catch (int i) {}
4102 // }
4103
4104 // There is no point in having multiple copies of the same typeinfo in
4105 // a filter, so only add it if we didn't already.
4106 if (SeenInFilter.insert(TypeInfo).second)
4107 NewFilterElts.push_back(cast<Constant>(Elt));
4108 }
4109 // A filter containing a catch-all cannot match anything by definition.
4110 if (SawCatchAll) {
4111 // Throw the filter away.
4112 MakeNewInstruction = true;
4113 continue;
4114 }
4115
4116 // If we dropped something from the filter, make a new one.
4117 if (NewFilterElts.size() < NumTypeInfos)
4118 MakeNewFilter = true;
4119 }
4120 if (MakeNewFilter) {
4121 FilterType = ArrayType::get(FilterType->getElementType(),
4122 NewFilterElts.size());
4123 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4124 MakeNewInstruction = true;
4125 }
4126
4127 NewClauses.push_back(FilterClause);
4128
4129 // If the new filter is empty then it will catch everything so there is
4130 // no point in keeping any following clauses or marking the landingpad
4131 // as having a cleanup. The case of the original filter being empty was
4132 // already handled above.
4133 if (MakeNewFilter && !NewFilterElts.size()) {
4134 assert(MakeNewInstruction && "New filter but not a new instruction!");
4135 CleanupFlag = false;
4136 break;
4137 }
4138 }
4139 }
4140
4141 // If several filters occur in a row then reorder them so that the shortest
4142 // filters come first (those with the smallest number of elements). This is
4143 // advantageous because shorter filters are more likely to match, speeding up
4144 // unwinding, but mostly because it increases the effectiveness of the other
4145 // filter optimizations below.
4146 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4147 unsigned j;
4148 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4149 for (j = i; j != e; ++j)
4150 if (!isa<ArrayType>(NewClauses[j]->getType()))
4151 break;
4152
4153 // Check whether the filters are already sorted by length. We need to know
4154 // if sorting them is actually going to do anything so that we only make a
4155 // new landingpad instruction if it does.
4156 for (unsigned k = i; k + 1 < j; ++k)
4157 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4158 // Not sorted, so sort the filters now. Doing an unstable sort would be
4159 // correct too but reordering filters pointlessly might confuse users.
4160 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4161 shorter_filter);
4162 MakeNewInstruction = true;
4163 break;
4164 }
4165
4166 // Look for the next batch of filters.
4167 i = j + 1;
4168 }
4169
4170 // If typeinfos matched if and only if equal, then the elements of a filter L
4171 // that occurs later than a filter F could be replaced by the intersection of
4172 // the elements of F and L. In reality two typeinfos can match without being
4173 // equal (for example if one represents a C++ class, and the other some class
4174 // derived from it) so it would be wrong to perform this transform in general.
4175 // However the transform is correct and useful if F is a subset of L. In that
4176 // case L can be replaced by F, and thus removed altogether since repeating a
4177 // filter is pointless. So here we look at all pairs of filters F and L where
4178 // L follows F in the list of clauses, and remove L if every element of F is
4179 // an element of L. This can occur when inlining C++ functions with exception
4180 // specifications.
4181 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4182 // Examine each filter in turn.
4183 Value *Filter = NewClauses[i];
4184 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4185 if (!FTy)
4186 // Not a filter - skip it.
4187 continue;
4188 unsigned FElts = FTy->getNumElements();
4189 // Examine each filter following this one. Doing this backwards means that
4190 // we don't have to worry about filters disappearing under us when removed.
4191 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4192 Value *LFilter = NewClauses[j];
4193 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4194 if (!LTy)
4195 // Not a filter - skip it.
4196 continue;
4197 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4198 // an element of LFilter, then discard LFilter.
4199 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4200 // If Filter is empty then it is a subset of LFilter.
4201 if (!FElts) {
4202 // Discard LFilter.
4203 NewClauses.erase(J);
4204 MakeNewInstruction = true;
4205 // Move on to the next filter.
4206 continue;
4207 }
4208 unsigned LElts = LTy->getNumElements();
4209 // If Filter is longer than LFilter then it cannot be a subset of it.
4210 if (FElts > LElts)
4211 // Move on to the next filter.
4212 continue;
4213 // At this point we know that LFilter has at least one element.
4214 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4215 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4216 // already know that Filter is not longer than LFilter).
4217 if (isa<ConstantAggregateZero>(Filter)) {
4218 assert(FElts <= LElts && "Should have handled this case earlier!");
4219 // Discard LFilter.
4220 NewClauses.erase(J);
4221 MakeNewInstruction = true;
4222 }
4223 // Move on to the next filter.
4224 continue;
4225 }
4226 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4227 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4228 // Since Filter is non-empty and contains only zeros, it is a subset of
4229 // LFilter iff LFilter contains a zero.
4230 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4231 for (unsigned l = 0; l != LElts; ++l)
4232 if (LArray->getOperand(l)->isNullValue()) {
4233 // LFilter contains a zero - discard it.
4234 NewClauses.erase(J);
4235 MakeNewInstruction = true;
4236 break;
4237 }
4238 // Move on to the next filter.
4239 continue;
4240 }
4241 // At this point we know that both filters are ConstantArrays. Loop over
4242 // operands to see whether every element of Filter is also an element of
4243 // LFilter. Since filters tend to be short this is probably faster than
4244 // using a method that scales nicely.
4245 ConstantArray *FArray = cast<ConstantArray>(Filter);
4246 bool AllFound = true;
4247 for (unsigned f = 0; f != FElts; ++f) {
4248 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4249 AllFound = false;
4250 for (unsigned l = 0; l != LElts; ++l) {
4251 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4252 if (LTypeInfo == FTypeInfo) {
4253 AllFound = true;
4254 break;
4255 }
4256 }
4257 if (!AllFound)
4258 break;
4259 }
4260 if (AllFound) {
4261 // Discard LFilter.
4262 NewClauses.erase(J);
4263 MakeNewInstruction = true;
4264 }
4265 // Move on to the next filter.
4266 }
4267 }
4268
4269 // If we changed any of the clauses, replace the old landingpad instruction
4270 // with a new one.
4271 if (MakeNewInstruction) {
4272 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4273 NewClauses.size());
4274 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4275 NLI->addClause(NewClauses[i]);
4276 // A landing pad with no clauses must have the cleanup flag set. It is
4277 // theoretically possible, though highly unlikely, that we eliminated all
4278 // clauses. If so, force the cleanup flag to true.
4279 if (NewClauses.empty())
4280 CleanupFlag = true;
4281 NLI->setCleanup(CleanupFlag);
4282 return NLI;
4283 }
4284
4285 // Even if none of the clauses changed, we may nonetheless have understood
4286 // that the cleanup flag is pointless. Clear it if so.
4287 if (LI.isCleanup() != CleanupFlag) {
4288 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4289 LI.setCleanup(CleanupFlag);
4290 return &LI;
4291 }
4292
4293 return nullptr;
4294}
4295
4296Value *
4297InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
4298 // Try to push freeze through instructions that propagate but don't produce
4299 // poison as far as possible. If an operand of the freeze satisfies three
4300 // conditions, 1) it has one use, 2) it does not produce poison, and 3) all but
4301 // one of its operands are guaranteed non-poison, then push the freeze to the one
4302 // operand that is not guaranteed non-poison. The actual transform is as
4303 // follows.
4304 // Op1 = ... ; Op1 can be poison
4305 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and all of its
4306 // ; other operands are guaranteed non-poison
4307 // ... = Freeze(Op0)
4308 // =>
4309 // Op1 = ...
4310 // Op1.fr = Freeze(Op1)
4311 // ... = Inst(Op1.fr, NonPoisonOps...)
4312 auto *OrigOp = OrigFI.getOperand(0);
4313 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
4314
4315 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4316 // potentially reduces their optimization potential, so let's only do this if
4317 // the OrigOp is only used by the freeze.
4318 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
4319 return nullptr;
4320
4321 // We can't push the freeze through an instruction which can itself create
4322 // poison. If the only source of new poison is flags, we can simply
4323 // strip them (since we know the only use is the freeze and nothing can
4324 // benefit from them.)
4325 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
4326 /*ConsiderFlagsAndMetadata*/ false))
4327 return nullptr;
4328
4329 // If operand is guaranteed not to be poison, there is no need to add freeze
4330 // to the operand. So we first find the operand that is not guaranteed to be
4331 // poison.
4332 Use *MaybePoisonOperand = nullptr;
4333 for (Use &U : OrigOpInst->operands()) {
4334 if (isa<MetadataAsValue>(U.get()) ||
4335 isGuaranteedNotToBeUndefOrPoison(U.get()))
4336 continue;
4337 if (!MaybePoisonOperand)
4338 MaybePoisonOperand = &U;
4339 else
4340 return nullptr;
4341 }
4342
4343 OrigOpInst->dropPoisonGeneratingAnnotations();
4344
4345 // If all operands are guaranteed to be non-poison, we can drop freeze.
4346 if (!MaybePoisonOperand)
4347 return OrigOp;
4348
4349 Builder.SetInsertPoint(OrigOpInst);
4350 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4351 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
4352
4353 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
4354 return OrigOp;
4355}
4356
4357Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
4358 PHINode *PN) {
4359 // Detect whether this is a recurrence with a start value and some number of
4360 // backedge values. We'll check whether we can push the freeze through the
4361 // backedge values (possibly dropping poison flags along the way) until we
4362 // reach the phi again. In that case, we can move the freeze to the start
4363 // value.
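 // An illustrative sketch (not part of the original source):
 //   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
 //   %iv.fr = freeze i32 %iv
 // can be rewritten to freeze %start on entry and drop poison-generating
 // flags from %iv.next, making the freeze of the phi itself unnecessary.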
4364 Use *StartU = nullptr;
4365 SmallVector<Value *> Worklist;
4366 for (Use &U : PN->incoming_values()) {
4367 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
4368 // Add backedge value to worklist.
4369 Worklist.push_back(U.get());
4370 continue;
4371 }
4372
4373 // Don't bother handling multiple start values.
4374 if (StartU)
4375 return nullptr;
4376 StartU = &U;
4377 }
4378
4379 if (!StartU || Worklist.empty())
4380 return nullptr; // Not a recurrence.
4381
4382 Value *StartV = StartU->get();
4383 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
4384 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
4385 // We can't insert freeze if the start value is the result of the
4386 // terminator (e.g. an invoke).
4387 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4388 return nullptr;
4389
4392 while (!Worklist.empty()) {
4393 Value *V = Worklist.pop_back_val();
4394 if (!Visited.insert(V).second)
4395 continue;
4396
4397 if (Visited.size() > 32)
4398 return nullptr; // Limit the total number of values we inspect.
4399
4400 // Assume that PN is non-poison, because it will be after the transform.
4401 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4402 continue;
4403
4404 Instruction *I = dyn_cast<Instruction>(V);
4405 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
4406 /*ConsiderFlagsAndMetadata*/ false))
4407 return nullptr;
4408
4409 DropFlags.push_back(I);
4410 append_range(Worklist, I->operands());
4411 }
4412
4413 for (Instruction *I : DropFlags)
4414 I->dropPoisonGeneratingAnnotations();
4415
4416 if (StartNeedsFreeze) {
4418 Value *FrozenStartV = Builder.CreateFreeze(StartV,
4419 StartV->getName() + ".fr");
4420 replaceUse(*StartU, FrozenStartV);
4421 }
4422 return replaceInstUsesWith(FI, PN);
4423}
4424
4425bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
4426 Value *Op = FI.getOperand(0);
4427
4428 if (isa<Constant>(Op) || Op->hasOneUse())
4429 return false;
4430
4431 // Move the freeze directly after the definition of its operand, so that
4432 // it dominates the maximum number of uses. Note that it may not dominate
4433 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4434 // the normal/default destination. This is why the domination check in the
4435 // replacement below is still necessary.
4436 BasicBlock::iterator MoveBefore;
4437 if (isa<Argument>(Op)) {
4438 MoveBefore =
4439 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
4440 } else {
4441 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
4442 if (!MoveBeforeOpt)
4443 return false;
4444 MoveBefore = *MoveBeforeOpt;
4445 }
4446
4447 // Don't move to the position of a debug intrinsic.
4448 if (isa<DbgInfoIntrinsic>(MoveBefore))
4449 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4450 // Re-point iterator to come after any debug-info records, if we're
4451 // running in "RemoveDIs" mode
4452 MoveBefore.setHeadBit(false);
4453
4454 bool Changed = false;
4455 if (&FI != &*MoveBefore) {
4456 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
4457 Changed = true;
4458 }
4459
4460 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
4461 bool Dominates = DT.dominates(&FI, U);
4462 Changed |= Dominates;
4463 return Dominates;
4464 });
4465
4466 return Changed;
4467}
4468
4469// Check if any direct or bitcast user of this value is a shuffle instruction.
4470static bool isUsedWithinShuffleVector(Value *V) {
4471 for (auto *U : V->users()) {
4472 if (isa<ShuffleVectorInst>(U))
4473 return true;
4474 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
4475 return true;
4476 }
4477 return false;
4478}
4479
4480Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
4481 Value *Op0 = I.getOperand(0);
4482
4483 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
4484 return replaceInstUsesWith(I, V);
4485
4486 // freeze (phi const, x) --> phi const, (freeze x)
4487 if (auto *PN = dyn_cast<PHINode>(Op0)) {
4488 if (Instruction *NV = foldOpIntoPhi(I, PN))
4489 return NV;
4490 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
4491 return NV;
4492 }
4493
4494 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
4495 return replaceInstUsesWith(I, NI);
4496
4497 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4498 // - or: pick -1
4499 // - select's condition: if the true value is constant, choose it by making
4500 // the condition true.
4501 // - default: pick 0
4502 //
4503 // Note that this transform is intentionally done here rather than
4504 // via an analysis in InstSimplify or at individual user sites. That is
4505 // because we must produce the same value for all uses of the freeze -
4506 // it's the reason "freeze" exists!
4507 //
4508 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4509 // duplicating logic for binops at least.
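 // For example (an illustrative sketch, not from the original source), given
 //   %f = freeze i32 undef
 //   %o = or i32 %f, %x
 // the freeze is replaced by -1 so the 'or' folds away; if the users disagree
 // on their preferred constant, 0 is chosen so every use sees the same value.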
4510 auto getUndefReplacement = [&I](Type *Ty) {
4511 Constant *BestValue = nullptr;
4512 Constant *NullValue = Constant::getNullValue(Ty);
4513 for (const auto *U : I.users()) {
4514 Constant *C = NullValue;
4515 if (match(U, m_Or(m_Value(), m_Value())))
4516 C = ConstantInt::getAllOnesValue(Ty);
4517 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4518 C = ConstantInt::getTrue(Ty);
4519
4520 if (!BestValue)
4521 BestValue = C;
4522 else if (BestValue != C)
4523 BestValue = NullValue;
4524 }
4525 assert(BestValue && "Must have at least one use");
4526 return BestValue;
4527 };
4528
4529 if (match(Op0, m_Undef())) {
4530 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4531 // a shuffle. This may improve codegen for shuffles that allow
4532 // unspecified inputs.
4533 if (isUsedWithinShuffleVector(&I))
4534 return nullptr;
4535 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4536 }
4537
4538 Constant *C;
4539 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4540 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4541 return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
4542 }
4543
4544 // Replace uses of Op with freeze(Op).
4545 if (freezeOtherUses(I))
4546 return &I;
4547
4548 return nullptr;
4549}
4550
4551/// Check for case where the call writes to an otherwise dead alloca. This
4552/// shows up for unused out-params in idiomatic C/C++ code. Note that this
4553/// helper *only* analyzes the write; doesn't check any other legality aspect.
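/// An illustrative sketch (not part of the original source, names hypothetical):
///   int unused;
///   get_pair(&result, &unused);  // 'unused' is never read afterwards
/// The write into 'unused' performed by the call is a write to an otherwise
/// dead alloca.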
4554static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4555 auto *CB = dyn_cast<CallBase>(I);
4556 if (!CB)
4557 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4558 // to allow reload along used path as described below. Otherwise, this
4559 // is simply a store to a dead allocation which will be removed.
4560 return false;
4561 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4562 if (!Dest)
4563 return false;
4564 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4565 if (!AI)
4566 // TODO: allow malloc?
4567 return false;
4568 // TODO: allow memory access dominated by move point? Note that since AI
4569 // could have a reference to itself captured by the call, we would need to
4570 // account for cycles in doing so.
4571 SmallVector<const User *> AllocaUsers;
4573 auto pushUsers = [&](const Instruction &I) {
4574 for (const User *U : I.users()) {
4575 if (Visited.insert(U).second)
4576 AllocaUsers.push_back(U);
4577 }
4578 };
4579 pushUsers(*AI);
4580 while (!AllocaUsers.empty()) {
4581 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4582 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4583 isa<AddrSpaceCastInst>(UserI)) {
4584 pushUsers(*UserI);
4585 continue;
4586 }
4587 if (UserI == CB)
4588 continue;
4589 // TODO: support lifetime.start/end here
4590 return false;
4591 }
4592 return true;
4593}
4594
4595/// Try to move the specified instruction from its current block into the
4596/// beginning of DestBlock, which can only happen if it's safe to move the
4597/// instruction past all of the instructions between it and the end of its
4598/// block.
4599bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
4600 BasicBlock *DestBlock) {
4601 BasicBlock *SrcBlock = I->getParent();
4602
4603 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4604 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4605 I->isTerminator())
4606 return false;
4607
4608 // Do not sink static or dynamic alloca instructions. Static allocas must
4609 // remain in the entry block, and dynamic allocas must not be sunk in between
4610 // a stacksave / stackrestore pair, which would incorrectly shorten its
4611 // lifetime.
4612 if (isa<AllocaInst>(I))
4613 return false;
4614
4615 // Do not sink into catchswitch blocks.
4616 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4617 return false;
4618
4619 // Do not sink convergent call instructions.
4620 if (auto *CI = dyn_cast<CallInst>(I)) {
4621 if (CI->isConvergent())
4622 return false;
4623 }
4624
4625 // Unless we can prove that the memory write isn't visible except on the
4626 // path we're sinking to, we must bail.
4627 if (I->mayWriteToMemory()) {
4628 if (!SoleWriteToDeadLocal(I, TLI))
4629 return false;
4630 }
4631
4632 // We can only sink load instructions if there is nothing between the load and
4633 // the end of block that could change the value.
4634 if (I->mayReadFromMemory()) {
4635 // We don't want to do any sophisticated alias analysis, so we only check
4636 // the instructions after I in I's parent block if we try to sink to its
4637 // successor block.
4638 if (DestBlock->getUniquePredecessor() != I->getParent())
4639 return false;
4640 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4641 E = I->getParent()->end();
4642 Scan != E; ++Scan)
4643 if (Scan->mayWriteToMemory())
4644 return false;
4645 }
4646
4647 I->dropDroppableUses([&](const Use *U) {
4648 auto *I = dyn_cast<Instruction>(U->getUser());
4649 if (I && I->getParent() != DestBlock) {
4650 Worklist.add(I);
4651 return true;
4652 }
4653 return false;
4654 });
4655 /// FIXME: We could remove droppable uses that are not dominated by
4656 /// the new position.
4657
4658 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4659 I->moveBefore(*DestBlock, InsertPos);
4660 ++NumSunkInst;
4661
4662 // Also sink all related debug uses from the source basic block. Otherwise we
4663 // get debug use before the def. Attempt to salvage debug uses first, to
4664 // maximise the range variables have location for. If we cannot salvage, then
4665 // mark the location undef: we know it was supposed to receive a new location
4666 // here, but that computation has been sunk.
4667 SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4668 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4669 findDbgUsers(DbgUsers, I, &DbgVariableRecords);
4670 if (!DbgUsers.empty())
4671 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4672 if (!DbgVariableRecords.empty())
4673 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4674 DbgVariableRecords);
4675
4676 // PS: there are numerous flaws with this behaviour, not least that right now
4677 // assignments can be re-ordered past other assignments to the same variable
4678 // if they use different Values. Creating more undef assignments can never be
4679 // undone. And salvaging all users outside of this block can unnecessarily
4680 // alter the lifetime of the live-value that the variable refers to.
4681 // Some of these things can be resolved by tolerating debug use-before-defs in
4682 // LLVM-IR; however, it depends on the instruction-referencing CodeGen backend
4683 // being used for more architectures.
4684
4685 return true;
4686}
4687
4688 void InstCombinerImpl::tryToSinkInstructionDbgValues(
4689 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4690 BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
4691 // For all debug values in the destination block, the sunk instruction
4692 // will still be available, so they do not need to be dropped.
4693 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
4694 for (auto &DbgUser : DbgUsers)
4695 if (DbgUser->getParent() != DestBlock)
4696 DbgUsersToSalvage.push_back(DbgUser);
4697
4698 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4699 // to clone the last appearing debug intrinsic for each given variable.
4700 SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4701 for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4702 if (DVI->getParent() == SrcBlock)
4703 DbgUsersToSink.push_back(DVI);
4704 llvm::sort(DbgUsersToSink,
4705 [](auto *A, auto *B) { return B->comesBefore(A); });
4706
4707 SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4708 SmallSet<DebugVariable, 4> SunkVariables;
4709 for (auto *User : DbgUsersToSink) {
4710 // A dbg.declare instruction should not be cloned, since there can only be
4711 // one per variable fragment. It should be left in the original place
4712 // because the sunk instruction is not an alloca (otherwise we could not be
4713 // here).
4714 if (isa<DbgDeclareInst>(User))
4715 continue;
4716
4717 DebugVariable DbgUserVariable =
4718 DebugVariable(User->getVariable(), User->getExpression(),
4719 User->getDebugLoc()->getInlinedAt());
4720
4721 if (!SunkVariables.insert(DbgUserVariable).second)
4722 continue;
4723
4724 // Leave dbg.assign intrinsics in their original positions and there should
4725 // be no need to insert a clone.
4726 if (isa<DbgAssignIntrinsic>(User))
4727 continue;
4728
4729 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4730 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4731 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4732 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4733 }
4734
4735 // Perform salvaging without the clones, then sink the clones.
4736 if (!DIIClones.empty()) {
4737 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
4738 // The clones are in reverse order of original appearance; reverse again to
4739 // maintain the original order.
4740 for (auto &DIIClone : llvm::reverse(DIIClones)) {
4741 DIIClone->insertBefore(&*InsertPos);
4742 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4743 }
4744 }
4745}
4746
4747 void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
4748 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4749 BasicBlock *DestBlock,
4750 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4751 // Implementation of tryToSinkInstructionDbgValues, but for the
4752 // DbgVariableRecord of variable assignments rather than dbg.values.
4753
4754 // Fetch all DbgVariableRecords not already in the destination.
4755 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4756 for (auto &DVR : DbgVariableRecords)
4757 if (DVR->getParent() != DestBlock)
4758 DbgVariableRecordsToSalvage.push_back(DVR);
4759
4760 // Fetch a second collection, of DbgVariableRecords in the source block that
4761 // we're going to sink.
4762 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4763 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4764 if (DVR->getParent() == SrcBlock)
4765 DbgVariableRecordsToSink.push_back(DVR);
4766
4767 // Sort DbgVariableRecords according to their position in the block. This is a
4768 // partial order: DbgVariableRecords attached to different instructions will
4769 // be ordered by the instruction order, but DbgVariableRecords attached to the
4770 // same instruction won't have an order.
4771 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4772 return B->getInstruction()->comesBefore(A->getInstruction());
4773 };
4774 llvm::stable_sort(DbgVariableRecordsToSink, Order);
4775
4776 // If there are two assignments to the same variable attached to the same
4777 // instruction, the ordering between the two assignments is important. Scan
4778 // for this (rare) case and establish which is the last assignment.
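// Hypothetical example of the rare case handled here: two records for the
// same variable attached to one instruction,
//   #dbg_value(i32 %a, !"v", ...)
//   #dbg_value(i32 %b, !"v", ...)   ; the later record is the live assignment
// Only the final assignment may be cloned below; cloning both would duplicate
// or reorder the assignment history of "v" in DestBlock.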
4779 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4780 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
4781 if (DbgVariableRecordsToSink.size() > 1) {
4782 SmallDenseMap<InstVarPair, unsigned> CountMap;
4783 // Count how many assignments to each variable there are per instruction.
4784 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4785 DebugVariable DbgUserVariable =
4786 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4787 DVR->getDebugLoc()->getInlinedAt());
4788 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
4789 }
4790
4791 // If there are any instructions with two assignments, add them to the
4792 // FilterOutMap to record that they need extra filtering.
4793 SmallPtrSet<const Instruction *, 4> DupSet;
4794 for (auto It : CountMap) {
4795 if (It.second > 1) {
4796 FilterOutMap[It.first] = nullptr;
4797 DupSet.insert(It.first.first);
4798 }
4799 }
4800
4801 // For all instruction/variable pairs needing extra filtering, find the
4802 // latest assignment.
4803 for (const Instruction *Inst : DupSet) {
4804 for (DbgVariableRecord &DVR :
4805 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
4806 DebugVariable DbgUserVariable =
4807 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4808 DVR.getDebugLoc()->getInlinedAt());
4809 auto FilterIt =
4810 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
4811 if (FilterIt == FilterOutMap.end())
4812 continue;
4813 if (FilterIt->second != nullptr)
4814 continue;
4815 FilterIt->second = &DVR;
4816 }
4817 }
4818 }
4819
4820 // Perform cloning of the DbgVariableRecords that we plan on sinking, filtering
4821 // out any duplicate assignments identified above.
4822 SmallVector<DbgVariableRecord *, 2> DVRClones;
4823 SmallSet<DebugVariable, 4> SunkVariables;
4824 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4825 if (DVR->isDbgDeclare())
4826 continue;
4827
4828 DebugVariable DbgUserVariable =
4829 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4830 DVR->getDebugLoc()->getInlinedAt());
4831
4832 // For any variable where there were multiple assignments in the same place,
4833 // ignore all but the last assignment.
4834 if (!FilterOutMap.empty()) {
4835 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
4836 auto It = FilterOutMap.find(IVP);
4837
4838 // Filter out.
4839 if (It != FilterOutMap.end() && It->second != DVR)
4840 continue;
4841 }
4842
4843 if (!SunkVariables.insert(DbgUserVariable).second)
4844 continue;
4845
4846 if (DVR->isDbgAssign())
4847 continue;
4848
4849 DVRClones.emplace_back(DVR->clone());
4850 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4851 }
4852
4853 // Perform salvaging without the clones, then sink the clones.
4854 if (DVRClones.empty())
4855 return;
4856
4857 salvageDebugInfoForDbgValues(*I, {}, DbgVariableRecordsToSalvage);
4858
4859 // The clones are in reverse order of original appearance. Assert that the
4860 // head bit is set on the iterator as we _should_ have received it via
4861 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4862 // we'll repeatedly insert at the head, such as:
4863 // DVR-3 (third insertion goes here)
4864 // DVR-2 (second insertion goes here)
4865 // DVR-1 (first insertion goes here)
4866 // Any-Prior-DVRs
4867 // InsertPtInst
4868 assert(InsertPos.getHeadBit());
4869 for (DbgVariableRecord *DVRClone : DVRClones) {
4870 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4871 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
4872 }
4873}
4874
4875 bool InstCombinerImpl::run() {
4876 while (!Worklist.isEmpty()) {
4877 // Walk deferred instructions in reverse order, and push them to the
4878 // worklist, which means they'll end up popped from the worklist in-order.
4879 while (Instruction *I = Worklist.popDeferred()) {
4880 // Check to see if we can DCE the instruction. We do this already here to
4881 // reduce the number of uses and thus allow other folds to trigger.
4882 // Note that eraseInstFromFunction() may push additional instructions on
4883 // the deferred worklist, so this will DCE whole instruction chains.
4884 if (isInstructionTriviallyDead(I, &TLI)) {
4885 eraseInstFromFunction(*I);
4886 ++NumDeadInst;
4887 continue;
4888 }
4889
4890 Worklist.push(I);
4891 }
4892
4893 Instruction *I = Worklist.removeOne();
4894 if (I == nullptr) continue; // skip null values.
4895
4896 // Check to see if we can DCE the instruction.
4897 if (isInstructionTriviallyDead(I, &TLI)) {
4898 eraseInstFromFunction(*I);
4899 ++NumDeadInst;
4900 continue;
4901 }
4902
4903 if (!DebugCounter::shouldExecute(VisitCounter))
4904 continue;
4905
4906 // See if we can trivially sink this instruction to its user if we can
4907 // prove that the successor is not executed more frequently than our block.
4908 // Return the UserBlock if successful.
4909 auto getOptionalSinkBlockForInst =
4910 [this](Instruction *I) -> std::optional<BasicBlock *> {
4911 if (!EnableCodeSinking)
4912 return std::nullopt;
4913
4914 BasicBlock *BB = I->getParent();
4915 BasicBlock *UserParent = nullptr;
4916 unsigned NumUsers = 0;
4917
4918 for (auto *U : I->users()) {
4919 if (U->isDroppable())
4920 continue;
4921 if (NumUsers > MaxSinkNumUsers)
4922 return std::nullopt;
4923
4924 Instruction *UserInst = cast<Instruction>(U);
4925 // Special handling for Phi nodes - get the block the use occurs in.
4926 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4927 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4928 if (PN->getIncomingValue(i) == I) {
4929 // Bail out if we have uses in different blocks. We don't do any
4930 // sophisticated analysis (i.e. finding the NearestCommonDominator of
4931 // these use blocks).
4932 if (UserParent && UserParent != PN->getIncomingBlock(i))
4933 return std::nullopt;
4934 UserParent = PN->getIncomingBlock(i);
4935 }
4936 }
4937 assert(UserParent && "expected to find user block!");
4938 } else {
4939 if (UserParent && UserParent != UserInst->getParent())
4940 return std::nullopt;
4941 UserParent = UserInst->getParent();
4942 }
4943
4944 // Make sure these checks are done only once; naturally, we do them the
4945 // first time we get the UserParent, which saves compile time.
4946 if (NumUsers == 0) {
4947 // Try sinking to another block. If that block is unreachable, then do
4948 // not bother. SimplifyCFG should handle it.
4949 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4950 return std::nullopt;
4951
4952 auto *Term = UserParent->getTerminator();
4953 // See if the user is one of our successors that has only one
4954 // predecessor, so that we don't have to split the critical edge.
4955 // Another option where we can sink is a block that ends with a
4956 // terminator that does not pass control to another block (such as
4957 // return or unreachable or resume). In this case:
4958 // - I dominates the User (by SSA form);
4959 // - the User will be executed at most once.
4960 // So sinking I down to User is always profitable or neutral.
4961 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4962 return std::nullopt;
4963
4964 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4965 }
4966
4967 NumUsers++;
4968 }
4969
4970 // No user or only has droppable users.
4971 if (!UserParent)
4972 return std::nullopt;
4973
4974 return UserParent;
4975 };
4976
4977 auto OptBB = getOptionalSinkBlockForInst(I);
4978 if (OptBB) {
4979 auto *UserParent = *OptBB;
4980 // Okay, the CFG is simple enough, try to sink this instruction.
4981 if (tryToSinkInstruction(I, UserParent)) {
4982 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4983 MadeIRChange = true;
4984 // We'll add uses of the sunk instruction below, but since
4985 // sinking can expose opportunities for its *operands*, add
4986 // them to the worklist.
4987 for (Use &U : I->operands())
4988 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4989 Worklist.push(OpI);
4990 }
4991 }
4992
4993 // Now that we have an instruction, try combining it to simplify it.
4994 Builder.SetInsertPoint(I);
4995 Builder.CollectMetadataToCopy(
4996 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4997
4998#ifndef NDEBUG
4999 std::string OrigI;
5000#endif
5001 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
5002 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5003
5004 if (Instruction *Result = visit(*I)) {
5005 ++NumCombined;
5006 // Should we replace the old instruction with a new one?
5007 if (Result != I) {
5008 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5009 << " New = " << *Result << '\n');
5010
5011 Result->copyMetadata(*I,
5012 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5013 // Everything uses the new instruction now.
5014 I->replaceAllUsesWith(Result);
5015
5016 // Move the name to the new instruction first.
5017 Result->takeName(I);
5018
5019 // Insert the new instruction into the basic block...
5020 BasicBlock *InstParent = I->getParent();
5021 BasicBlock::iterator InsertPos = I->getIterator();
5022
5023 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5024 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5025 // We need to fix up the insertion point.
5026 if (isa<PHINode>(I)) // PHI -> Non-PHI
5027 InsertPos = InstParent->getFirstInsertionPt();
5028 else // Non-PHI -> PHI
5029 InsertPos = InstParent->getFirstNonPHIIt();
5030 }
5031
5032 Result->insertInto(InstParent, InsertPos);
5033
5034 // Push the new instruction and any users onto the worklist.
5035 Worklist.pushUsersToWorkList(*Result);
5036 Worklist.push(Result);
5037
5038 eraseInstFromFunction(*I);
5039 } else {
5040 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5041 << " New = " << *I << '\n');
5042
5043 // If the instruction was modified, it's possible that it is now dead.
5044 // If so, remove it.
5045 if (isInstructionTriviallyDead(I, &TLI)) {
5046 eraseInstFromFunction(*I);
5047 } else {
5048 Worklist.pushUsersToWorkList(*I);
5049 Worklist.push(I);
5050 }
5051 }
5052 MadeIRChange = true;
5053 }
5054 }
5055
5056 Worklist.zap();
5057 return MadeIRChange;
5058}
5059
5060// Track the scopes used by !alias.scope and !noalias. In a function, a
5061// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5062// by both sets. If not, the declaration of the scope can be safely omitted.
5063// The MDNode of the scope can be omitted as well for the instructions that are
5064// part of this function. We do not do that at this point, as this might become
5065// too time consuming to do.
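// Hypothetical example: the declaration below is only worth keeping if the
// scope is referenced by both metadata kinds somewhere in the function:
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   %v = load i32, ptr %p, !alias.scope !2
//   store i32 %v, ptr %q, !noalias !2
// If either kind of use is missing, isNoAliasScopeDeclDead() below reports
// the declaration as removable.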
5066 class AliasScopeTracker {
5067 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5068 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5069
5070public:
5071 void analyse(Instruction *I) {
5072 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5073 if (!I->hasMetadataOtherThanDebugLoc())
5074 return;
5075
5076 auto Track = [](Metadata *ScopeList, auto &Container) {
5077 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5078 if (!MDScopeList || !Container.insert(MDScopeList).second)
5079 return;
5080 for (const auto &MDOperand : MDScopeList->operands())
5081 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5082 Container.insert(MDScope);
5083 };
5084
5085 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5086 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5087 }
5088
5089 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5090 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5091 if (!Decl)
5092 return false;
5093
5094 assert(Decl->use_empty() &&
5095 "llvm.experimental.noalias.scope.decl in use ?");
5096 const MDNode *MDSL = Decl->getScopeList();
5097 assert(MDSL->getNumOperands() == 1 &&
5098 "llvm.experimental.noalias.scope should refer to a single scope");
5099 auto &MDOperand = MDSL->getOperand(0);
5100 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5101 return !UsedAliasScopesAndLists.contains(MD) ||
5102 !UsedNoAliasScopesAndLists.contains(MD);
5103
5104 // Not an MDNode? Throw it away.
5105 return true;
5106 }
5107};
5108
5109/// Populate the IC worklist from a function, by walking it in reverse
5110/// post-order and adding all reachable code to the worklist.
5111///
5112/// This has a couple of tricks to make the code faster and more powerful. In
5113/// particular, we constant fold and DCE instructions as we go, to avoid adding
5114/// them to the worklist (this significantly speeds up instcombine on code where
5115/// many instructions are dead or constant). Additionally, if we find a branch
5116/// whose condition is a known constant, we only visit the reachable successors.
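// Hypothetical example: given
//   br i1 true, label %live, label %dead
// only the edge to %live is treated as live; the edge to %dead is recorded in
// DeadEdges, PHI inputs in %dead coming from this block are replaced with
// poison, and blocks that become unreachable are cleaned up below instead of
// being added to the worklist.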
5117 bool InstCombinerImpl::prepareWorklist(
5118 Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
5119 bool MadeIRChange = false;
5120 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5121 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5122 DenseMap<Constant *, Constant *> FoldedConstants;
5123 AliasScopeTracker SeenAliasScopes;
5124
5125 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5126 for (BasicBlock *Succ : successors(BB))
5127 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5128 for (PHINode &PN : Succ->phis())
5129 for (Use &U : PN.incoming_values())
5130 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5131 U.set(PoisonValue::get(PN.getType()));
5132 MadeIRChange = true;
5133 }
5134 };
5135
5136 for (BasicBlock *BB : RPOT) {
5137 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5138 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5139 })) {
5140 HandleOnlyLiveSuccessor(BB, nullptr);
5141 continue;
5142 }
5143 LiveBlocks.insert(BB);
5144
5145 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5146 // ConstantProp instruction if trivially constant.
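// For instance (hypothetical IR), an instruction such as
//   %r = add i32 7, 5
// is folded to the constant 12 right here, its uses are rewritten, and the
// now-dead add is erased before it ever reaches the worklist.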
5147 if (!Inst.use_empty() &&
5148 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5149 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5150 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5151 << '\n');
5152 Inst.replaceAllUsesWith(C);
5153 ++NumConstProp;
5154 if (isInstructionTriviallyDead(&Inst, &TLI))
5155 Inst.eraseFromParent();
5156 MadeIRChange = true;
5157 continue;
5158 }
5159
5160 // See if we can constant fold its operands.
5161 for (Use &U : Inst.operands()) {
5162 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5163 continue;
5164
5165 auto *C = cast<Constant>(U);
5166 Constant *&FoldRes = FoldedConstants[C];
5167 if (!FoldRes)
5168 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5169
5170 if (FoldRes != C) {
5171 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5172 << "\n Old = " << *C
5173 << "\n New = " << *FoldRes << '\n');
5174 U = FoldRes;
5175 MadeIRChange = true;
5176 }
5177 }
5178
5179 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5181 // these call instructions consumes a non-trivial amount of time and
5181 // provides no value for the optimization.
5182 if (!Inst.isDebugOrPseudoInst()) {
5183 InstrsForInstructionWorklist.push_back(&Inst);
5184 SeenAliasScopes.analyse(&Inst);
5185 }
5186 }
5187
5188 // If this is a branch or switch on a constant, mark only the single
5189 // live successor. Otherwise assume all successors are live.
5190 Instruction *TI = BB->getTerminator();
5191 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5192 if (isa<UndefValue>(BI->getCondition())) {
5193 // Branch on undef is UB.
5194 HandleOnlyLiveSuccessor(BB, nullptr);
5195 continue;
5196 }
5197 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
5198 bool CondVal = Cond->getZExtValue();
5199 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5200 continue;
5201 }
5202 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5203 if (isa<UndefValue>(SI->getCondition())) {
5204 // Switch on undef is UB.
5205 HandleOnlyLiveSuccessor(BB, nullptr);
5206 continue;
5207 }
5208 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5209 HandleOnlyLiveSuccessor(BB,
5210 SI->findCaseValue(Cond)->getCaseSuccessor());
5211 continue;
5212 }
5213 }
5214 }
5215
5216 // Remove instructions inside unreachable blocks. This prevents the
5217 // instcombine code from having to deal with some bad special cases, and
5218 // reduces use counts of instructions.
5219 for (BasicBlock &BB : F) {
5220 if (LiveBlocks.count(&BB))
5221 continue;
5222
5223 unsigned NumDeadInstInBB;
5224 unsigned NumDeadDbgInstInBB;
5225 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
5226 removeAllNonTerminatorAndEHPadInstructions(&BB);
5227
5228 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5229 NumDeadInst += NumDeadInstInBB;
5230 }
5231
5232 // Once we've found all of the instructions to add to instcombine's worklist,
5233 // add them in reverse order. This way instcombine will visit from the top
5234 // of the function down. This jibes well with the way that it adds all uses
5235 // of instructions to the worklist after doing a transformation, thus avoiding
5236 // some N^2 behavior in pathological cases.
5237 Worklist.reserve(InstrsForInstructionWorklist.size());
5238 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5239 // DCE instruction if trivially dead. As we iterate in reverse program
5240 // order here, we will clean up whole chains of dead instructions.
5241 if (isInstructionTriviallyDead(Inst, &TLI) ||
5242 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5243 ++NumDeadInst;
5244 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5245 salvageDebugInfo(*Inst);
5246 Inst->eraseFromParent();
5247 MadeIRChange = true;
5248 continue;
5249 }
5250
5251 Worklist.push(Inst);
5252 }
5253
5254 return MadeIRChange;
5255}
5256
5257 static bool combineInstructionsOverFunction(
5258 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5259 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5260 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5261 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI,
5262 const InstCombineOptions &Opts) {
5263 auto &DL = F.getParent()->getDataLayout();
5264
5265 /// Builder - This is an IRBuilder that automatically inserts new
5266 /// instructions into the worklist when they are created.
5267 InstCombiner::BuilderTy Builder(
5268 F.getContext(), TargetFolder(DL),
5269 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5270 Worklist.add(I);
5271 if (auto *Assume = dyn_cast<AssumeInst>(I))
5272 AC.registerAssumption(Assume);
5273 }));
5274
5275 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
5276
5277 // Lower dbg.declare intrinsics otherwise their value may be clobbered
5278 // by instcombiner.
5279 bool MadeIRChange = false;
5280 if (ShouldLowerDbgDeclare)
5281 MadeIRChange = LowerDbgDeclare(F);
5282
5283 // Iterate while there is work to do.
5284 unsigned Iteration = 0;
5285 while (true) {
5286 ++Iteration;
5287
5288 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5289 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5290 << " on " << F.getName()
5291 << " reached; stopping without verifying fixpoint\n");
5292 break;
5293 }
5294
5295 ++NumWorklistIterations;
5296 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5297 << F.getName() << "\n");
5298
5299 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5300 ORE, BFI, BPI, PSI, DL, LI);
5302 bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5303 MadeChangeInThisIteration |= IC.run();
5304 if (!MadeChangeInThisIteration)
5305 break;
5306
5307 MadeIRChange = true;
5308 if (Iteration > Opts.MaxIterations) {
5310 "Instruction Combining did not reach a fixpoint after " +
5311 Twine(Opts.MaxIterations) + " iterations",
5312 /*GenCrashDiag=*/false);
5313 }
5314 }
5315
5316 if (Iteration == 1)
5317 ++NumOneIteration;
5318 else if (Iteration == 2)
5319 ++NumTwoIterations;
5320 else if (Iteration == 3)
5321 ++NumThreeIterations;
5322 else
5323 ++NumFourOrMoreIterations;
5324
5325 return MadeIRChange;
5326}
5327
5328 InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
5329
5330 void InstCombinePass::printPipeline(
5331 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5332 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5333 OS, MapClassName2PassName);
5334 OS << '<';
5335 OS << "max-iterations=" << Options.MaxIterations << ";";
5336 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5337 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5338 OS << '>';
5339}
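// For example, with hypothetical option values this prints:
//   instcombine<max-iterations=1000;no-use-loop-info;verify-fixpoint>
// which matches the textual pipeline syntax accepted by the new pass manager
// (e.g. via opt's -passes= option).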
5340
5341 PreservedAnalyses InstCombinePass::run(Function &F,
5342 FunctionAnalysisManager &AM) {
5343 auto &AC = AM.getResult<AssumptionAnalysis>(F);
5344 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
5345 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5346 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5347 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5348
5349 // TODO: Only use LoopInfo when the option is set. This requires that the
5350 // callers in the pass pipeline explicitly set the option.
5351 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
5352 if (!LI && Options.UseLoopInfo)
5353 LI = &AM.getResult<LoopAnalysis>(F);
5354
5355 auto *AA = &AM.getResult<AAManager>(F);
5356 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
5357 ProfileSummaryInfo *PSI =
5358 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
5359 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5360 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
5361 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
5362
5363 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5364 BFI, BPI, PSI, LI, Options))
5365 // No changes, all analyses are preserved.
5366 return PreservedAnalyses::all();
5367
5368 // Mark all the analyses that instcombine updates as preserved.
5369 PreservedAnalyses PA;
5370 PA.preserveSet<CFGAnalyses>();
5371 return PA;
5372}
5373
5374 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
5375 AU.setPreservesCFG();
5388}
5389
5390 bool InstructionCombiningPass::runOnFunction(Function &F) {
5391 if (skipFunction(F))
5392 return false;
5393
5394 // Required analyses.
5395 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5396 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5397 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5398 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5399 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5400 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5401
5402 // Optional analyses.
5403 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5404 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5405 ProfileSummaryInfo *PSI =
5406 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5407 BlockFrequencyInfo *BFI =
5408 (PSI && PSI->hasProfileSummary()) ?
5409 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5410 nullptr;
5411 BranchProbabilityInfo *BPI = nullptr;
5412 if (auto *WrapperPass =
5413 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5414 BPI = &WrapperPass->getBPI();
5415
5416 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5417 BFI, BPI, PSI, LI,
5418 InstCombineOptions());
5419}
5420
5421 char InstructionCombiningPass::ID = 0;
5422
5423 InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
5424 initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
5425 }
5426
5428 "Combine redundant instructions", false, false)
5440
5441// Initialization Routines
5442 void llvm::initializeInstCombine(PassRegistry &Registry) {
5443 initializeInstructionCombiningPassPass(Registry);
5444}
5445
5446 FunctionPass *llvm::createInstructionCombiningPass() {
5447 return new InstructionCombiningPass();
5448}
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1898
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:805
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Class to represent array types.
Definition: DerivedTypes.h:371
uint64_t getNumElements() const
Definition: DerivedTypes.h:383
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
Type * getElementType() const
Definition: DerivedTypes.h:384
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
Definition: Attributes.cpp:390
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:193
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:453
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:564
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:423
size_t size() const
Definition: BasicBlock.h:451
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:392
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1823
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:2272
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
ConstantArray - Constant Array Declarations.
Definition: Constants.h:423
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:766
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2542
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2529
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2402
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2560
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2535
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2596
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition: Constants.h:507
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
static Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
Definition: Constants.cpp:767
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:432
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
SmallVector< APInt > getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const
Get GEP indices to access Offset inside ElemTy.
Definition: DataLayout.cpp:998
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
Definition: DataLayout.cpp:774
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:905
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value * > Indices) const
Returns the offset from the beginning of the type for the specified indices.
Definition: DataLayout.cpp:920
This is the common base class for debug info intrinsics for variables.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
iterator end()
Definition: DenseMap.h:84
void registerBranch(BranchInst *BI)
Add a branch condition to the cache.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:201
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
static bool isTargetIntrinsic(Intrinsic::ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition: Function.cpp:879
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:420
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:475
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Definition: IRBuilder.cpp:921
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:502
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:76
This instruction inserts a struct field of array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
InstCombinePass(InstCombineOptions Opts={})
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
bool prepareWorklist(Function &F, ReversePostOrderTraversal< BasicBlock * > &RPOT)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Constant * getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth, Instruction *CxtI)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void tryToSinkInstructionDbgValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableIntrinsic * > &DbgUsers)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Definition: InstCombiner.h:157
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
Definition: InstCombiner.h:232
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
Definition: InstCombiner.h:139
TargetLibraryInfo & TLI
Definition: InstCombiner.h:73
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:366
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
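replaceInstUsesWith is the combiner-aware counterpart of RAUW: it rewires users and keeps the worklist in sync. A sketch of the usual call pattern, written as a hypothetical free helper rather than an actual InstCombine member, and assuming the public InstCombiner accessors shown in this listing:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

// Hypothetical helper: try InstSimplify first and, if it finds a simpler
// value, hand it to the combiner-aware RAUW so users are requeued.
static llvm::Instruction *trySimplify(llvm::InstCombiner &IC,
                                      llvm::BinaryOperator &I) {
  using namespace llvm;
  if (Value *V = simplifyBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1),
                               IC.getSimplifyQuery().getWithInstruction(&I)))
    return IC.replaceInstUsesWith(I, V);
  return nullptr; // no change; the caller keeps the original instruction
}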
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
Definition: InstCombiner.h:191
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
Definition: InstCombiner.h:418
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
BranchProbabilityInfo * BPI
Definition: InstCombiner.h:79
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DomConditionCache DC
Definition: InstCombiner.h:81
const bool MinimizeSize
Definition: InstCombiner.h:67
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:336
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
DominatorTree & DT
Definition: InstCombiner.h:74
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:284
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
Definition: InstCombiner.h:90
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
Definition: InstCombiner.h:213
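isFreeToInvert and getFreelyInverted pair up: the first is the query, the second actually produces the inverted value, possibly via the builder. A hedged sketch of that pairing; the helper name is made up, and the members are assumed to be callable from client code:

#include "llvm/Transforms/InstCombine/InstCombiner.h"

// Hypothetical helper: only form ~V when the combiner reports the inversion
// is free, i.e. it will not add instructions overall.
static llvm::Value *buildNotIfFree(llvm::InstCombiner &IC, llvm::Value *V) {
  bool DoesConsume = false;
  if (!IC.isFreeToInvert(V, /*WillInvertAllUses=*/true, DoesConsume))
    return nullptr;
  return IC.getFreelyInverted(V, /*WillInvertAllUses=*/true, &IC.Builder,
                              DoesConsume);
}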
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition: InstCombine.h:71
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void add(Instruction *I)
Add instruction to the worklist.
void push(Instruction *I)
Push the instruction onto the worklist stack.
void zap()
Check that the worklist is empty and nuke the backing store for the map.
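A short sketch of how the InstructionWorklist entries above are typically used: instructions are pushed in program order, and whenever one is simplified its users are queued again. The helper below is hypothetical, not code from this file:

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

// Hypothetical helper: seed a block into the worklist and requeue the users
// of an instruction that was just simplified so they can fold further.
static void seedAndRequeue(llvm::InstructionWorklist &WL, llvm::BasicBlock &BB,
                           llvm::Instruction *JustSimplified) {
  for (llvm::Instruction &I : BB)
    WL.push(&I);                              // push onto the worklist stack
  if (JustSimplified)
    WL.pushUsersToWorkList(*JustSimplified);  // users may now simplify too
}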
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:301
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:82
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:152
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:86
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:306
bool isShift() const
Definition: Instruction.h:259
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isIntDivRem() const
Definition: Instruction.h:258
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, BasicBlock::iterator InsertBefore)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Definition: Instructions.h:184
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
This is the common base class for memset/memcpy/memmove.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition: Metadata.h:62
This class represents min/max intrinsics.
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:756
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:76
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:109
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:103
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
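A minimal sketch of the PHINode API listed above, building an i32 phi at the top of a merge block with one incoming value per predecessor; the helper is hypothetical and assumes Preds and Vals have matching lengths:

#include "llvm/IR/Instructions.h"

static llvm::PHINode *buildPhi(llvm::BasicBlock *MergeBB,
                               llvm::ArrayRef<llvm::BasicBlock *> Preds,
                               llvm::ArrayRef<llvm::Value *> Vals) {
  using namespace llvm;
  PHINode *PN = PHINode::Create(Type::getInt32Ty(MergeBB->getContext()),
                                Preds.size(), "merged", MergeBB->begin());
  for (unsigned I = 0, E = Preds.size(); I != E; ++I)
    PN->addIncoming(Vals[I], Preds[I]);  // one entry per incoming edge
  return PN;
}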
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1396
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents a cast from signed integer to floating point.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
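The small containers above serve as scratch storage throughout InstCombine. A generic sketch of the common worklist-plus-visited pattern; the helper is illustrative only, not code from this file:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"

// Hypothetical helper: walk the users of Root exactly once, using a
// SmallVector as the worklist and a SmallPtrSet to deduplicate visits.
static void visitUsersOnce(llvm::Instruction *Root) {
  llvm::SmallVector<llvm::Instruction *, 8> Worklist;
  llvm::SmallPtrSet<llvm::Instruction *, 8> Visited;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    llvm::Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;                              // already handled
    for (llvm::User *U : I->users())
      if (auto *UI = llvm::dyn_cast<llvm::Instruction>(U))
        Worklist.push_back(UI);
  }
}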
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
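A small sketch of the Type queries listed above: take a value's type, inspect its scalar element type, and rebuild a matching vector type. The helper and its policy are hypothetical:

#include "llvm/IR/DerivedTypes.h"

// Hypothetical helper: widen an integer scalar or integer vector type to i64
// elements; return nullptr for anything else.
static llvm::Type *widenScalarTo64(llvm::Type *Ty) {
  using namespace llvm;
  Type *Scalar = Ty->getScalarType();
  if (!Scalar->isIntegerTy())
    return nullptr;                               // only integer types handled
  Type *NewScalar = IntegerType::get(Ty->getContext(), 64);
  if (auto *VTy = dyn_cast<VectorType>(Ty))
    return VectorType::get(NewScalar, VTy->getElementCount());
  return NewScalar;
}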
This class represents a cast unsigned integer to floating point.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:851
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isZero() const
Definition: TypeSize.h:156
An efficient, type-erasing, non-owning reference to a callable.
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:112
self_iterator getIterator()
Definition: ilist_node.h:109
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNoFPClassCompatibleType(Type *Ty)
Returns true if this is a type legal for the 'nofpclass' attribute.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:483
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:160
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:927
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:771
specificval_ty m_Specific(const Value *V)
Match only the specific value provided.
Definition: PatternMatch.h:830
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
Definition: PatternMatch.h:186
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr and ashr).
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
Definition: PatternMatch.h:515
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, FCmpInst, FCmpInst::Predicate > m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
Definition: PatternMatch.h:305
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:809
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
Exact_match< T > m_Exact(const T &SubPattern)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
Definition: PatternMatch.h:746
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:567
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
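A minimal sketch of the PatternMatch combinators listed above, recognising "xor (shl X, C), -1", i.e. the bitwise NOT of a one-use left shift by a constant; the helper name is hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"

static bool matchNotOfShl(llvm::Value *V, llvm::Value *&X,
                          const llvm::APInt *&ShAmt) {
  using namespace llvm::PatternMatch;
  // m_Not matches xor with all-ones in either operand order; m_OneUse keeps
  // the fold profitable by requiring the shift to have a single user.
  return match(V, m_Not(m_OneUse(m_Shl(m_Value(X), m_APInt(ShAmt)))));
}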
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
FunctionPass * createInstructionCombiningPass()
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2800
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic * > Insns, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:2241
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:148
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1650
auto successors(const MachineBasicBlock *BB)
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
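A short sketch of the early-increment idiom listed above, combined with isInstructionTriviallyDead from further down this listing: erase dead instructions while iterating a block without invalidating the iterator. The helper is illustrative only:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"

// Hypothetical helper: make_early_inc_range advances the iterator before the
// loop body runs, so erasing the current instruction is safe.
static bool dropDeadInBlock(llvm::BasicBlock &BB,
                            const llvm::TargetLibraryInfo *TLI) {
  bool Changed = false;
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB)) {
    if (llvm::isInstructionTriviallyDead(&I, TLI)) {
      I.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}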
gep_type_iterator gep_type_end(const User *GEP)
Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition: Local.cpp:2782
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:399
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:47
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1915
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities.
Definition: Local.cpp:1690
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2710
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Or
Bitwise or logical OR of integers.
DWARFExpression::Operation Op
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
void initializeInstructionCombiningPassPass(PassRegistry &)
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
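A tiny sketch of the KnownBits queries listed above, assuming the known bits were already computed (for example via InstCombiner::computeKnownBits); the helper is hypothetical:

#include "llvm/Support/KnownBits.h"

// Hypothetical helper: a value is provably an unsigned N-bit quantity when at
// least BitWidth - N of its leading bits are known to be zero.
static bool fitsInUnsignedBits(const llvm::KnownBits &Known, unsigned N) {
  return N < Known.getBitWidth() &&
         Known.countMinLeadingZeros() >= Known.getBitWidth() - N;
}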
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96
SimplifyQuery getWithoutUndef() const