LLVM 19.0.0git
InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
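// For example (illustrative additions, not part of the upstream comment),
// canonicalizations 1 and 6 turn:
//    %a = add i32 7, %x          into   %a = add i32 %x, 7
//    %b = mul i32 %x, 8          into   %b = shl i32 %x, 3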
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133static cl::opt<bool>
134EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
135 cl::init(true));
136
138 "instcombine-max-sink-users", cl::init(32),
139 cl::desc("Maximum number of undroppable users for instruction sinking"));
140
142MaxArraySize("instcombine-maxarray-size", cl::init(1024),
143 cl::desc("Maximum array size considered when doing a combine"));
144
145// FIXME: Remove this flag when it is no longer necessary to convert
146// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
147// increases variable availability at the cost of accuracy. Variables that
148// cannot be promoted by mem2reg or SROA will be described as living in memory
149// for their entire lifetime. However, passes like DSE and instcombine can
150// delete stores to the alloca, leading to misleading and inaccurate debug
151// information. This flag can be removed when those passes are fixed.
152static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
153 cl::Hidden, cl::init(true));
154
155std::optional<Instruction *>
157 // Handle target specific intrinsics
159 return TTI.instCombineIntrinsic(*this, II);
160 }
161 return std::nullopt;
162}
163
165 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
166 bool &KnownBitsComputed) {
167 // Handle target specific intrinsics
169 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
170 KnownBitsComputed);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
177 APInt &PoisonElts2, APInt &PoisonElts3,
178 std::function<void(Instruction *, unsigned, APInt, APInt &)>
179 SimplifyAndSetOp) {
180 // Handle target specific intrinsics
183 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
184 SimplifyAndSetOp);
185 }
186 return std::nullopt;
187}
188
189bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
190 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
191}
192
193Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
194 if (!RewriteGEP)
196
198 auto *Inst = dyn_cast<Instruction>(GEP);
199 if (Inst)
201
202 Value *Offset = EmitGEPOffset(GEP);
203 // If a non-trivial GEP has other uses, rewrite it to avoid duplicating
204 // the offset arithmetic.
205 if (Inst && !GEP->hasOneUse() && !GEP->hasAllConstantIndices() &&
206 !GEP->getSourceElementType()->isIntegerTy(8)) {
208 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
209 Offset, "", GEP->isInBounds()));
211 }
212 return Offset;
213}
214
215/// Legal integers and common types are considered desirable. This is used to
216/// avoid creating instructions with types that may not be supported well by
217/// the backend.
218/// NOTE: This treats i8, i16 and i32 specially because they are common
219/// types in frontend languages.
220bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
221 switch (BitWidth) {
222 case 8:
223 case 16:
224 case 32:
225 return true;
226 default:
227 return DL.isLegalInteger(BitWidth);
228 }
229}
230
231/// Return true if it is desirable to convert an integer computation from a
232/// given bit width to a new bit width.
233/// We don't want to convert from a legal or desirable type (like i8) to an
234/// illegal type or from a smaller to a larger illegal type. A width of '1'
235/// is always treated as a desirable type because i1 is a fundamental type in
236/// IR, and there are many specialized optimizations for i1 types.
237/// Common/desirable widths are equally treated as legal to convert to, in
238/// order to open up more combining opportunities.
239bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
240 unsigned ToWidth) const {
241 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
242 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
243
244 // Convert to desirable widths even if they are not legal types.
245 // Only shrink types, to prevent infinite loops.
246 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
247 return true;
248
249 // If this is a legal or desirable integer from type, and the result would be
250 // an illegal type, don't do the transformation.
251 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
252 return false;
253
254 // Otherwise, if both are illegal, do not increase the size of the result. We
255 // do allow things like i160 -> i64, but not i64 -> i160.
256 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
257 return false;
258
259 return true;
260}
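// Illustrative behaviour (an assumption based on the checks above, for a
// DataLayout where only i8/i16/i32/i64 are legal):
//   shouldChangeType(33, 32)  -> true   (shrink to the desirable i32)
//   shouldChangeType(16, 17)  -> false  (legal source, illegal wider result)
//   shouldChangeType(128, 96) -> true   (both illegal, but the width shrinks)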
261
262/// Return true if it is desirable to convert a computation from 'From' to 'To'.
263/// We don't want to convert from a legal to an illegal type or from a smaller
264/// to a larger illegal type. i1 is always treated as a legal type because it is
265/// a fundamental type in IR, and there are many specialized optimizations for
266/// i1 types.
267bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
268 // TODO: This could be extended to allow vectors. Datalayout changes might be
269 // needed to properly support that.
270 if (!From->isIntegerTy() || !To->isIntegerTy())
271 return false;
272
273 unsigned FromWidth = From->getPrimitiveSizeInBits();
274 unsigned ToWidth = To->getPrimitiveSizeInBits();
275 return shouldChangeType(FromWidth, ToWidth);
276}
277
278// Return true if No Signed Wrap should be maintained for I.
279// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
280// where both B and C should be ConstantInts, results in a constant that does
281// not overflow. This function only handles the Add and Sub opcodes. For
282// all other opcodes, the function conservatively returns false.
284 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
285 if (!OBO || !OBO->hasNoSignedWrap())
286 return false;
287
288 // We reason about Add and Sub only.
289 Instruction::BinaryOps Opcode = I.getOpcode();
290 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
291 return false;
292
293 const APInt *BVal, *CVal;
294 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
295 return false;
296
297 bool Overflow = false;
298 if (Opcode == Instruction::Add)
299 (void)BVal->sadd_ov(*CVal, Overflow);
300 else
301 (void)BVal->ssub_ov(*CVal, Overflow);
302
303 return !Overflow;
304}
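// Illustrative example (an assumption, not upstream text): when reassociating
//   %t = add nsw i32 %x, 10
//   %r = add nsw i32 %t, 20
// into "%r = add i32 %x, 30", maintainNoSignedWrap(I, 10, 20) returns true
// because 10 + 20 does not overflow i32, so the nsw flag may be reapplied.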
305
307 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
308 return OBO && OBO->hasNoUnsignedWrap();
309}
310
312 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
313 return OBO && OBO->hasNoSignedWrap();
314}
315
316/// Conservatively clears subclassOptionalData after a reassociation or
317/// commutation. We preserve fast-math flags when applicable, as they remain
318/// valid across these transforms.
320 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
321 if (!FPMO) {
322 I.clearSubclassOptionalData();
323 return;
324 }
325
326 FastMathFlags FMF = I.getFastMathFlags();
327 I.clearSubclassOptionalData();
328 I.setFastMathFlags(FMF);
329}
330
331/// Combine constant operands of associative operations either before or after a
332/// cast to eliminate one of the associative operations:
333/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
334/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
336 InstCombinerImpl &IC) {
337 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
338 if (!Cast || !Cast->hasOneUse())
339 return false;
340
341 // TODO: Enhance logic for other casts and remove this check.
342 auto CastOpcode = Cast->getOpcode();
343 if (CastOpcode != Instruction::ZExt)
344 return false;
345
346 // TODO: Enhance logic for other BinOps and remove this check.
347 if (!BinOp1->isBitwiseLogicOp())
348 return false;
349
350 auto AssocOpcode = BinOp1->getOpcode();
351 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
352 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
353 return false;
354
355 Constant *C1, *C2;
356 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
357 !match(BinOp2->getOperand(1), m_Constant(C2)))
358 return false;
359
360 // TODO: This assumes a zext cast.
361 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
362 // to the destination type might lose bits.
363
364 // Fold the constants together in the destination type:
365 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
366 const DataLayout &DL = IC.getDataLayout();
367 Type *DestTy = C1->getType();
368 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
369 if (!CastC2)
370 return false;
371 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
372 if (!FoldedC)
373 return false;
374
375 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
376 IC.replaceOperand(*BinOp1, 1, FoldedC);
378 Cast->dropPoisonGeneratingFlags();
379 return true;
380}
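// Illustrative example (an assumption, matching the zext + bitwise-logic case
// handled above):
//   %t = or i8 %x, 1
//   %z = zext i8 %t to i32
//   %r = or i32 %z, 4
// becomes
//   %z = zext i8 %x to i32
//   %r = or i32 %z, 5        ; 4 | zext(1) folded in the destination type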
381
382// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
383// inttoptr ( ptrtoint (x) ) --> x
384Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
385 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
386 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
387 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
388 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
389 Type *CastTy = IntToPtr->getDestTy();
390 if (PtrToInt &&
391 CastTy->getPointerAddressSpace() ==
392 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
393 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
394 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
395 return PtrToInt->getOperand(0);
396 }
397 return nullptr;
398}
399
400/// This performs a few simplifications for operators that are associative or
401/// commutative:
402///
403/// Commutative operators:
404///
405/// 1. Order operands such that they are listed from right (least complex) to
406/// left (most complex). This puts constants before unary operators before
407/// binary operators.
408///
409/// Associative operators:
410///
411/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
412/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
413///
414/// Associative and commutative operators:
415///
416/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
417/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
418/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
419/// if C1 and C2 are constants.
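// Illustrative example of transform 2 (an assumption, not upstream text):
//   %t = add i32 %a, 5
//   %r = add i32 %t, -5      ; "B op C" is 5 + (-5), which simplifies to 0
// is rewritten as "%r = add i32 %a, 0", which a later visit folds to %a.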
421 Instruction::BinaryOps Opcode = I.getOpcode();
422 bool Changed = false;
423
424 do {
425 // Order operands such that they are listed from right (least complex) to
426 // left (most complex). This puts constants before unary operators before
427 // binary operators.
428 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
429 getComplexity(I.getOperand(1)))
430 Changed = !I.swapOperands();
431
432 if (I.isCommutative()) {
433 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
434 replaceOperand(I, 0, Pair->first);
435 replaceOperand(I, 1, Pair->second);
436 Changed = true;
437 }
438 }
439
440 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
441 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
442
443 if (I.isAssociative()) {
444 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
445 if (Op0 && Op0->getOpcode() == Opcode) {
446 Value *A = Op0->getOperand(0);
447 Value *B = Op0->getOperand(1);
448 Value *C = I.getOperand(1);
449
450 // Does "B op C" simplify?
451 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
452 // It simplifies to V. Form "A op V".
453 replaceOperand(I, 0, A);
454 replaceOperand(I, 1, V);
455 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
456 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
457
458 // Conservatively clear all optional flags since they may not be
459 // preserved by the reassociation. Reset nsw/nuw based on the above
460 // analysis.
462
463 // Note: this is only valid because SimplifyBinOp doesn't look at
464 // the operands to Op0.
465 if (IsNUW)
466 I.setHasNoUnsignedWrap(true);
467
468 if (IsNSW)
469 I.setHasNoSignedWrap(true);
470
471 Changed = true;
472 ++NumReassoc;
473 continue;
474 }
475 }
476
477 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
478 if (Op1 && Op1->getOpcode() == Opcode) {
479 Value *A = I.getOperand(0);
480 Value *B = Op1->getOperand(0);
481 Value *C = Op1->getOperand(1);
482
483 // Does "A op B" simplify?
484 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
485 // It simplifies to V. Form "V op C".
486 replaceOperand(I, 0, V);
487 replaceOperand(I, 1, C);
488 // Conservatively clear the optional flags, since they may not be
489 // preserved by the reassociation.
491 Changed = true;
492 ++NumReassoc;
493 continue;
494 }
495 }
496 }
497
498 if (I.isAssociative() && I.isCommutative()) {
499 if (simplifyAssocCastAssoc(&I, *this)) {
500 Changed = true;
501 ++NumReassoc;
502 continue;
503 }
504
505 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
506 if (Op0 && Op0->getOpcode() == Opcode) {
507 Value *A = Op0->getOperand(0);
508 Value *B = Op0->getOperand(1);
509 Value *C = I.getOperand(1);
510
511 // Does "C op A" simplify?
512 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
513 // It simplifies to V. Form "V op B".
514 replaceOperand(I, 0, V);
515 replaceOperand(I, 1, B);
516 // Conservatively clear the optional flags, since they may not be
517 // preserved by the reassociation.
519 Changed = true;
520 ++NumReassoc;
521 continue;
522 }
523 }
524
525 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
526 if (Op1 && Op1->getOpcode() == Opcode) {
527 Value *A = I.getOperand(0);
528 Value *B = Op1->getOperand(0);
529 Value *C = Op1->getOperand(1);
530
531 // Does "C op A" simplify?
532 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
533 // It simplifies to V. Form "B op V".
534 replaceOperand(I, 0, B);
535 replaceOperand(I, 1, V);
536 // Conservatively clear the optional flags, since they may not be
537 // preserved by the reassociation.
539 Changed = true;
540 ++NumReassoc;
541 continue;
542 }
543 }
544
545 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
546 // if C1 and C2 are constants.
547 Value *A, *B;
548 Constant *C1, *C2, *CRes;
549 if (Op0 && Op1 &&
550 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
551 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
552 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
553 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
554 bool IsNUW = hasNoUnsignedWrap(I) &&
555 hasNoUnsignedWrap(*Op0) &&
556 hasNoUnsignedWrap(*Op1);
557 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
558 BinaryOperator::CreateNUW(Opcode, A, B) :
559 BinaryOperator::Create(Opcode, A, B);
560
561 if (isa<FPMathOperator>(NewBO)) {
562 FastMathFlags Flags = I.getFastMathFlags() &
563 Op0->getFastMathFlags() &
564 Op1->getFastMathFlags();
565 NewBO->setFastMathFlags(Flags);
566 }
567 InsertNewInstWith(NewBO, I.getIterator());
568 NewBO->takeName(Op1);
569 replaceOperand(I, 0, NewBO);
570 replaceOperand(I, 1, CRes);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
574 if (IsNUW)
575 I.setHasNoUnsignedWrap(true);
576
577 Changed = true;
578 continue;
579 }
580 }
581
582 // No further simplifications.
583 return Changed;
584 } while (true);
585}
586
587/// Return whether "X LOp (Y ROp Z)" is always equal to
588/// "(X LOp Y) ROp (X LOp Z)".
591 // X & (Y | Z) <--> (X & Y) | (X & Z)
592 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
593 if (LOp == Instruction::And)
594 return ROp == Instruction::Or || ROp == Instruction::Xor;
595
596 // X | (Y & Z) <--> (X | Y) & (X | Z)
597 if (LOp == Instruction::Or)
598 return ROp == Instruction::And;
599
600 // X * (Y + Z) <--> (X * Y) + (X * Z)
601 // X * (Y - Z) <--> (X * Y) - (X * Z)
602 if (LOp == Instruction::Mul)
603 return ROp == Instruction::Add || ROp == Instruction::Sub;
604
605 return false;
606}
607
608/// Return whether "(X LOp Y) ROp Z" is always equal to
609/// "(X ROp Z) LOp (Y ROp Z)".
613 return leftDistributesOverRight(ROp, LOp);
614
615 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
617
618 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
619 // but this requires knowing that the addition does not overflow and other
620 // such subtleties.
621}
622
623/// This function returns the identity value for the given opcode, which can be used to
624/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
626 if (isa<Constant>(V))
627 return nullptr;
628
629 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
630}
631
632/// This function predicates factorization using distributive laws. By default,
633/// it just returns the 'Op' inputs. But for special-cases like
634/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
635/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
636/// allow more factorization opportunities.
639 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
640 assert(Op && "Expected a binary operator");
641 LHS = Op->getOperand(0);
642 RHS = Op->getOperand(1);
643 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
644 Constant *C;
645 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
646 // X << C --> X * (1 << C)
647 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
648 return Instruction::Mul;
649 }
650 // TODO: We can add other conversions e.g. shr => div etc.
651 }
652 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
653 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
655 // lshr nneg C, X --> ashr nneg C, X
656 return Instruction::AShr;
657 }
658 }
659 return Op->getOpcode();
660}
661
662/// This tries to simplify binary operations by factorizing out common terms
663/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
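// Illustrative example (an assumption): with the top-level opcode 'add' and
// the inner opcode 'mul',
//   %p = mul i32 %a, %b
//   %q = mul i32 %a, %c
//   %r = add i32 %p, %q
// is rebuilt as
//   %s = add i32 %b, %c
//   %r = mul i32 %a, %s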
666 Instruction::BinaryOps InnerOpcode, Value *A,
667 Value *B, Value *C, Value *D) {
668 assert(A && B && C && D && "All values must be provided");
669
670 Value *V = nullptr;
671 Value *RetVal = nullptr;
672 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
673 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
674
675 // Does "X op' Y" always equal "Y op' X"?
676 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
677
678 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
679 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
680 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
681 // commutative case, "(A op' B) op (C op' A)"?
682 if (A == C || (InnerCommutative && A == D)) {
683 if (A != C)
684 std::swap(C, D);
685 // Consider forming "A op' (B op D)".
686 // If "B op D" simplifies then it can be formed with no cost.
687 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
688
689 // If "B op D" doesn't simplify then only go on if one of the existing
690 // operations "A op' B" and "C op' D" will be zapped as no longer used.
691 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
692 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
693 if (V)
694 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
695 }
696 }
697
698 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
699 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
700 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
701 // commutative case, "(A op' B) op (B op' D)"?
702 if (B == D || (InnerCommutative && B == C)) {
703 if (B != D)
704 std::swap(C, D);
705 // Consider forming "(A op C) op' B".
706 // If "A op C" simplifies then it can be formed with no cost.
707 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
708
709 // If "A op C" doesn't simplify then only go on if one of the existing
710 // operations "A op' B" and "C op' D" will be zapped as no longer used.
711 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
712 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
713 if (V)
714 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
715 }
716 }
717
718 if (!RetVal)
719 return nullptr;
720
721 ++NumFactor;
722 RetVal->takeName(&I);
723
724 // Try to add no-overflow flags to the final value.
725 if (isa<OverflowingBinaryOperator>(RetVal)) {
726 bool HasNSW = false;
727 bool HasNUW = false;
728 if (isa<OverflowingBinaryOperator>(&I)) {
729 HasNSW = I.hasNoSignedWrap();
730 HasNUW = I.hasNoUnsignedWrap();
731 }
732 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
733 HasNSW &= LOBO->hasNoSignedWrap();
734 HasNUW &= LOBO->hasNoUnsignedWrap();
735 }
736
737 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
738 HasNSW &= ROBO->hasNoSignedWrap();
739 HasNUW &= ROBO->hasNoUnsignedWrap();
740 }
741
742 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
743 // We can propagate 'nsw' if we know that
744 // %Y = mul nsw i16 %X, C
745 // %Z = add nsw i16 %Y, %X
746 // =>
747 // %Z = mul nsw i16 %X, C+1
748 //
749 // iff C+1 isn't INT_MIN
750 const APInt *CInt;
751 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
752 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
753
754 // nuw can be propagated with any constant or nuw value.
755 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
756 }
757 }
758 return RetVal;
759}
760
761// If `I` has one Const operand and the other matches `(ctpop (not x))`,
762// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
763// This is only useful if the new subtract can fold, so we only handle the
764// following cases:
765// 1) (add/sub/disjoint_or C, (ctpop (not x))
766//    -> (add/sub/disjoint_or C', (ctpop x))
767// 2) (cmp pred C, (ctpop (not x))
768//    -> (cmp pred C', (ctpop x))
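// Illustrative example of case 1 (an assumption): for i8,
//   %n = xor i8 %x, -1
//   %p = call i8 @llvm.ctpop.i8(i8 %n)
//   %r = add i8 %p, 10
// becomes
//   %q = call i8 @llvm.ctpop.i8(i8 %x)
//   %r = sub i8 18, %q        ; C' = C + BitWidth(x) = 10 + 8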
770 unsigned Opc = I->getOpcode();
771 unsigned ConstIdx = 1;
772 switch (Opc) {
773 default:
774 return nullptr;
775 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
776// We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
777 // is constant.
778 case Instruction::Sub:
779 ConstIdx = 0;
780 break;
781 case Instruction::ICmp:
782 // Signed predicates aren't correct in some edge cases (e.g. for i2 types).
783 // Also, since (ctpop x) is known to be in [0, log2(BitWidth(x))], almost all
784 // signed comparisons against it are simplified to unsigned.
785 if (cast<ICmpInst>(I)->isSigned())
786 return nullptr;
787 break;
788 case Instruction::Or:
789 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
790 return nullptr;
791 [[fallthrough]];
792 case Instruction::Add:
793 break;
794 }
795
796 Value *Op;
797 // Find ctpop.
798 if (!match(I->getOperand(1 - ConstIdx),
799 m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(Op)))))
800 return nullptr;
801
802 Constant *C;
803 // Check other operand is ImmConstant.
804 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
805 return nullptr;
806
807 Type *Ty = Op->getType();
808 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
809 // Need extra check for icmp. Note if this check is true, it generally means
810 // the icmp will simplify to true/false.
811 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality() &&
812 !ConstantExpr::getICmp(ICmpInst::ICMP_UGT, C, BitWidthC)->isZeroValue())
813 return nullptr;
814
815 // Check we can invert `(not x)` for free.
816 bool Consumes = false;
817 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
818 return nullptr;
819 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
820 assert(NotOp != nullptr &&
821 "Desync between isFreeToInvert and getFreelyInverted");
822
823 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
824
825 Value *R = nullptr;
826
827 // Do the transformation here to avoid potentially introducing an infinite
828 // loop.
829 switch (Opc) {
830 case Instruction::Sub:
831 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
832 break;
833 case Instruction::Or:
834 case Instruction::Add:
835 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
836 break;
837 case Instruction::ICmp:
838 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
839 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
840 break;
841 default:
842 llvm_unreachable("Unhandled Opcode");
843 }
844 assert(R != nullptr);
845 return replaceInstUsesWith(*I, R);
846}
847
848// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
849// IFF
850// 1) the logic_shifts match
852// 2) either BinOp1 is `and`, or
853// the binops fully distribute over the shift (i.e. not `add` with `lshr`) and
854// either BinOp2 is `and` or (logic_shift (inv_logic_shift C1, C), C) == C1
854//
855// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
856//
857// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
858// IFF
859// 1) the logic_shifts match
860// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
861//
862// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
863//
864// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
865// IFF
866// 1) Binop1 is a bitwise logical operator (`and`, `or` or `xor`)
867// 2) Binop2 is `not`
868//
869// -> (arithmetic_shift Binop1((not X), Y), Amt)
870
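// Illustrative example of the second pattern (an assumption): with Binop = xor
// and logic_shift = lshr,
//   %xs = lshr i32 %x, %amt
//   %t  = xor i32 %xs, 255
//   %ys = lshr i32 %y, %amt
//   %r  = xor i32 %t, %ys
// becomes
//   %xy = xor i32 %x, %y
//   %s  = lshr i32 %xy, %amt
//   %r  = xor i32 %s, 255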
872 const DataLayout &DL = I.getModule()->getDataLayout();
873 auto IsValidBinOpc = [](unsigned Opc) {
874 switch (Opc) {
875 default:
876 return false;
877 case Instruction::And:
878 case Instruction::Or:
879 case Instruction::Xor:
880 case Instruction::Add:
881 // Skip Sub as we only match constant masks which will canonicalize to use
882 // add.
883 return true;
884 }
885 };
886
887 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
888 // constraints.
889 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
890 unsigned ShOpc) {
891 assert(ShOpc != Instruction::AShr);
892 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
893 ShOpc == Instruction::Shl;
894 };
895
896 auto GetInvShift = [](unsigned ShOpc) {
897 assert(ShOpc != Instruction::AShr);
898 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
899 };
900
901 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
902 unsigned ShOpc, Constant *CMask,
903 Constant *CShift) {
904 // If the BinOp1 is `and` we don't need to check the mask.
905 if (BinOpc1 == Instruction::And)
906 return true;
907
908 // For all other possible transforms we need a completely distributable
909 // binop/shift pair (anything but `add` + `lshr`).
910 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
911 return false;
912
913 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
914 // vecs, otherwise the mask will be simplified and the following check will
915 // handle it).
916 if (BinOpc2 == Instruction::And)
917 return true;
918
919 // Otherwise, need mask that meets the below requirement.
920 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
921 Constant *MaskInvShift =
922 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
923 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
924 CMask;
925 };
926
927 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
928 Constant *CMask, *CShift;
929 Value *X, *Y, *ShiftedX, *Mask, *Shift;
930 if (!match(I.getOperand(ShOpnum),
931 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
932 return nullptr;
933 if (!match(I.getOperand(1 - ShOpnum),
934 m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
935 return nullptr;
936
937 if (!match(ShiftedX, m_OneUse(m_Shift(m_Value(X), m_Specific(Shift)))))
938 return nullptr;
939
940 // Make sure we are matching instruction shifts and not ConstantExpr
941 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
942 auto *IX = dyn_cast<Instruction>(ShiftedX);
943 if (!IY || !IX)
944 return nullptr;
945
946 // LHS and RHS need same shift opcode
947 unsigned ShOpc = IY->getOpcode();
948 if (ShOpc != IX->getOpcode())
949 return nullptr;
950
951 // Make sure binop is real instruction and not ConstantExpr
952 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
953 if (!BO2)
954 return nullptr;
955
956 unsigned BinOpc = BO2->getOpcode();
957 // Make sure we have valid binops.
958 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
959 return nullptr;
960
961 if (ShOpc == Instruction::AShr) {
962 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
963 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
964 Value *NotX = Builder.CreateNot(X);
965 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
967 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
968 }
969
970 return nullptr;
971 }
972
973 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
974 // distribute to drop the shift, irrespective of the constants.
975 if (BinOpc == I.getOpcode() &&
976 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
977 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
978 Value *NewBinOp1 = Builder.CreateBinOp(
979 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
980 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
981 }
982
983 // Otherwise we can only distribute by constant shifting the mask, so
984 // ensure we have constants.
985 if (!match(Shift, m_ImmConstant(CShift)))
986 return nullptr;
987 if (!match(Mask, m_ImmConstant(CMask)))
988 return nullptr;
989
990 // Check if we can distribute the binops.
991 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
992 return nullptr;
993
994 Constant *NewCMask =
995 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
996 Value *NewBinOp2 = Builder.CreateBinOp(
997 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
998 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
999 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1000 NewBinOp1, CShift);
1001 };
1002
1003 if (Instruction *R = MatchBinOp(0))
1004 return R;
1005 return MatchBinOp(1);
1006}
1007
1008// (Binop (zext C), (select C, T, F))
1009// -> (select C, (binop 1, T), (binop 0, F))
1010//
1011// (Binop (sext C), (select C, T, F))
1012// -> (select C, (binop -1, T), (binop 0, F))
1013//
1014// Attempt to simplify binary operations into a select with folded args, when
1015// one operand of the binop is a select instruction and the other operand is a
1016// zext/sext extension, whose value is the select condition.
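// Illustrative example (an assumption):
//   %z = zext i1 %c to i32
//   %s = select i1 %c, i32 %t, i32 %f
//   %r = add i32 %z, %s
// becomes
//   %t1 = add i32 1, %t
//   %f1 = add i32 0, %f       ; later folds to %f
//   %r  = select i1 %c, i32 %t1, i32 %f1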
1019 // TODO: this simplification may be extended to any speculatable instruction,
1020 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1021 Instruction::BinaryOps Opc = I.getOpcode();
1022 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1023 Value *A, *CondVal, *TrueVal, *FalseVal;
1024 Value *CastOp;
1025
1026 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1027 return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
1028 A->getType()->getScalarSizeInBits() == 1 &&
1029 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1030 m_Value(FalseVal)));
1031 };
1032
1033 // Make sure one side of the binop is a select instruction, and the other is a
1034 // zero/sign extension operating on an i1.
1035 if (MatchSelectAndCast(LHS, RHS))
1036 CastOp = LHS;
1037 else if (MatchSelectAndCast(RHS, LHS))
1038 CastOp = RHS;
1039 else
1040 return nullptr;
1041
1042 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1043 bool IsCastOpRHS = (CastOp == RHS);
1044 bool IsZExt = isa<ZExtInst>(CastOp);
1045 Constant *C;
1046
1047 if (IsTrueArm) {
1048 C = Constant::getNullValue(V->getType());
1049 } else if (IsZExt) {
1050 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1051 C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
1052 } else {
1053 C = Constant::getAllOnesValue(V->getType());
1054 }
1055
1056 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
1057 : Builder.CreateBinOp(Opc, C, V);
1058 };
1059
1060 // If the value used in the zext/sext is the select condition, or the negation
1061 // of the select condition, the binop can be simplified.
1062 if (CondVal == A) {
1063 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1064 return SelectInst::Create(CondVal, NewTrueVal,
1065 NewFoldedConst(true, FalseVal));
1066 }
1067
1068 if (match(A, m_Not(m_Specific(CondVal)))) {
1069 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1070 return SelectInst::Create(CondVal, NewTrueVal,
1071 NewFoldedConst(false, FalseVal));
1072 }
1073
1074 return nullptr;
1075}
1076
1078 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1079 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1080 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1081 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1082 Value *A, *B, *C, *D;
1083 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1084
1085 if (Op0)
1086 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1087 if (Op1)
1088 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1089
1090 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1091 // a common term.
1092 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1093 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1094 return V;
1095
1096 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1097 // term.
1098 if (Op0)
1099 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1100 if (Value *V =
1101 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1102 return V;
1103
1104 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1105 // term.
1106 if (Op1)
1107 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1108 if (Value *V =
1109 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1110 return V;
1111
1112 return nullptr;
1113}
1114
1115/// This tries to simplify binary operations which some other binary operation
1116/// distributes over either by factorizing out common terms
1117/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1118/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1119/// Returns the simplified value, or null if it didn't simplify.
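// Illustrative expansion example (an assumption):
//   %o = or i32 %x, 12
//   %r = and i32 %o, 3
// expands to "%r = and i32 %x, 3", because "12 & 3" simplifies to 0, the
// identity value of the inner 'or'.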
1121 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1122 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
1123 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
1124 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1125
1126 // Factorization.
1127 if (Value *R = tryFactorizationFolds(I))
1128 return R;
1129
1130 // Expansion.
1131 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1132 // The instruction has the form "(A op' B) op C". See if expanding it out
1133 // to "(A op C) op' (B op C)" results in simplifications.
1134 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1135 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1136
1137 // Disable the use of undef because it's not safe to distribute undef.
1138 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1139 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1140 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1141
1142 // Do "A op C" and "B op C" both simplify?
1143 if (L && R) {
1144 // They do! Return "L op' R".
1145 ++NumExpand;
1146 C = Builder.CreateBinOp(InnerOpcode, L, R);
1147 C->takeName(&I);
1148 return C;
1149 }
1150
1151 // Does "A op C" simplify to the identity value for the inner opcode?
1152 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1153 // They do! Return "B op C".
1154 ++NumExpand;
1155 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1156 C->takeName(&I);
1157 return C;
1158 }
1159
1160 // Does "B op C" simplify to the identity value for the inner opcode?
1161 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1162 // They do! Return "A op C".
1163 ++NumExpand;
1164 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1165 C->takeName(&I);
1166 return C;
1167 }
1168 }
1169
1170 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1171 // The instruction has the form "A op (B op' C)". See if expanding it out
1172 // to "(A op B) op' (A op C)" results in simplifications.
1173 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1174 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1175
1176 // Disable the use of undef because it's not safe to distribute undef.
1177 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1178 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1179 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1180
1181 // Do "A op B" and "A op C" both simplify?
1182 if (L && R) {
1183 // They do! Return "L op' R".
1184 ++NumExpand;
1185 A = Builder.CreateBinOp(InnerOpcode, L, R);
1186 A->takeName(&I);
1187 return A;
1188 }
1189
1190 // Does "A op B" simplify to the identity value for the inner opcode?
1191 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1192 // They do! Return "A op C".
1193 ++NumExpand;
1194 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1195 A->takeName(&I);
1196 return A;
1197 }
1198
1199 // Does "A op C" simplify to the identity value for the inner opcode?
1200 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1201 // They do! Return "A op B".
1202 ++NumExpand;
1203 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1204 A->takeName(&I);
1205 return A;
1206 }
1207 }
1208
1210}
1211
1212static std::optional<std::pair<Value *, Value *>>
1214 if (LHS->getParent() != RHS->getParent())
1215 return std::nullopt;
1216
1217 if (LHS->getNumIncomingValues() < 2)
1218 return std::nullopt;
1219
1220 if (!equal(LHS->blocks(), RHS->blocks()))
1221 return std::nullopt;
1222
1223 Value *L0 = LHS->getIncomingValue(0);
1224 Value *R0 = RHS->getIncomingValue(0);
1225
1226 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1227 Value *L1 = LHS->getIncomingValue(I);
1228 Value *R1 = RHS->getIncomingValue(I);
1229
1230 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1231 continue;
1232
1233 return std::nullopt;
1234 }
1235
1236 return std::optional(std::pair(L0, R0));
1237}
1238
1239std::optional<std::pair<Value *, Value *>>
1240InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1241 Instruction *LHSInst = dyn_cast<Instruction>(LHS);
1242 Instruction *RHSInst = dyn_cast<Instruction>(RHS);
1243 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1244 return std::nullopt;
1245 switch (LHSInst->getOpcode()) {
1246 case Instruction::PHI:
1247 return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS));
1248 case Instruction::Select: {
1249 Value *Cond = LHSInst->getOperand(0);
1250 Value *TrueVal = LHSInst->getOperand(1);
1251 Value *FalseVal = LHSInst->getOperand(2);
1252 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1253 FalseVal == RHSInst->getOperand(1))
1254 return std::pair(TrueVal, FalseVal);
1255 return std::nullopt;
1256 }
1257 case Instruction::Call: {
1258 // Match min(a, b) and max(a, b)
1259 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1260 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1261 if (LHSMinMax && RHSMinMax &&
1262 LHSMinMax->getPredicate() ==
1264 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1265 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1266 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1267 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1268 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1269 return std::nullopt;
1270 }
1271 default:
1272 return std::nullopt;
1273 }
1274}
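// Illustrative example (an assumption): for
//   %m = call i32 @llvm.smin.i32(i32 %a, i32 %b)
//   %n = call i32 @llvm.smax.i32(i32 %b, i32 %a)
//   %r = add i32 %m, %n
// matchSymmetricPair(%m, %n) returns the pair {%a, %b}, so the commutative add
// is rewritten to "add i32 %a, %b" (min(a,b) + max(a,b) == a + b).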
1275
1277 Value *LHS,
1278 Value *RHS) {
1279 Value *A, *B, *C, *D, *E, *F;
1280 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1281 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1282 if (!LHSIsSelect && !RHSIsSelect)
1283 return nullptr;
1284
1285 FastMathFlags FMF;
1287 if (isa<FPMathOperator>(&I)) {
1288 FMF = I.getFastMathFlags();
1290 }
1291
1292 Instruction::BinaryOps Opcode = I.getOpcode();
1294
1295 Value *Cond, *True = nullptr, *False = nullptr;
1296
1297 // Special-case for add/negate combination. Replace the zero in the negation
1298 // with the trailing add operand:
1299 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1300 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1301 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1302 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1303 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1304 return nullptr;
1305
1306 Value *N;
1307 if (True && match(FVal, m_Neg(m_Value(N)))) {
1308 Value *Sub = Builder.CreateSub(Z, N);
1309 return Builder.CreateSelect(Cond, True, Sub, I.getName());
1310 }
1311 if (False && match(TVal, m_Neg(m_Value(N)))) {
1312 Value *Sub = Builder.CreateSub(Z, N);
1313 return Builder.CreateSelect(Cond, Sub, False, I.getName());
1314 }
1315 return nullptr;
1316 };
1317
1318 if (LHSIsSelect && RHSIsSelect && A == D) {
1319 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1320 Cond = A;
1321 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1322 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1323
1324 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1325 if (False && !True)
1326 True = Builder.CreateBinOp(Opcode, B, E);
1327 else if (True && !False)
1328 False = Builder.CreateBinOp(Opcode, C, F);
1329 }
1330 } else if (LHSIsSelect && LHS->hasOneUse()) {
1331 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1332 Cond = A;
1333 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1334 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1335 if (Value *NewSel = foldAddNegate(B, C, RHS))
1336 return NewSel;
1337 } else if (RHSIsSelect && RHS->hasOneUse()) {
1338 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1339 Cond = D;
1340 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1341 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1342 if (Value *NewSel = foldAddNegate(E, F, LHS))
1343 return NewSel;
1344 }
1345
1346 if (!True || !False)
1347 return nullptr;
1348
1349 Value *SI = Builder.CreateSelect(Cond, True, False);
1350 SI->takeName(&I);
1351 return SI;
1352}
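// Illustrative example of the add/negate special case above (an assumption):
//   %n = sub i32 0, %z
//   %s = select i1 %c, i32 0, i32 %n
//   %r = add i32 %s, %y          ; the true arm simplifies: 0 + %y == %y
// becomes
//   %d = sub i32 %y, %z
//   %r = select i1 %c, i32 %y, i32 %d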
1353
1354/// Freely adapt every user of V as if V was changed to !V.
1355/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1357 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1358 for (User *U : make_early_inc_range(I->users())) {
1359 if (U == IgnoredUser)
1360 continue; // Don't consider this user.
1361 switch (cast<Instruction>(U)->getOpcode()) {
1362 case Instruction::Select: {
1363 auto *SI = cast<SelectInst>(U);
1364 SI->swapValues();
1365 SI->swapProfMetadata();
1366 break;
1367 }
1368 case Instruction::Br: {
1369 BranchInst *BI = cast<BranchInst>(U);
1370 BI->swapSuccessors(); // swaps prof metadata too
1371 if (BPI)
1373 break;
1374 }
1375 case Instruction::Xor:
1376 replaceInstUsesWith(cast<Instruction>(*U), I);
1377 // Add to worklist for DCE.
1378 addToWorklist(cast<Instruction>(U));
1379 break;
1380 default:
1381 llvm_unreachable("Got unexpected user - out of sync with "
1382 "canFreelyInvertAllUsersOf() ?");
1383 }
1384 }
1385}
1386
1387/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1388/// constant zero (which is the 'negate' form).
1389Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1390 Value *NegV;
1391 if (match(V, m_Neg(m_Value(NegV))))
1392 return NegV;
1393
1394 // Constants can be considered to be negated values if they can be folded.
1395 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
1396 return ConstantExpr::getNeg(C);
1397
1398 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
1399 if (C->getType()->getElementType()->isIntegerTy())
1400 return ConstantExpr::getNeg(C);
1401
1402 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
1403 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1404 Constant *Elt = CV->getAggregateElement(i);
1405 if (!Elt)
1406 return nullptr;
1407
1408 if (isa<UndefValue>(Elt))
1409 continue;
1410
1411 if (!isa<ConstantInt>(Elt))
1412 return nullptr;
1413 }
1414 return ConstantExpr::getNeg(CV);
1415 }
1416
1417 // Negate integer vector splats.
1418 if (auto *CV = dyn_cast<Constant>(V))
1419 if (CV->getType()->isVectorTy() &&
1420 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1421 return ConstantExpr::getNeg(CV);
1422
1423 return nullptr;
1424}
1425
1426// Try to fold:
1427// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1428// -> ({s|u}itofp (int_binop x, y))
1429// 2) (fp_binop ({s|u}itofp x), FpC)
1430// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1431//
1432// Assuming the sign of the cast for x/y is `OpsFromSigned`.
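// Illustrative example (an assumption; relies on known bits proving that the
// narrower add cannot overflow):
//   %xm = and i16 %x, 255
//   %ym = and i16 %y, 255
//   %xf = uitofp i16 %xm to float
//   %yf = uitofp i16 %ym to float
//   %r  = fadd float %xf, %yf
// becomes
//   %s = add nuw i16 %xm, %ym
//   %r = uitofp i16 %s to float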
1433Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1434 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1436
1437 Type *FPTy = BO.getType();
1438 Type *IntTy = IntOps[0]->getType();
1439
1440 unsigned IntSz = IntTy->getScalarSizeInBits();
1441 // This is the maximum number of in-use bits in the integer for which the
1442 // int -> fp casts are exact.
1443 unsigned MaxRepresentableBits =
1445
1446 // Preserve the known number of leading bits. This can allow us to trivially
1447 // pass the nsw/nuw checks later on.
1448 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1449
1450 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1451 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1452 auto IsNonZero = [&](unsigned OpNo) -> bool {
1453 if (OpsKnown[OpNo].hasKnownBits() &&
1454 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1455 return true;
1456 return isKnownNonZero(IntOps[OpNo], SQ);
1457 };
1458
1459 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1460 // NB: This matches the impl in ValueTracking, we just try to use cached
1461 // knownbits here. If we ever start supporting WithCache for
1462 // `isKnownNonNegative`, change this to an explicit call.
1463 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1464 };
1465
1466 // Check if we know for certain that ({s|u}itofp op) is exact.
1467 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1468 // Can we treat this operand as the desired sign?
1469 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1470 !IsNonNeg(OpNo))
1471 return false;
1472
1473 // If fp precision >= bitwidth(op) then it's exact.
1474 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1475 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1476 // handled specially. We can't, however, increase the bound arbitrarily for
1477 // `sitofp` as for larger sizes, it won't sign extend.
1478 if (MaxRepresentableBits < IntSz) {
1479 // Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
1480 // numSignBits(op).
1481 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1482 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1483 if (OpsFromSigned)
1484 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1485 // Finally, for an unsigned cast, check that fp precision >= bitwidth(op) -
1486 // numLeadingZeros(op).
1487 else {
1488 NumUsedLeadingBits[OpNo] =
1489 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1490 }
1491 }
1492 // NB: We could also check if op is known to be a power of 2 or zero (which
1493 // will always be representable). It's unlikely, however, that if we are
1494 // unable to bound op in any way, we will be able to pass the overflow checks
1495 // later on.
1496
1497 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1498 return false;
1499 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1500 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1501 IsNonZero(OpNo);
1502 };
1503
1504 // If we have a constant rhs, see if we can losslessly convert it to an int.
1505 if (Op1FpC != nullptr) {
1506 // Signed + Mul req non-zero
1507 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1508 !match(Op1FpC, m_NonZeroFP()))
1509 return nullptr;
1510
1512 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1513 IntTy, DL);
1514 if (Op1IntC == nullptr)
1515 return nullptr;
1516 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1517 : Instruction::UIToFP,
1518 Op1IntC, FPTy, DL) != Op1FpC)
1519 return nullptr;
1520
1521 // First try to keep sign of cast the same.
1522 IntOps[1] = Op1IntC;
1523 }
1524
1525 // Ensure lhs/rhs integer types match.
1526 if (IntTy != IntOps[1]->getType())
1527 return nullptr;
1528
1529 if (Op1FpC == nullptr) {
1530 if (!IsValidPromotion(1))
1531 return nullptr;
1532 }
1533 if (!IsValidPromotion(0))
1534 return nullptr;
1535
1536 // Finally, we check that the integer version of the binop will not overflow.
1538 // Because of the precision check, we can often rule out overflows.
1539 bool NeedsOverflowCheck = true;
1540 // Try to conservatively rule out overflow based on the precision checks
1541 // already performed.
1542 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1543 unsigned OverflowMaxCurBits =
1544 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1545 bool OutputSigned = OpsFromSigned;
1546 switch (BO.getOpcode()) {
1547 case Instruction::FAdd:
1548 IntOpc = Instruction::Add;
1549 OverflowMaxOutputBits += OverflowMaxCurBits;
1550 break;
1551 case Instruction::FSub:
1552 IntOpc = Instruction::Sub;
1553 OverflowMaxOutputBits += OverflowMaxCurBits;
1554 break;
1555 case Instruction::FMul:
1556 IntOpc = Instruction::Mul;
1557 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1558 break;
1559 default:
1560 llvm_unreachable("Unsupported binop");
1561 }
1562 // The precision check may have already ruled out overflow.
1563 if (OverflowMaxOutputBits < IntSz) {
1564 NeedsOverflowCheck = false;
1565 // Any unsigned wrap from sub still lands on an in-range signed value (this is
1566 // what allows us to avoid the overflow check for sub).
1567 if (IntOpc == Instruction::Sub)
1568 OutputSigned = true;
1569 }
1570
1571 // Precision check did not rule out overflow, so need to check.
1572 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1573 // `IntOps[...]` arguments to `KnownOps[...]`.
1574 if (NeedsOverflowCheck &&
1575 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1576 return nullptr;
1577
1578 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1579 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1580 IntBO->setHasNoSignedWrap(OutputSigned);
1581 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1582 }
1583 if (OutputSigned)
1584 return new SIToFPInst(IntBinOp, FPTy);
1585 return new UIToFPInst(IntBinOp, FPTy);
1586}
1587
1588// Try to fold:
1589// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1590// -> ({s|u}itofp (int_binop x, y))
1591// 2) (fp_binop ({s|u}itofp x), FpC)
1592// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1593Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1594 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1595 Constant *Op1FpC = nullptr;
1596 // Check for:
1597 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1598 // 2) (binop ({s|u}itofp x), FpC)
1599 if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
1600 !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
1601 return nullptr;
1602
1603 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1604 !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
1605 !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
1606 return nullptr;
1607
1608 // Cache KnownBits a bit to potentially save some analysis.
1609 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1610
1611 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1612 // different constraints depending on the sign of the cast.
1613 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1614 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1615 IntOps, Op1FpC, OpsKnown))
1616 return R;
1617 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1618 Op1FpC, OpsKnown);
1619}
1620
1621/// A binop with a constant operand and a sign-extended boolean operand may be
1622/// converted into a select of constants by applying the binary operation to
1623/// the constant with the two possible values of the extended boolean (0 or -1).
1624Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1625 // TODO: Handle non-commutative binop (constant is operand 0).
1626 // TODO: Handle zext.
1627 // TODO: Peek through 'not' of cast.
1628 Value *BO0 = BO.getOperand(0);
1629 Value *BO1 = BO.getOperand(1);
1630 Value *X;
1631 Constant *C;
1632 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1633 !X->getType()->isIntOrIntVectorTy(1))
1634 return nullptr;
1635
1636 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1637 Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
1638 Constant *Zero = ConstantInt::getNullValue(BO.getType());
1639 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1640 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1641 return SelectInst::Create(X, TVal, FVal);
1642}
1643
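// Illustrative example (not part of the original source; names are
// hypothetical): for
//   %r = add i32 (select i1 %c, i32 4, i32 %x), 1
// the true arm constant-folds to 5, while the false arm does not fold because
// %x is not a constant.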
1644 static Value *constantFoldOperationIntoSelectOperand(Instruction &I,
1645 SelectInst *SI,
1646 bool IsTrueArm) {
1647 SmallVector<Constant *> ConstOps;
1648 for (Value *Op : I.operands()) {
1649 CmpInst::Predicate Pred;
1650 Constant *C = nullptr;
1651 if (Op == SI) {
1652 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1653 : SI->getFalseValue());
1654 } else if (match(SI->getCondition(),
1655 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1656 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1657 isGuaranteedNotToBeUndefOrPoison(C)) {
1658 // Pass
1659 } else {
1660 C = dyn_cast<Constant>(Op);
1661 }
1662 if (C == nullptr)
1663 return nullptr;
1664
1665 ConstOps.push_back(C);
1666 }
1667
1668 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1669}
1670
1671 static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1672 Value *NewOp, InstCombiner &IC) {
1673 Instruction *Clone = I.clone();
1674 Clone->replaceUsesOfWith(SI, NewOp);
1675 Clone->dropUBImplyingAttrsAndMetadata();
1676 IC.InsertNewInstBefore(Clone, SI->getIterator());
1677 return Clone;
1678}
1679
1680 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1681 bool FoldWithMultiUse) {
1682 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1683 if (!SI->hasOneUse() && !FoldWithMultiUse)
1684 return nullptr;
1685
1686 Value *TV = SI->getTrueValue();
1687 Value *FV = SI->getFalseValue();
1688 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1689 return nullptr;
1690
1691 // Bool selects with constant operands can be folded to logical ops.
1692 if (SI->getType()->isIntOrIntVectorTy(1))
1693 return nullptr;
1694
1695 // Test if an FCmpInst instruction is used exclusively by a select as
1696 // part of a minimum or maximum operation. If so, refrain from doing
1697 // any other folding. This helps out other analyses which understand
1698 // non-obfuscated minimum and maximum idioms. And in this case, at
1699 // least one of the comparison operands has at least one user besides
1700 // the compare (the select), which would often largely negate the
1701 // benefit of folding anyway.
1702 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1703 if (CI->hasOneUse()) {
1704 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1705 if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1))
1706 return nullptr;
1707 }
1708 }
1709
1710 // Make sure that one of the select arms constant folds successfully.
1711 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1712 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1713 if (!NewTV && !NewFV)
1714 return nullptr;
1715
1716 // Create an instruction for the arm that did not fold.
1717 if (!NewTV)
1718 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1719 if (!NewFV)
1720 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1721 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1722}
1723
1724 static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1725 Value *InValue, BasicBlock *InBB,
1726 const DataLayout &DL,
1727 const SimplifyQuery SQ) {
1728 // NB: It is a precondition of this transform that the operands be
1729 // phi translatable! This is usually trivially satisfied by limiting it
1730 // to constant ops, and for selects we do a more sophisticated check.
1731 SmallVector<Value *> Ops;
1732 for (Value *Op : I.operands()) {
1733 if (Op == PN)
1734 Ops.push_back(InValue);
1735 else
1736 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1737 }
1738
1739 // Don't consider the simplification successful if we get back a constant
1740 // expression. That's just an instruction in hiding.
1741 // Also reject the case where we simplify back to the phi node. We wouldn't
1742 // be able to remove it in that case.
1743 Value *NewVal = simplifyInstructionWithOperands(
1744 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1745 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1746 return NewVal;
1747
1748 // Check if incoming PHI value can be replaced with constant
1749 // based on implied condition.
1750 BranchInst *TerminatorBI = dyn_cast<BranchInst>(InBB->getTerminator());
1751 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1752 if (TerminatorBI && TerminatorBI->isConditional() &&
1753 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1754 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1755 std::optional<bool> ImpliedCond =
1756 isImpliedCondition(TerminatorBI->getCondition(), ICmp->getPredicate(),
1757 Ops[0], Ops[1], DL, LHSIsTrue);
1758 if (ImpliedCond)
1759 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1760 }
1761
1762 return nullptr;
1763}
1764
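// Illustrative example (not part of the original source; names are
// hypothetical):
//   %p = phi i32 [ 7, %bb0 ], [ %x, %bb1 ]
//   %r = add i32 %p, 1
// can become a phi of the folded values, with the add cloned into %bb1 when
// %bb1 branches unconditionally to the phi block:
//   %r = phi i32 [ 8, %bb0 ], [ %x.add, %bb1 ]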
1765 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1766 unsigned NumPHIValues = PN->getNumIncomingValues();
1767 if (NumPHIValues == 0)
1768 return nullptr;
1769
1770 // We normally only transform phis with a single use. However, if a PHI has
1771 // multiple uses and they are all the same operation, we can fold *all* of the
1772 // uses into the PHI.
1773 if (!PN->hasOneUse()) {
1774 // Walk the use list for the instruction, comparing them to I.
1775 for (User *U : PN->users()) {
1776 Instruction *UI = cast<Instruction>(U);
1777 if (UI != &I && !I.isIdenticalTo(UI))
1778 return nullptr;
1779 }
1780 // Otherwise, we can replace *all* users with the new PHI we form.
1781 }
1782
1783 // Check to see whether the instruction can be folded into each phi operand.
1784 // If there is one operand that does not fold, remember the BB it is in.
1785 // If there is more than one or if *it* is a PHI, bail out.
1786 SmallVector<Value *> NewPhiValues;
1787 BasicBlock *NonSimplifiedBB = nullptr;
1788 Value *NonSimplifiedInVal = nullptr;
1789 for (unsigned i = 0; i != NumPHIValues; ++i) {
1790 Value *InVal = PN->getIncomingValue(i);
1791 BasicBlock *InBB = PN->getIncomingBlock(i);
1792
1793 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1794 NewPhiValues.push_back(NewVal);
1795 continue;
1796 }
1797
1798 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1799
1800 NonSimplifiedBB = InBB;
1801 NonSimplifiedInVal = InVal;
1802 NewPhiValues.push_back(nullptr);
1803
1804 // If the InVal is an invoke at the end of the pred block, then we can't
1805 // insert a computation after it without breaking the edge.
1806 if (isa<InvokeInst>(InVal))
1807 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1808 return nullptr;
1809
1810 // If the incoming non-constant value is reachable from the phis block,
1811 // we'll push the operation across a loop backedge. This could result in
1812 // an infinite combine loop, and is generally non-profitable (especially
1813 // if the operation was originally outside the loop).
1814 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1815 LI))
1816 return nullptr;
1817 }
1818
1819 // If there is exactly one non-simplified value, we can insert a copy of the
1820 // operation in that block. However, if this is a critical edge, we would be
1821 // inserting the computation on some other paths (e.g. inside a loop). Only
1822 // do this if the pred block is unconditionally branching into the phi block.
1823 // Also, make sure that the pred block is not dead code.
1824 if (NonSimplifiedBB != nullptr) {
1825 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1826 if (!BI || !BI->isUnconditional() ||
1827 !DT.isReachableFromEntry(NonSimplifiedBB))
1828 return nullptr;
1829 }
1830
1831 // Okay, we can do the transformation: create the new PHI node.
1832 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1833 InsertNewInstBefore(NewPN, PN->getIterator());
1834 NewPN->takeName(PN);
1835 NewPN->setDebugLoc(PN->getDebugLoc());
1836
1837 // If we are going to have to insert a new computation, do so right before the
1838 // predecessor's terminator.
1839 Instruction *Clone = nullptr;
1840 if (NonSimplifiedBB) {
1841 Clone = I.clone();
1842 for (Use &U : Clone->operands()) {
1843 if (U == PN)
1844 U = NonSimplifiedInVal;
1845 else
1846 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1847 }
1848 InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
1849 }
1850
1851 for (unsigned i = 0; i != NumPHIValues; ++i) {
1852 if (NewPhiValues[i])
1853 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1854 else
1855 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1856 }
1857
1858 for (User *U : make_early_inc_range(PN->users())) {
1859 Instruction *User = cast<Instruction>(U);
1860 if (User == &I) continue;
1861 replaceInstUsesWith(*User, NewPN);
1862 eraseInstFromFunction(*User);
1863 }
1864
1865 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1866 const_cast<PHINode &>(*NewPN),
1867 const_cast<PHINode &>(*PN), DT);
1868 return replaceInstUsesWith(I, NewPN);
1869}
1870
1871 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1872 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1873 // we are guarding against replicating the binop in >1 predecessor.
1874 // This could miss matching a phi with 2 constant incoming values.
1875 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1876 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1877 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1878 Phi0->getNumOperands() != Phi1->getNumOperands())
1879 return nullptr;
1880
1881 // TODO: Remove the restriction for binop being in the same block as the phis.
1882 if (BO.getParent() != Phi0->getParent() ||
1883 BO.getParent() != Phi1->getParent())
1884 return nullptr;
1885
1886 // Fold if, in every shared predecessor block, one of the two phis supplies the
1887 // binary operator's identity constant; the result for that block is then simply
1888 // the other phi's incoming value.
1889 // For example:
1890 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1891 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1892 // %add = add i32 %phi0, %phi1
1893 // ==>
1894 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1895 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1896 /*AllowRHSConstant*/ false);
1897 if (C) {
1898 SmallVector<Value *, 4> NewIncomingValues;
1899 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1900 auto &Phi0Use = std::get<0>(T);
1901 auto &Phi1Use = std::get<1>(T);
1902 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1903 return false;
1904 Value *Phi0UseV = Phi0Use.get();
1905 Value *Phi1UseV = Phi1Use.get();
1906 if (Phi0UseV == C)
1907 NewIncomingValues.push_back(Phi1UseV);
1908 else if (Phi1UseV == C)
1909 NewIncomingValues.push_back(Phi0UseV);
1910 else
1911 return false;
1912 return true;
1913 };
1914
1915 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1916 CanFoldIncomingValuePair)) {
1917 PHINode *NewPhi =
1918 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1919 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1920 "The number of collected incoming values should equal the number "
1921 "of the original PHINode operands!");
1922 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1923 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1924 return NewPhi;
1925 }
1926 }
1927
1928 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1929 return nullptr;
1930
1931 // Match a pair of incoming constants for one of the predecessor blocks.
1932 BasicBlock *ConstBB, *OtherBB;
1933 Constant *C0, *C1;
1934 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1935 ConstBB = Phi0->getIncomingBlock(0);
1936 OtherBB = Phi0->getIncomingBlock(1);
1937 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1938 ConstBB = Phi0->getIncomingBlock(1);
1939 OtherBB = Phi0->getIncomingBlock(0);
1940 } else {
1941 return nullptr;
1942 }
1943 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1944 return nullptr;
1945
1946 // The block that we are hoisting to must reach here unconditionally.
1947 // Otherwise, we could be speculatively executing an expensive or
1948 // non-speculative op.
1949 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1950 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1951 !DT.isReachableFromEntry(OtherBB))
1952 return nullptr;
1953
1954 // TODO: This check could be tightened to only apply to binops (div/rem) that
1955 // are not safe to speculatively execute. But that could allow hoisting
1956 // potentially expensive instructions (fdiv for example).
1957 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1958 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1959 return nullptr;
1960
1961 // Fold constants for the predecessor block with constant incoming values.
1962 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1963 if (!NewC)
1964 return nullptr;
1965
1966 // Make a new binop in the predecessor block with the non-constant incoming
1967 // values.
1968 Builder.SetInsertPoint(PredBlockBranch);
1969 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1970 Phi0->getIncomingValueForBlock(OtherBB),
1971 Phi1->getIncomingValueForBlock(OtherBB));
1972 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1973 NotFoldedNewBO->copyIRFlags(&BO);
1974
1975 // Replace the binop with a phi of the new values. The old phis are dead.
1976 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1977 NewPhi->addIncoming(NewBO, OtherBB);
1978 NewPhi->addIncoming(NewC, ConstBB);
1979 return NewPhi;
1980}
1981
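// Illustrative example (not part of the original source): with a constant RHS
// and a single-use select operand,
//   add (select i1 %c, i32 1, i32 2), 3  -->  select i1 %c, i32 4, i32 5
// and similarly through a phi operand via foldOpIntoPhi.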
1982 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1983 if (!isa<Constant>(I.getOperand(1)))
1984 return nullptr;
1985
1986 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1987 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1988 return NewSel;
1989 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1990 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1991 return NewPhi;
1992 }
1993 return nullptr;
1994}
1995
1996 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1997 // If this GEP has only 0 indices, it is the same pointer as
1998 // Src. If Src is not a trivial GEP too, don't combine
1999 // the indices.
2000 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2001 !Src.hasOneUse())
2002 return false;
2003 return true;
2004}
2005
2006 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2007 if (!isa<VectorType>(Inst.getType()))
2008 return nullptr;
2009
2010 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2011 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2012 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2013 cast<VectorType>(Inst.getType())->getElementCount());
2014 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2015 cast<VectorType>(Inst.getType())->getElementCount());
2016
2017 // If both operands of the binop are vector concatenations, then perform the
2018 // narrow binop on each pair of the source operands followed by concatenation
2019 // of the results.
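// Illustrative example (not part of the original source; %a..%d are
// hypothetical <2 x i32> values), with the concat mask <0, 1, 2, 3>:
//   %l = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %r = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %s = add <4 x i32> %l, %r
// becomes the two narrow adds %a+%c and %b+%d concatenated by the same mask.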
2020 Value *L0, *L1, *R0, *R1;
2021 ArrayRef<int> Mask;
2022 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2023 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2024 LHS->hasOneUse() && RHS->hasOneUse() &&
2025 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2026 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2027 // This transform does not have the speculative execution constraint as
2028 // below because the shuffle is a concatenation. The new binops are
2029 // operating on exactly the same elements as the existing binop.
2030 // TODO: We could ease the mask requirement to allow different undef lanes,
2031 // but that requires an analysis of the binop-with-undef output value.
2032 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2033 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2034 BO->copyIRFlags(&Inst);
2035 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2036 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2037 BO->copyIRFlags(&Inst);
2038 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2039 }
2040
2041 auto createBinOpReverse = [&](Value *X, Value *Y) {
2042 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2043 if (auto *BO = dyn_cast<BinaryOperator>(V))
2044 BO->copyIRFlags(&Inst);
2045 Module *M = Inst.getModule();
2046 Function *F =
2047 Intrinsic::getDeclaration(M, Intrinsic::vector_reverse, V->getType());
2048 return CallInst::Create(F, V);
2049 };
2050
2051 // NOTE: Reverse shuffles don't require the speculative execution protection
2052 // below because they don't affect which lanes take part in the computation.
2053
2054 Value *V1, *V2;
2055 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2056 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2057 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2058 (LHS->hasOneUse() || RHS->hasOneUse() ||
2059 (LHS == RHS && LHS->hasNUses(2))))
2060 return createBinOpReverse(V1, V2);
2061
2062 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2063 if (LHS->hasOneUse() && isSplatValue(RHS))
2064 return createBinOpReverse(V1, RHS);
2065 }
2066 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2067 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2068 return createBinOpReverse(LHS, V2);
2069
2070 // It may not be safe to reorder shuffles and things like div, urem, etc.
2071 // because we may trap when executing those ops on unknown vector elements.
2072 // See PR20059.
2073 if (!isSafeToSpeculativelyExecute(&Inst))
2074 return nullptr;
2075
2076 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2077 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2078 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2079 BO->copyIRFlags(&Inst);
2080 return new ShuffleVectorInst(XY, M);
2081 };
2082
2083 // If both arguments of the binary operation are shuffles that use the same
2084 // mask and shuffle within a single vector, move the shuffle after the binop.
2085 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2086 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2087 V1->getType() == V2->getType() &&
2088 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2089 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2090 return createBinOpShuffle(V1, V2, Mask);
2091 }
2092
2093 // If both arguments of a commutative binop are select-shuffles that use the
2094 // same mask with commuted operands, the shuffles are unnecessary.
2095 if (Inst.isCommutative() &&
2096 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2097 match(RHS,
2098 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2099 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2100 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2101 // TODO: Allow shuffles that contain undefs in the mask?
2102 // That is legal, but it reduces undef knowledge.
2103 // TODO: Allow arbitrary shuffles by shuffling after binop?
2104 // That might be legal, but we have to deal with poison.
2105 if (LShuf->isSelect() &&
2106 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2107 RShuf->isSelect() &&
2108 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2109 // Example:
2110 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2111 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2112 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2113 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2114 NewBO->copyIRFlags(&Inst);
2115 return NewBO;
2116 }
2117 }
2118
2119 // If one argument is a shuffle within one vector and the other is a constant,
2120 // try moving the shuffle after the binary operation. This canonicalization
2121 // intends to move shuffles closer to other shuffles and binops closer to
2122 // other binops, so they can be folded. It may also enable demanded elements
2123 // transforms.
2124 Constant *C;
2125 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
2126 if (InstVTy &&
2127 match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
2128 m_Mask(Mask))),
2129 m_ImmConstant(C))) &&
2130 cast<FixedVectorType>(V1->getType())->getNumElements() <=
2131 InstVTy->getNumElements()) {
2132 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
2133 "Shuffle should not change scalar type");
2134
2135 // Find constant NewC that has property:
2136 // shuffle(NewC, ShMask) = C
2137 // If such a constant does not exist (example: ShMask=<0,0> and C=<1,2>)
2138 // reorder is not possible. A 1-to-1 mapping is not required. Example:
2139 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
2140 bool ConstOp1 = isa<Constant>(RHS);
2141 ArrayRef<int> ShMask = Mask;
2142 unsigned SrcVecNumElts =
2143 cast<FixedVectorType>(V1->getType())->getNumElements();
2144 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2145 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
2146 bool MayChange = true;
2147 unsigned NumElts = InstVTy->getNumElements();
2148 for (unsigned I = 0; I < NumElts; ++I) {
2149 Constant *CElt = C->getAggregateElement(I);
2150 if (ShMask[I] >= 0) {
2151 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2152 Constant *NewCElt = NewVecC[ShMask[I]];
2153 // Bail out if:
2154 // 1. The constant vector contains a constant expression.
2155 // 2. The shuffle needs an element of the constant vector that can't
2156 // be mapped to a new constant vector.
2157 // 3. This is a widening shuffle that copies elements of V1 into the
2158 // extended elements (extending with poison is allowed).
2159 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2160 I >= SrcVecNumElts) {
2161 MayChange = false;
2162 break;
2163 }
2164 NewVecC[ShMask[I]] = CElt;
2165 }
2166 // If this is a widening shuffle, we must be able to extend with poison
2167 // elements. If the original binop does not produce a poison in the high
2168 // lanes, then this transform is not safe.
2169 // Similarly for poison lanes due to the shuffle mask, we can only
2170 // transform binops that preserve poison.
2171 // TODO: We could shuffle those non-poison constant values into the
2172 // result by using a constant vector (rather than a poison vector)
2173 // as operand 1 of the new binop, but that might be too aggressive
2174 // for target-independent shuffle creation.
2175 if (I >= SrcVecNumElts || ShMask[I] < 0) {
2176 Constant *MaybePoison =
2177 ConstOp1
2178 ? ConstantFoldBinaryOpOperands(Opcode, PoisonScalar, CElt, DL)
2179 : ConstantFoldBinaryOpOperands(Opcode, CElt, PoisonScalar, DL);
2180 if (!MaybePoison || !isa<PoisonValue>(MaybePoison)) {
2181 MayChange = false;
2182 break;
2183 }
2184 }
2185 }
2186 if (MayChange) {
2187 Constant *NewC = ConstantVector::get(NewVecC);
2188 // It may not be safe to execute a binop on a vector with poison elements
2189 // because the entire instruction can be folded to undef or create poison
2190 // that did not exist in the original code.
2191 // TODO: The shift case should not be necessary.
2192 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
2193 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2194
2195 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2196 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2197 Value *NewLHS = ConstOp1 ? V1 : NewC;
2198 Value *NewRHS = ConstOp1 ? NewC : V1;
2199 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2200 }
2201 }
2202
2203 // Try to reassociate to sink a splat shuffle after a binary operation.
2204 if (Inst.isAssociative() && Inst.isCommutative()) {
2205 // Canonicalize shuffle operand as LHS.
2206 if (isa<ShuffleVectorInst>(RHS))
2207 std::swap(LHS, RHS);
2208
2209 Value *X;
2210 ArrayRef<int> MaskC;
2211 int SplatIndex;
2212 Value *Y, *OtherOp;
2213 if (!match(LHS,
2214 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2215 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2216 X->getType() != Inst.getType() ||
2217 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2218 return nullptr;
2219
2220 // FIXME: This may not be safe if the analysis allows undef elements. By
2221 // moving 'Y' before the splat shuffle, we are implicitly assuming
2222 // that it is not undef/poison at the splat index.
2223 if (isSplatValue(OtherOp, SplatIndex)) {
2224 std::swap(Y, OtherOp);
2225 } else if (!isSplatValue(Y, SplatIndex)) {
2226 return nullptr;
2227 }
2228
2229 // X and Y are splatted values, so perform the binary operation on those
2230 // values followed by a splat followed by the 2nd binary operation:
2231 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2232 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2233 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2234 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2235 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2236
2237 // Intersect FMF on both new binops. Other (poison-generating) flags are
2238 // dropped to be safe.
2239 if (isa<FPMathOperator>(R)) {
2240 R->copyFastMathFlags(&Inst);
2241 R->andIRFlags(RHS);
2242 }
2243 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2244 NewInstBO->copyIRFlags(R);
2245 return R;
2246 }
2247
2248 return nullptr;
2249}
2250
2251/// Try to narrow the width of a binop if at least 1 operand is an extend of
2252 /// a value. This requires a potentially expensive known bits check to make
2253/// sure the narrow op does not overflow.
2254Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2255 // We need at least one extended operand.
2256 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2257
2258 // If this is a sub, we swap the operands since we always want an extension
2259 // on the RHS. The LHS can be an extension or a constant.
2260 if (BO.getOpcode() == Instruction::Sub)
2261 std::swap(Op0, Op1);
2262
2263 Value *X;
2264 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2265 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2266 return nullptr;
2267
2268 // If both operands are the same extension from the same source type and we
2269 // can eliminate at least one (hasOneUse), this might work.
2270 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2271 Value *Y;
2272 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2273 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2274 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2275 // If that did not match, see if we have a suitable constant operand.
2276 // Truncating and extending must produce the same constant.
2277 Constant *WideC;
2278 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2279 return nullptr;
2280 Constant *NarrowC = getLosslessTrunc(WideC, X->getType(), CastOpc);
2281 if (!NarrowC)
2282 return nullptr;
2283 Y = NarrowC;
2284 }
2285
2286 // Swap back now that we found our operands.
2287 if (BO.getOpcode() == Instruction::Sub)
2288 std::swap(X, Y);
2289
2290 // Both operands have narrow versions. Last step: the math must not overflow
2291 // in the narrow width.
2292 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2293 return nullptr;
2294
2295 // bo (ext X), (ext Y) --> ext (bo X, Y)
2296 // bo (ext X), C --> ext (bo X, C')
2297 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2298 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2299 if (IsSext)
2300 NewBinOp->setHasNoSignedWrap();
2301 else
2302 NewBinOp->setHasNoUnsignedWrap();
2303 }
2304 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2305}
2306
2307 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
2308 // At least one GEP must be inbounds.
2309 if (!GEP1.isInBounds() && !GEP2.isInBounds())
2310 return false;
2311
2312 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
2313 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
2314}
2315
2316/// Thread a GEP operation with constant indices through the constant true/false
2317/// arms of a select.
2318 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2319 InstCombiner::BuilderTy &Builder) {
2320 if (!GEP.hasAllConstantIndices())
2321 return nullptr;
2322
2323 Instruction *Sel;
2324 Value *Cond;
2325 Constant *TrueC, *FalseC;
2326 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2327 !match(Sel,
2328 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2329 return nullptr;
2330
2331 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2332 // Propagate 'inbounds' and metadata from existing instructions.
2333 // Note: using IRBuilder to create the constants for efficiency.
2334 SmallVector<Value *, 4> IndexC(GEP.indices());
2335 bool IsInBounds = GEP.isInBounds();
2336 Type *Ty = GEP.getSourceElementType();
2337 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
2338 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
2339 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2340}
2341
2342// Canonicalization:
2343// gep T, (gep i8, base, C1), (Index + C2) into
2344// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2345 static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2346 GEPOperator *Src,
2347 InstCombinerImpl &IC) {
2348 if (GEP.getNumIndices() != 1)
2349 return nullptr;
2350 auto &DL = IC.getDataLayout();
2351 Value *Base;
2352 const APInt *C1;
2353 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2354 return nullptr;
2355 Value *VarIndex;
2356 const APInt *C2;
2357 Type *PtrTy = Src->getType()->getScalarType();
2358 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2359 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2360 return nullptr;
2361 if (C1->getBitWidth() != IndexSizeInBits ||
2362 C2->getBitWidth() != IndexSizeInBits)
2363 return nullptr;
2364 Type *BaseType = GEP.getSourceElementType();
2365 if (isa<ScalableVectorType>(BaseType))
2366 return nullptr;
2367 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2368 APInt NewOffset = TypeSize * *C2 + *C1;
2369 if (NewOffset.isZero() ||
2370 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2371 Value *GEPConst =
2372 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset));
2373 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex);
2374 }
2375
2376 return nullptr;
2377}
2378
2379 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2380 GEPOperator *Src) {
2381 // Combine Indices - If the source pointer to this getelementptr instruction
2382 // is a getelementptr instruction with matching element type, combine the
2383 // indices of the two getelementptr instructions into a single instruction.
2384 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2385 return nullptr;
2386
2387 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2388 return I;
2389
2390 // For constant GEPs, use a more general offset-based folding approach.
2391 Type *PtrTy = Src->getType()->getScalarType();
2392 if (GEP.hasAllConstantIndices() &&
2393 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2394 // Split Src into a variable part and a constant suffix.
2395 gep_type_iterator GTI = gep_type_begin(*Src);
2396 Type *BaseType = GTI.getIndexedType();
2397 bool IsFirstType = true;
2398 unsigned NumVarIndices = 0;
2399 for (auto Pair : enumerate(Src->indices())) {
2400 if (!isa<ConstantInt>(Pair.value())) {
2401 BaseType = GTI.getIndexedType();
2402 IsFirstType = false;
2403 NumVarIndices = Pair.index() + 1;
2404 }
2405 ++GTI;
2406 }
2407
2408 // Determine the offset for the constant suffix of Src.
2409 APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2410 if (NumVarIndices != Src->getNumIndices()) {
2411 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2412 if (BaseType->isScalableTy())
2413 return nullptr;
2414
2415 SmallVector<Value *> ConstantIndices;
2416 if (!IsFirstType)
2417 ConstantIndices.push_back(
2418 Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2419 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2420 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2421 }
2422
2423 // Add the offset for GEP (which is fully constant).
2424 if (!GEP.accumulateConstantOffset(DL, Offset))
2425 return nullptr;
2426
2427 APInt OffsetOld = Offset;
2428 // Convert the total offset back into indices.
2429 SmallVector<APInt> ConstIndices =
2430 DL.getGEPIndicesForOffset(BaseType, Offset);
2431 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2432 // If both GEP are constant-indexed, and cannot be merged in either way,
2433 // convert them to a GEP of i8.
2434 if (Src->hasAllConstantIndices())
2435 return replaceInstUsesWith(
2436 GEP, Builder.CreateGEP(
2437 Builder.getInt8Ty(), Src->getOperand(0),
2438 Builder.getInt(OffsetOld), "",
2439 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2440 return nullptr;
2441 }
2442
2443 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2444 SmallVector<Value *> Indices;
2445 append_range(Indices, drop_end(Src->indices(),
2446 Src->getNumIndices() - NumVarIndices));
2447 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2448 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2449 // Even if the total offset is inbounds, we may end up representing it
2450 // by first performing a larger negative offset, and then a smaller
2451 // positive one. The large negative offset might go out of bounds. Only
2452 // preserve inbounds if all signs are the same.
2453 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2454 }
2455
2456 return replaceInstUsesWith(
2457 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
2458 Indices, "", IsInBounds));
2459 }
2460
2461 if (Src->getResultElementType() != GEP.getSourceElementType())
2462 return nullptr;
2463
2464 SmallVector<Value*, 8> Indices;
2465
2466 // Find out whether the last index in the source GEP is a sequential idx.
2467 bool EndsWithSequential = false;
2468 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2469 I != E; ++I)
2470 EndsWithSequential = I.isSequential();
2471
2472 // Can we combine the two pointer arithmetics offsets?
2473 if (EndsWithSequential) {
2474 // Replace: gep (gep %P, long B), long A, ...
2475 // With: T = long A+B; gep %P, T, ...
2476 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2477 Value *GO1 = GEP.getOperand(1);
2478
2479 // If they aren't the same type, then the input hasn't been processed
2480 // by the loop above yet (which canonicalizes sequential index types to
2481 // intptr_t). Just avoid transforming this until the input has been
2482 // normalized.
2483 if (SO1->getType() != GO1->getType())
2484 return nullptr;
2485
2486 Value *Sum =
2487 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2488 // Only do the combine when we are sure the cost after the
2489 // merge is never more than that before the merge.
2490 if (Sum == nullptr)
2491 return nullptr;
2492
2493 // Update the GEP in place if possible.
2494 if (Src->getNumOperands() == 2) {
2495 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2496 replaceOperand(GEP, 0, Src->getOperand(0));
2497 replaceOperand(GEP, 1, Sum);
2498 return &GEP;
2499 }
2500 Indices.append(Src->op_begin()+1, Src->op_end()-1);
2501 Indices.push_back(Sum);
2502 Indices.append(GEP.op_begin()+2, GEP.op_end());
2503 } else if (isa<Constant>(*GEP.idx_begin()) &&
2504 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2505 Src->getNumOperands() != 1) {
2506 // Otherwise we can do the fold if the first index of the GEP is a zero
2507 Indices.append(Src->op_begin()+1, Src->op_end());
2508 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2509 }
2510
2511 if (!Indices.empty())
2512 return replaceInstUsesWith(
2513 GEP, Builder.CreateGEP(
2514 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2515 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
2516
2517 return nullptr;
2518}
2519
2520 Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2521 BuilderTy *Builder,
2522 bool &DoesConsume, unsigned Depth) {
2523 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2524 // ~(~(X)) -> X.
2525 Value *A, *B;
2526 if (match(V, m_Not(m_Value(A)))) {
2527 DoesConsume = true;
2528 return A;
2529 }
2530
2531 Constant *C;
2532 // Constants can be considered to be not'ed values.
2533 if (match(V, m_ImmConstant(C)))
2534 return ConstantExpr::getNot(C);
2535
2536 if (Depth++ >= MaxAnalysisRecursionDepth)
2537 return nullptr;
2538
2539 // The rest of the cases require that we invert all uses so don't bother
2540 // doing the analysis if we know we can't use the result.
2541 if (!WillInvertAllUses)
2542 return nullptr;
2543
2544 // Compares can be inverted if all of their uses are being modified to use
2545 // the ~V.
2546 if (auto *I = dyn_cast<CmpInst>(V)) {
2547 if (Builder != nullptr)
2548 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
2549 I->getOperand(1));
2550 return NonNull;
2551 }
2552
2553 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2554 // `(-1 - B) - A` if we are willing to invert all of the uses.
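// Editorial sanity check of the identity: with A = 3 and B = 5,
// ~(3 + 5) = -9 and (-1 - 5) - 3 = -9.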
2555 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
2556 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2557 DoesConsume, Depth))
2558 return Builder ? Builder->CreateSub(BV, A) : NonNull;
2559 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2560 DoesConsume, Depth))
2561 return Builder ? Builder->CreateSub(AV, B) : NonNull;
2562 return nullptr;
2563 }
2564
2565 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2566 // into `A ^ B` if we are willing to invert all of the uses.
2567 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
2568 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2569 DoesConsume, Depth))
2570 return Builder ? Builder->CreateXor(A, BV) : NonNull;
2571 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2572 DoesConsume, Depth))
2573 return Builder ? Builder->CreateXor(AV, B) : NonNull;
2574 return nullptr;
2575 }
2576
2577 // If `V` is of the form `B - A` then `-1 - V` can be folded into
2578 // `A + (-1 - B)` if we are willing to invert all of the uses.
2579 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
2580 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2581 DoesConsume, Depth))
2582 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
2583 return nullptr;
2584 }
2585
2586 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
2587 // into `A s>> B` if we are willing to invert all of the uses.
2588 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
2589 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2590 DoesConsume, Depth))
2591 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
2592 return nullptr;
2593 }
2594
2595 Value *Cond;
2596 // LogicOps are special in that we canonicalize them at the cost of an
2597 // instruction.
2598 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
2599 !shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(V));
2600 // Selects/min/max with invertible operands are freely invertible
2601 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
2602 bool LocalDoesConsume = DoesConsume;
2603 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
2604 LocalDoesConsume, Depth))
2605 return nullptr;
2606 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2607 LocalDoesConsume, Depth)) {
2608 DoesConsume = LocalDoesConsume;
2609 if (Builder != nullptr) {
2610 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2611 DoesConsume, Depth);
2612 assert(NotB != nullptr &&
2613 "Unable to build inverted value for known freely invertable op");
2614 if (auto *II = dyn_cast<IntrinsicInst>(V))
2615 return Builder->CreateBinaryIntrinsic(
2616 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
2617 return Builder->CreateSelect(Cond, NotA, NotB);
2618 }
2619 return NonNull;
2620 }
2621 }
2622
2623 if (PHINode *PN = dyn_cast<PHINode>(V)) {
2624 bool LocalDoesConsume = DoesConsume;
2625 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
2626 for (Use &U : PN->operands()) {
2627 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
2628 Value *NewIncomingVal = getFreelyInvertedImpl(
2629 U.get(), /*WillInvertAllUses=*/false,
2630 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
2631 if (NewIncomingVal == nullptr)
2632 return nullptr;
2633 // Make sure that we can safely erase the original PHI node.
2634 if (NewIncomingVal == V)
2635 return nullptr;
2636 if (Builder != nullptr)
2637 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
2638 }
2639
2640 DoesConsume = LocalDoesConsume;
2641 if (Builder != nullptr) {
2642 IRBuilderBase::InsertPointGuard Guard(*Builder);
2643 Builder->SetInsertPoint(PN);
2644 PHINode *NewPN =
2645 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
2646 for (auto [Val, Pred] : IncomingValues)
2647 NewPN->addIncoming(Val, Pred);
2648 return NewPN;
2649 }
2650 return NonNull;
2651 }
2652
2653 if (match(V, m_SExtLike(m_Value(A)))) {
2654 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2655 DoesConsume, Depth))
2656 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
2657 return nullptr;
2658 }
2659
2660 if (match(V, m_Trunc(m_Value(A)))) {
2661 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2662 DoesConsume, Depth))
2663 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
2664 return nullptr;
2665 }
2666
2667 // De Morgan's Laws:
2668 // (~(A | B)) -> (~A & ~B)
2669 // (~(A & B)) -> (~A | ~B)
2670 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
2671 bool IsLogical, Value *A,
2672 Value *B) -> Value * {
2673 bool LocalDoesConsume = DoesConsume;
2674 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
2675 LocalDoesConsume, Depth))
2676 return nullptr;
2677 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
2678 LocalDoesConsume, Depth)) {
2679 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
2680 LocalDoesConsume, Depth);
2681 DoesConsume = LocalDoesConsume;
2682 if (IsLogical)
2683 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
2684 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
2685 }
2686
2687 return nullptr;
2688 };
2689
2690 if (match(V, m_Or(m_Value(A), m_Value(B))))
2691 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
2692 B);
2693
2694 if (match(V, m_And(m_Value(A), m_Value(B))))
2695 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
2696 B);
2697
2698 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
2699 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
2700 B);
2701
2702 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
2703 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
2704 B);
2705
2706 return nullptr;
2707}
2708
2709 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2710 Value *PtrOp = GEP.getOperand(0);
2711 SmallVector<Value *, 8> Indices(GEP.indices());
2712 Type *GEPType = GEP.getType();
2713 Type *GEPEltType = GEP.getSourceElementType();
2714 bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
2715 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2716 SQ.getWithInstruction(&GEP)))
2717 return replaceInstUsesWith(GEP, V);
2718
2719 // For vector geps, use the generic demanded vector support.
2720 // Skip if GEP return type is scalable. The number of elements is unknown at
2721 // compile-time.
2722 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2723 auto VWidth = GEPFVTy->getNumElements();
2724 APInt PoisonElts(VWidth, 0);
2725 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2726 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2727 PoisonElts)) {
2728 if (V != &GEP)
2729 return replaceInstUsesWith(GEP, V);
2730 return &GEP;
2731 }
2732
2733 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2734 // possible (decide on canonical form for pointer broadcast), 3) exploit
2735 // undef elements to decrease demanded bits
2736 }
2737
2738 // Eliminate unneeded casts for indices, and replace indices which displace
2739 // by multiples of a zero size type with zero.
2740 bool MadeChange = false;
2741
2742 // Index width may not be the same width as pointer width.
2743 // Data layout chooses the right type based on supported integer types.
2744 Type *NewScalarIndexTy =
2745 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2746
2747 gep_type_iterator GTI = gep_type_begin(GEP);
2748 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2749 ++I, ++GTI) {
2750 // Skip indices into struct types.
2751 if (GTI.isStruct())
2752 continue;
2753
2754 Type *IndexTy = (*I)->getType();
2755 Type *NewIndexType =
2756 IndexTy->isVectorTy()
2757 ? VectorType::get(NewScalarIndexTy,
2758 cast<VectorType>(IndexTy)->getElementCount())
2759 : NewScalarIndexTy;
2760
2761 // If the element type has zero size then any index over it is equivalent
2762 // to an index of zero, so replace it with zero if it is not zero already.
2763 Type *EltTy = GTI.getIndexedType();
2764 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2765 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2766 *I = Constant::getNullValue(NewIndexType);
2767 MadeChange = true;
2768 }
2769
2770 if (IndexTy != NewIndexType) {
2771 // If we are using a wider index than needed for this platform, shrink
2772 // it to what we need. If narrower, sign-extend it to what we need.
2773 // This explicit cast can make subsequent optimizations more obvious.
2774 *I = Builder.CreateIntCast(*I, NewIndexType, true);
2775 MadeChange = true;
2776 }
2777 }
2778 if (MadeChange)
2779 return &GEP;
2780
2781 // Canonicalize constant GEPs to i8 type.
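// Illustrative example (not part of the original source):
//   gep inbounds i32, ptr %p, i64 3  -->  gep inbounds i8, ptr %p, i64 12
// i.e. a single byte-offset GEP carrying the accumulated constant offset.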
2782 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2783 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
2784 if (GEP.accumulateConstantOffset(DL, Offset))
2785 return replaceInstUsesWith(
2786 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
2787 GEP.isInBounds()));
2788 }
2789
2790 // Check to see if the inputs to the PHI node are getelementptr instructions.
2791 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2792 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2793 if (!Op1)
2794 return nullptr;
2795
2796 // Don't fold a GEP into itself through a PHI node. This can only happen
2797 // through the back-edge of a loop. Folding a GEP into itself means that
2798 // the value of the previous iteration needs to be stored in the meantime,
2799 // thus requiring an additional register variable to be live, but not
2800 // actually achieving anything (the GEP still needs to be executed once per
2801 // loop iteration).
2802 if (Op1 == &GEP)
2803 return nullptr;
2804
2805 int DI = -1;
2806
2807 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2808 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2809 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2810 Op1->getSourceElementType() != Op2->getSourceElementType())
2811 return nullptr;
2812
2813 // As for Op1 above, don't try to fold a GEP into itself.
2814 if (Op2 == &GEP)
2815 return nullptr;
2816
2817 // Keep track of the type as we walk the GEP.
2818 Type *CurTy = nullptr;
2819
2820 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2821 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2822 return nullptr;
2823
2824 if (Op1->getOperand(J) != Op2->getOperand(J)) {
2825 if (DI == -1) {
2826 // We have not seen any differences yet in the GEPs feeding the
2827 // PHI yet, so we record this one if it is allowed to be a
2828 // variable.
2829
2830 // The first two arguments can vary for any GEP, the rest have to be
2831 // static for struct slots
2832 if (J > 1) {
2833 assert(CurTy && "No current type?");
2834 if (CurTy->isStructTy())
2835 return nullptr;
2836 }
2837
2838 DI = J;
2839 } else {
2840 // The GEP is different by more than one input. While this could be
2841 // extended to support GEPs that vary by more than one variable it
2842 // doesn't make sense since it greatly increases the complexity and
2843 // would result in an R+R+R addressing mode which no backend
2844 // directly supports and would need to be broken into several
2845 // simpler instructions anyway.
2846 return nullptr;
2847 }
2848 }
2849
2850 // Sink down a layer of the type for the next iteration.
2851 if (J > 0) {
2852 if (J == 1) {
2853 CurTy = Op1->getSourceElementType();
2854 } else {
2855 CurTy =
2856 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2857 }
2858 }
2859 }
2860 }
2861
2862 // If not all GEPs are identical we'll have to create a new PHI node.
2863 // Check that the old PHI node has only one use so that it will get
2864 // removed.
2865 if (DI != -1 && !PN->hasOneUse())
2866 return nullptr;
2867
2868 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2869 if (DI == -1) {
2870 // All the GEPs feeding the PHI are identical. Clone one down into our
2871 // BB so that it can be merged with the current GEP.
2872 } else {
2873 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2874 // into the current block so it can be merged, and create a new PHI to
2875 // set that index.
2876 PHINode *NewPN;
2877 {
2878 IRBuilderBase::InsertPointGuard Guard(Builder);
2879 Builder.SetInsertPoint(PN);
2880 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2881 PN->getNumOperands());
2882 }
2883
2884 for (auto &I : PN->operands())
2885 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2886 PN->getIncomingBlock(I));
2887
2888 NewGEP->setOperand(DI, NewPN);
2889 }
2890
2891 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2892 return replaceOperand(GEP, 0, NewGEP);
2893 }
2894
2895 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2896 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2897 return I;
2898
2899 // Skip if GEP source element type is scalable. The type alloc size is unknown
2900 // at compile-time.
2901 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2902 unsigned AS = GEP.getPointerAddressSpace();
2903 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2904 DL.getIndexSizeInBits(AS)) {
2905 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2906
2907 if (TyAllocSize == 1) {
2908 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
2909 // but only if the result pointer is only used as if it were an integer,
2910 // or both point to the same underlying object (otherwise provenance is
2911 // not necessarily retained).
2912 Value *X = GEP.getPointerOperand();
2913 Value *Y;
2914 if (match(GEP.getOperand(1),
2915 m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2916 GEPType == Y->getType()) {
2917 bool HasSameUnderlyingObject =
2918 getUnderlyingObject(X) == getUnderlyingObject(Y);
2919 bool Changed = false;
2920 GEP.replaceUsesWithIf(Y, [&](Use &U) {
2921 bool ShouldReplace = HasSameUnderlyingObject ||
2922 isa<ICmpInst>(U.getUser()) ||
2923 isa<PtrToIntInst>(U.getUser());
2924 Changed |= ShouldReplace;
2925 return ShouldReplace;
2926 });
2927 return Changed ? &GEP : nullptr;
2928 }
2929 } else {
2930 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
2931 Value *V;
2932 if ((has_single_bit(TyAllocSize) &&
2933 match(GEP.getOperand(1),
2934 m_Exact(m_Shr(m_Value(V),
2935 m_SpecificInt(countr_zero(TyAllocSize)))))) ||
2936 match(GEP.getOperand(1),
2937 m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
2938 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2939 Builder.getInt8Ty(), GEP.getPointerOperand(), V);
2940 NewGEP->setIsInBounds(GEP.isInBounds());
2941 return NewGEP;
2942 }
2943 }
2944 }
2945 }
2946 // We do not handle pointer-vector geps here.
2947 if (GEPType->isVectorTy())
2948 return nullptr;
2949
2950 if (GEP.getNumIndices() == 1) {
2951 // We can only preserve inbounds if the original gep is inbounds, the add
2952 // is nsw, and the add operands are non-negative.
2953 auto CanPreserveInBounds = [&](bool AddIsNSW, Value *Idx1, Value *Idx2) {
2954 const SimplifyQuery &Q = SQ.getWithInstruction(&GEP);
2955 return GEP.isInBounds() && AddIsNSW && isKnownNonNegative(Idx1, Q) &&
2956 isKnownNonNegative(Idx2, Q);
2957 };
2958
2959 // Try to replace ADD + GEP with GEP + GEP.
2960 Value *Idx1, *Idx2;
2961 if (match(GEP.getOperand(1),
2962 m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
2963 // %idx = add i64 %idx1, %idx2
2964 // %gep = getelementptr i32, ptr %ptr, i64 %idx
2965 // as:
2966 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
2967 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
2968 bool IsInBounds = CanPreserveInBounds(
2969 cast<OverflowingBinaryOperator>(GEP.getOperand(1))->hasNoSignedWrap(),
2970 Idx1, Idx2);
2971 auto *NewPtr =
2972 Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(),
2973 Idx1, "", IsInBounds);
2974 return replaceInstUsesWith(
2975 GEP, Builder.CreateGEP(GEP.getResultElementType(), NewPtr, Idx2, "",
2976 IsInBounds));
2977 }
2978 ConstantInt *C;
2979 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAdd(
2980 m_Value(Idx1), m_ConstantInt(C))))))) {
2981 // %add = add nsw i32 %idx1, idx2
2982 // %sidx = sext i32 %add to i64
2983 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
2984 // as:
2985 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
2986 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
2987 bool IsInBounds = CanPreserveInBounds(
2988 /*IsNSW=*/true, Idx1, C);
2989 auto *NewPtr = Builder.CreateGEP(
2990 GEP.getResultElementType(), GEP.getPointerOperand(),
2991 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "",
2992 IsInBounds);
2993 return replaceInstUsesWith(
2994 GEP,
2995 Builder.CreateGEP(GEP.getResultElementType(), NewPtr,
2996 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
2997 "", IsInBounds));
2998 }
2999 }
3000
3001 if (!GEP.isInBounds()) {
3002 unsigned IdxWidth =
3003 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3004 APInt BasePtrOffset(IdxWidth, 0);
3005 Value *UnderlyingPtrOp =
3006 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
3007 BasePtrOffset);
3008 bool CanBeNull, CanBeFreed;
3009 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3010 DL, CanBeNull, CanBeFreed);
3011 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3012 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3013 BasePtrOffset.isNonNegative()) {
3014 APInt AllocSize(IdxWidth, DerefBytes);
3015 if (BasePtrOffset.ule(AllocSize)) {
3016 return GetElementPtrInst::CreateInBounds(
3017 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3018 }
3019 }
3020 }
3021 }
3022
3023 if (Instruction *R = foldSelectGEP(GEP, Builder))
3024 return R;
3025
3026 return nullptr;
3027}
3028
3029 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3030 Instruction *AI) {
3031 if (isa<ConstantPointerNull>(V))
3032 return true;
3033 if (auto *LI = dyn_cast<LoadInst>(V))
3034 return isa<GlobalVariable>(LI->getPointerOperand());
3035 // Two distinct allocations will never be equal.
3036 return isAllocLikeFn(V, &TLI) && V != AI;
3037}
3038
3039/// Given a call CB which uses an address UsedV, return true if we can prove the
3040 /// call's only possible effect is storing to UsedV.
3041static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3042 const TargetLibraryInfo &TLI) {
3043 if (!CB.use_empty())
3044 // TODO: add recursion if returned attribute is present
3045 return false;
3046
3047 if (CB.isTerminator())
3048 // TODO: remove implementation restriction
3049 return false;
3050
3051 if (!CB.willReturn() || !CB.doesNotThrow())
3052 return false;
3053
3054 // If the only possible side effect of the call is writing to the alloca,
3055 // and the result isn't used, we can safely remove any reads implied by the
3056 // call including those which might read the alloca itself.
3057 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3058 return Dest && Dest->Ptr == UsedV;
3059}
3060
3061 static bool isAllocSiteRemovable(Instruction *AI,
3062 SmallVectorImpl<WeakTrackingVH> &Users,
3063 const TargetLibraryInfo &TLI) {
3064 SmallVector<Instruction*, 4> Worklist;
3065 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3066 Worklist.push_back(AI);
3067
3068 do {
3069 Instruction *PI = Worklist.pop_back_val();
3070 for (User *U : PI->users()) {
3071 Instruction *I = cast<Instruction>(U);
3072 switch (I->getOpcode()) {
3073 default:
3074 // Give up the moment we see something we can't handle.
3075 return false;
3076
3077 case Instruction::AddrSpaceCast:
3078 case Instruction::BitCast:
3079 case Instruction::GetElementPtr:
3080 Users.emplace_back(I);
3081 Worklist.push_back(I);
3082 continue;
3083
3084 case Instruction::ICmp: {
3085 ICmpInst *ICI = cast<ICmpInst>(I);
3086 // We can fold eq/ne comparisons with null to false/true, respectively.
3087 // We also fold comparisons in some conditions provided the alloc has
3088 // not escaped (see isNeverEqualToUnescapedAlloc).
3089 if (!ICI->isEquality())
3090 return false;
3091 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3092 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3093 return false;
3094
3095 // Do not fold compares to aligned_alloc calls, as they may have to
3096 // return null in case the required alignment cannot be satisfied,
3097 // unless we can prove that both alignment and size are valid.
3098 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3099 // Check if alignment and size of a call to aligned_alloc is valid,
3100 // that is alignment is a power-of-2 and the size is a multiple of the
3101 // alignment.
3102 const APInt *Alignment;
3103 const APInt *Size;
3104 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3105 match(CB->getArgOperand(1), m_APInt(Size)) &&
3106 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3107 };
3108 auto *CB = dyn_cast<CallBase>(AI);
3109 LibFunc TheLibFunc;
3110 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3111 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3112 !AlignmentAndSizeKnownValid(CB))
3113 return false;
3114 Users.emplace_back(I);
3115 continue;
3116 }
3117
3118 case Instruction::Call:
3119 // Ignore no-op and store intrinsics.
3120 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3121 switch (II->getIntrinsicID()) {
3122 default:
3123 return false;
3124
3125 case Intrinsic::memmove:
3126 case Intrinsic::memcpy:
3127 case Intrinsic::memset: {
3128 MemIntrinsic *MI = cast<MemIntrinsic>(II);
3129 if (MI->isVolatile() || MI->getRawDest() != PI)
3130 return false;
3131 [[fallthrough]];
3132 }
3133 case Intrinsic::assume:
3134 case Intrinsic::invariant_start:
3135 case Intrinsic::invariant_end:
3136 case Intrinsic::lifetime_start:
3137 case Intrinsic::lifetime_end:
3138 case Intrinsic::objectsize:
3139 Users.emplace_back(I);
3140 continue;
3141 case Intrinsic::launder_invariant_group:
3142 case Intrinsic::strip_invariant_group:
3143 Users.emplace_back(I);
3144 Worklist.push_back(I);
3145 continue;
3146 }
3147 }
3148
3149 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3150 Users.emplace_back(I);
3151 continue;
3152 }
3153
3154 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3155 getAllocationFamily(I, &TLI) == Family) {
3156 assert(Family);
3157 Users.emplace_back(I);
3158 continue;
3159 }
3160
3161 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
3162 getAllocationFamily(I, &TLI) == Family) {
3163 assert(Family);
3164 Users.emplace_back(I);
3165 Worklist.push_back(I);
3166 continue;
3167 }
3168
3169 return false;
3170
3171 case Instruction::Store: {
3172 StoreInst *SI = cast<StoreInst>(I);
3173 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3174 return false;
3175 Users.emplace_back(I);
3176 continue;
3177 }
3178 }
3179 llvm_unreachable("missing a return?");
3180 }
3181 } while (!Worklist.empty());
3182 return true;
3183}
3184
3185Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3186 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3187
3188 // If we have a malloc call which is only used in any amount of comparisons to
3189 // null and free calls, delete the calls and replace the comparisons with true
3190 // or false as appropriate.
3191
3192 // This is based on the principle that we can substitute our own allocation
3193 // function (which will never return null) rather than knowledge of the
3194 // specific function being called. In some sense this can change the permitted
3195 // outputs of a program (when we convert a malloc to an alloca, the fact that
3196 // the allocation is now on the stack is potentially visible, for example),
3197 // but we believe it does so in a permissible manner.
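// For example:
//   %p = call ptr @malloc(i64 4)
//   %c = icmp eq ptr %p, null
//   call void @free(ptr %p)
// can be reduced to "%c = i1 false", with the malloc and free erased.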
3198 SmallVector<WeakTrackingVH, 64> Users;
3199
3200 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3201 // before each store.
3202 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
3203 SmallVector<DbgVariableRecord *, 8> DVRs;
3204 std::unique_ptr<DIBuilder> DIB;
3205 if (isa<AllocaInst>(MI)) {
3206 findDbgUsers(DVIs, &MI, &DVRs);
3207 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3208 }
3209
3210 if (isAllocSiteRemovable(&MI, Users, TLI)) {
3211 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3212 // Lower all @llvm.objectsize calls first because they may
3213 // use a bitcast/GEP of the alloca we are removing.
3214 if (!Users[i])
3215 continue;
3216
3217 Instruction *I = cast<Instruction>(&*Users[i]);
3218
3219 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3220 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3221 SmallVector<Instruction *> InsertedInstructions;
3222 Value *Result = lowerObjectSizeCall(
3223 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3224 for (Instruction *Inserted : InsertedInstructions)
3225 Worklist.add(Inserted);
3226 replaceInstUsesWith(*I, Result);
3227 eraseInstFromFunction(*I);
3228 Users[i] = nullptr; // Skip examining in the next loop.
3229 }
3230 }
3231 }
3232 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
3233 if (!Users[i])
3234 continue;
3235
3236 Instruction *I = cast<Instruction>(&*Users[i]);
3237
3238 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
3239 replaceInstUsesWith(*C,
3240 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3241 C->isFalseWhenEqual()));
3242 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3243 for (auto *DVI : DVIs)
3244 if (DVI->isAddressOfVariable())
3245 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
3246 for (auto *DVR : DVRs)
3247 if (DVR->isAddressOfVariable())
3248 ConvertDebugDeclareToDebugValue(DVR, SI, *DIB);
3249 } else {
3250 // Casts, GEP, or anything else: we're about to delete this instruction,
3251 // so it can not have any valid uses.
3252 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
3253 }
3254 eraseInstFromFunction(*I);
3255 }
3256
3257 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
3258 // Replace invoke with a NOP intrinsic to maintain the original CFG
3259 Module *M = II->getModule();
3260 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
3261 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
3262 std::nullopt, "", II->getParent());
3263 }
3264
3265 // Remove debug intrinsics which describe the value contained within the
3266 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3267 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3268 //
3269 // ```
3270 // define void @foo(i32 %0) {
3271 // %a = alloca i32 ; Deleted.
3272 // store i32 %0, i32* %a
3273 // dbg.value(i32 %0, "arg0") ; Not deleted.
3274 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3275 // call void @trivially_inlinable_no_op(i32* %a)
3276 // ret void
3277 // }
3278 // ```
3279 //
3280 // This may not be required if we stop describing the contents of allocas
3281 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3282 // the LowerDbgDeclare utility.
3283 //
3284 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3285 // "arg0" dbg.value may be stale after the call. However, failing to remove
3286 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3287 //
3288 // FIXME: the Assignment Tracking project has now likely made this
3289 // redundant (and it's sometimes harmful).
3290 for (auto *DVI : DVIs)
3291 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
3292 DVI->eraseFromParent();
3293 for (auto *DVR : DVRs)
3294 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3295 DVR->eraseFromParent();
3296
3297 return eraseInstFromFunction(MI);
3298 }
3299 return nullptr;
3300}
3301
3302/// Move the call to free before a NULL test.
3303///
3304/// Check if this free is accessed after its argument has been tested
3305/// against NULL (property 0).
3306/// If yes, it is legal to move this call into its predecessor block.
3307///
3308/// The move is performed only if the block containing the call to free
3309/// will be removed, i.e.:
3310/// 1. it has only one predecessor P, and P has two successors
3311/// 2. it contains the call, noops, and an unconditional branch
3312/// 3. its successor is the same as its predecessor's successor
3313///
3314/// Profitability is not a concern here; this function should be called
3315/// only if the caller already knows this transformation would be
3316/// profitable (e.g., for code size).
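/// The canonical source pattern is "if (p) free(p);": the block with the free
/// call has the null-check block as its single predecessor and branches to the
/// same block the null case falls through to. Hoisting the call (free(null) is
/// a no-op) leaves an empty block that SimplifyCFG can then remove.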
3317static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3318 const DataLayout &DL) {
3319 Value *Op = FI.getArgOperand(0);
3320 BasicBlock *FreeInstrBB = FI.getParent();
3321 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3322
3323 // Validate part of constraint #1: Only one predecessor
3324 // FIXME: We can extend the number of predecessors, but in that case, we
3325 // would duplicate the call to free in each predecessor and it may
3326 // not be profitable even for code size.
3327 if (!PredBB)
3328 return nullptr;
3329
3330 // Validate constraint #2: Does this block contain only the call to
3331 // free, noops, and an unconditional branch?
3332 BasicBlock *SuccBB;
3333 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3334 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
3335 return nullptr;
3336
3337 // If there are only 2 instructions in the block, at this point,
3338 // they are the call to free and the unconditional branch.
3339 // If there are more than 2 instructions, check that they are noops
3340 // i.e., they won't hurt the performance of the generated code.
3341 if (FreeInstrBB->size() != 2) {
3342 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3343 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3344 continue;
3345 auto *Cast = dyn_cast<CastInst>(&Inst);
3346 if (!Cast || !Cast->isNoopCast(DL))
3347 return nullptr;
3348 }
3349 }
3350 // Validate the rest of constraint #1 by matching on the pred branch.
3351 Instruction *TI = PredBB->getTerminator();
3352 BasicBlock *TrueBB, *FalseBB;
3353 ICmpInst::Predicate Pred;
3354 if (!match(TI, m_Br(m_ICmp(Pred,
3355 m_CombineOr(m_Specific(Op),
3356 m_Specific(Op->stripPointerCasts())),
3357 m_Zero()),
3358 TrueBB, FalseBB)))
3359 return nullptr;
3360 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3361 return nullptr;
3362
3363 // Validate constraint #3: Ensure the null case just falls through.
3364 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3365 return nullptr;
3366 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3367 "Broken CFG: missing edge from predecessor to successor");
3368
3369 // At this point, we know that everything in FreeInstrBB can be moved
3370 // before TI.
3371 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3372 if (&Instr == FreeInstrBBTerminator)
3373 break;
3374 Instr.moveBeforePreserving(TI);
3375 }
3376 assert(FreeInstrBB->size() == 1 &&
3377 "Only the branch instruction should remain");
3378
3379 // Now that we've moved the call to free before the NULL check, we have to
3380 // remove any attributes on its parameter that imply it's non-null, because
3381 // those attributes might have only been valid because of the NULL check, and
3382 // we can get miscompiles if we keep them. This is conservative if non-null is
3383 // also implied by something other than the NULL check, but it's guaranteed to
3384 // be correct, and the conservativeness won't matter in practice, since the
3385 // attributes are irrelevant for the call to free itself and the pointer
3386 // shouldn't be used after the call.
3387 AttributeList Attrs = FI.getAttributes();
3388 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3389 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3390 if (Dereferenceable.isValid()) {
3391 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3392 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3393 Attribute::Dereferenceable);
3394 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3395 }
3396 FI.setAttributes(Attrs);
3397
3398 return &FI;
3399}
3400
3401Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3402 // free undef -> unreachable.
3403 if (isa<UndefValue>(Op)) {
3404 // Leave a marker since we can't modify the CFG here.
3405 CreateNonTerminatorUnreachable(&FI);
3406 return eraseInstFromFunction(FI);
3407 }
3408
3409 // If we have 'free null' delete the instruction. This can happen in stl code
3410 // when lots of inlining happens.
3411 if (isa<ConstantPointerNull>(Op))
3412 return eraseInstFromFunction(FI);
3413
3414 // If we had free(realloc(...)) with no intervening uses, then eliminate the
3415 // realloc() entirely.
3416 CallInst *CI = dyn_cast<CallInst>(Op);
3417 if (CI && CI->hasOneUse())
3418 if (Value *ReallocatedOp = getReallocatedOperand(CI))
3419 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3420
3421 // If we optimize for code size, try to move the call to free before the null
3422 // test so that simplify cfg can remove the empty block and dead code
3423 // elimination can remove the branch. I.e., it helps to turn something like:
3424 // if (foo) free(foo);
3425 // into
3426 // free(foo);
3427 //
3428 // Note that we can only do this for 'free' and not for any flavor of
3429 // 'operator delete'; there is no 'operator delete' symbol for which we are
3430 // permitted to invent a call, even if we're passing in a null pointer.
3431 if (MinimizeSize) {
3432 LibFunc Func;
3433 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3434 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3435 return I;
3436 }
3437
3438 return nullptr;
3439}
3440
3441Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3442 Value *RetVal = RI.getReturnValue();
3443 if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
3444 return nullptr;
3445
3446 Function *F = RI.getFunction();
3447 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
3448 if (ReturnClass == fcNone)
3449 return nullptr;
3450
3451 KnownFPClass KnownClass;
3452 Value *Simplified =
3453 SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
3454 if (!Simplified)
3455 return nullptr;
3456
3457 return ReturnInst::Create(RI.getContext(), Simplified);
3458}
3459
3460// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3461bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
3462 // Try to remove the previous instruction if it must lead to unreachable.
3463 // This includes instructions like stores and "llvm.assume" that may not get
3464 // removed by simple dead code elimination.
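// For example, in
//   store i32 0, ptr @g
//   call void @llvm.assume(i1 true)
//   unreachable
// both the store and the assume can be erased: neither can stop execution
// from reaching the unreachable terminator, so their effects are never
// observable.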
3465 bool Changed = false;
3466 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3467 // While we theoretically can erase EH, that would result in a block that
3468 // used to start with an EH no longer starting with EH, which is invalid.
3469 // To make it valid, we'd need to fixup predecessors to no longer refer to
3470 // this block, but that changes CFG, which is not allowed in InstCombine.
3471 if (Prev->isEHPad())
3472 break; // Can not drop any more instructions. We're done here.
3473
3474 if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3475 break; // Can not drop any more instructions. We're done here.
3476 // Otherwise, this instruction can be freely erased,
3477 // even if it is not side-effect free.
3478
3479 // A value may still have uses before we process it here (for example, in
3480 // another unreachable block), so convert those to poison.
3481 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3482 eraseInstFromFunction(*Prev);
3483 Changed = true;
3484 }
3485 return Changed;
3486}
3487
3488Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3489 removeInstructionsBeforeUnreachable(I);
3490 return nullptr;
3491}
3492
3493Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3494 assert(BI.isUnconditional() && "Only for unconditional branches.");
3495
3496 // If this store is the second-to-last instruction in the basic block
3497 // (excluding debug info and bitcasts of pointers) and if the block ends with
3498 // an unconditional branch, try to move the store to the successor block.
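// For example, when each predecessor of %merge ends in an identical
// "store i32 %v, ptr %p" followed by "br label %merge", mergeStoreIntoSuccessor
// can combine them into a single store (of a phi of the stored values) at the
// start of %merge.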
3499
3500 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3501 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3502 return BBI->isDebugOrPseudoInst() ||
3503 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3504 };
3505
3506 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3507 do {
3508 if (BBI != FirstInstr)
3509 --BBI;
3510 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3511
3512 return dyn_cast<StoreInst>(BBI);
3513 };
3514
3515 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3516 if (mergeStoreIntoSuccessor(*SI))
3517 return &BI;
3518
3519 return nullptr;
3520}
3521
3522void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
3523 SmallVectorImpl<BasicBlock *> &Worklist) {
3524 if (!DeadEdges.insert({From, To}).second)
3525 return;
3526
3527 // Replace phi node operands in successor with poison.
3528 for (PHINode &PN : To->phis())
3529 for (Use &U : PN.incoming_values())
3530 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
3531 replaceUse(U, PoisonValue::get(PN.getType()));
3532 addToWorklist(&PN);
3533 MadeIRChange = true;
3534 }
3535
3536 Worklist.push_back(To);
3537}
3538
3539// Under the assumption that I is unreachable, remove it and following
3540// instructions. Changes are reported directly to MadeIRChange.
3541void InstCombinerImpl::handleUnreachableFrom(
3542 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
3543 BasicBlock *BB = I->getParent();
3544 for (Instruction &Inst : make_early_inc_range(
3545 make_range(std::next(BB->getTerminator()->getReverseIterator()),
3546 std::next(I->getReverseIterator())))) {
3547 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
3548 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
3549 MadeIRChange = true;
3550 }
3551 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
3552 continue;
3553 // RemoveDIs: erase debug-info on this instruction manually.
3554 Inst.dropDbgRecords();
3555 eraseInstFromFunction(Inst);
3556 MadeIRChange = true;
3557 }
3558
3559 SmallVector<Value *> Changed;
3560 if (handleUnreachableTerminator(BB->getTerminator(), Changed)) {
3561 MadeIRChange = true;
3562 for (Value *V : Changed)
3563 addToWorklist(cast<Instruction>(V));
3564 }
3565
3566 // Handle potentially dead successors.
3567 for (BasicBlock *Succ : successors(BB))
3568 addDeadEdge(BB, Succ, Worklist);
3569}
3570
3571void InstCombinerImpl::handlePotentiallyDeadBlocks(
3572 SmallVectorImpl<BasicBlock *> &Worklist) {
3573 while (!Worklist.empty()) {
3574 BasicBlock *BB = Worklist.pop_back_val();
3575 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
3576 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
3577 }))
3578 continue;
3579
3580 handleUnreachableFrom(&BB->front(), Worklist);
3581 }
3582}
3583
3584void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
3585 BasicBlock *LiveSucc) {
3586 SmallVector<BasicBlock *> Worklist;
3587 for (BasicBlock *Succ : successors(BB)) {
3588 // The live successor isn't dead.
3589 if (Succ == LiveSucc)
3590 continue;
3591
3592 addDeadEdge(BB, Succ, Worklist);
3593 }
3594
3595 handlePotentiallyDeadBlocks(Worklist);
3596}
3597
3598Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3599 if (BI.isUnconditional())
3600 return visitUnconditionalBranchInst(BI);
3601
3602 // Change br (not X), label True, label False to: br X, label False, True
3603 Value *Cond = BI.getCondition();
3604 Value *X;
3605 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3606 // Swap Destinations and condition...
3607 BI.swapSuccessors();
3608 if (BPI)
3609 BPI->swapSuccEdgesProbabilities(BI.getParent());
3610 return replaceOperand(BI, 0, X);
3611 }
3612
3613 // Canonicalize logical-and-with-invert as logical-or-with-invert.
3614 // This is done by inverting the condition and swapping successors:
3615 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3616 Value *Y;
3617 if (isa<SelectInst>(Cond) &&
3618 match(Cond,
3620 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3621 Value *Or = Builder.CreateLogicalOr(NotX, Y);
3622 BI.swapSuccessors();
3623 if (BPI)
3624 BPI->swapSuccEdgesProbabilities(BI.getParent());
3625 return replaceOperand(BI, 0, Or);
3626 }
3627
3628 // If the condition is irrelevant, remove the use so that other
3629 // transforms on the condition become more effective.
3630 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3631 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3632
3633 // Canonicalize, for example, fcmp_one -> fcmp_ueq.
3634 CmpInst::Predicate Pred;
3635 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3636 !isCanonicalPredicate(Pred)) {
3637 // Swap destinations and condition.
3638 auto *Cmp = cast<CmpInst>(Cond);
3639 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3640 BI.swapSuccessors();
3641 if (BPI)
3642 BPI->swapSuccEdgesProbabilities(BI.getParent());
3643 Worklist.push(Cmp);
3644 return &BI;
3645 }
3646
3647 if (isa<UndefValue>(Cond)) {
3648 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
3649 return nullptr;
3650 }
3651 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3652 handlePotentiallyDeadSuccessors(BI.getParent(),
3653 BI.getSuccessor(!CI->getZExtValue()));
3654 return nullptr;
3655 }
3656
3657 DC.registerBranch(&BI);
3658 return nullptr;
3659}
3660
3661// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
3662// we can prove that both (switch C) and (switch X) go to the default when cond
3663// is false/true.
3664static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
3665 SelectInst *Select,
3666 bool IsTrueArm) {
3667 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
3668 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
3669 if (!C)
3670 return nullptr;
3671
3672 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
3673 if (CstBB != SI.getDefaultDest())
3674 return nullptr;
3675 Value *X = Select->getOperand(3 - CstOpIdx);
3676 ICmpInst::Predicate Pred;
3677 const APInt *RHSC;
3678 if (!match(Select->getCondition(),
3679 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
3680 return nullptr;
3681 if (IsTrueArm)
3682 Pred = ICmpInst::getInversePredicate(Pred);
3683
3684 // See whether we can replace the select with X
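// CR describes the values X can take whenever the select actually yields X.
// If every case value lies in CR, then whenever the select yields C instead,
// X lies outside CR and matches no case, so switch(X) also branches to the
// default destination, exactly like switch(C) does.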
3685 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
3686 for (auto Case : SI.cases())
3687 if (!CR.contains(Case.getCaseValue()->getValue()))
3688 return nullptr;
3689
3690 return X;
3691}
3692
3693Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3694 Value *Cond = SI.getCondition();
3695 Value *Op0;
3696 ConstantInt *AddRHS;
3697 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3698 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3699 for (auto Case : SI.cases()) {
3700 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3701 assert(isa<ConstantInt>(NewCase) &&
3702 "Result of expression should be constant");
3703 Case.setValue(cast<ConstantInt>(NewCase));
3704 }
3705 return replaceOperand(SI, 0, Op0);
3706 }
3707
3708 ConstantInt *SubLHS;
3709 if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) {
3710 // Change 'switch (1-X) case 1:' into 'switch (X) case 0'.
3711 for (auto Case : SI.cases()) {
3712 Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue());
3713 assert(isa<ConstantInt>(NewCase) &&
3714 "Result of expression should be constant");
3715 Case.setValue(cast<ConstantInt>(NewCase));
3716 }
3717 return replaceOperand(SI, 0, Op0);
3718 }
3719
3720 uint64_t ShiftAmt;
3721 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
3722 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
3723 all_of(SI.cases(), [&](const auto &Case) {
3724 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
3725 })) {
3726 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
3727 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond);
3728 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
3729 Shl->hasOneUse()) {
3730 Value *NewCond = Op0;
3731 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
3732 // If the shift may wrap, we need to mask off the shifted bits.
3733 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
3734 NewCond = Builder.CreateAnd(
3735 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
3736 }
3737 for (auto Case : SI.cases()) {
3738 const APInt &CaseVal = Case.getCaseValue()->getValue();
3739 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
3740 : CaseVal.lshr(ShiftAmt);
3741 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
3742 }
3743 return replaceOperand(SI, 0, NewCond);
3744 }
3745 }
3746
3747 // Fold switch(zext/sext(X)) into switch(X) if possible.
3748 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
3749 bool IsZExt = isa<ZExtInst>(Cond);
3750 Type *SrcTy = Op0->getType();
3751 unsigned NewWidth = SrcTy->getScalarSizeInBits();
3752
3753 if (all_of(SI.cases(), [&](const auto &Case) {
3754 const APInt &CaseVal = Case.getCaseValue()->getValue();
3755 return IsZExt ? CaseVal.isIntN(NewWidth)
3756 : CaseVal.isSignedIntN(NewWidth);
3757 })) {
3758 for (auto &Case : SI.cases()) {
3759 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3760 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3761 }
3762 return replaceOperand(SI, 0, Op0);
3763 }
3764 }
3765
3766 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
3767 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
3768 if (Value *V =
3769 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
3770 return replaceOperand(SI, 0, V);
3771 if (Value *V =
3772 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
3773 return replaceOperand(SI, 0, V);
3774 }
3775
3776 KnownBits Known = computeKnownBits(Cond, 0, &SI);
3777 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3778 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3779
3780 // Compute the number of leading bits we can ignore.
3781 // TODO: A better way to determine this would use ComputeNumSignBits().
3782 for (const auto &C : SI.cases()) {
3783 LeadingKnownZeros =
3784 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
3785 LeadingKnownOnes =
3786 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
3787 }
3788
3789 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3790
3791 // Shrink the condition operand if the new type is smaller than the old type.
3792 // But do not shrink to a non-standard type, because backend can't generate
3793 // good code for that yet.
3794 // TODO: We can make it aggressive again after fixing PR39569.
3795 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3796 shouldChangeType(Known.getBitWidth(), NewWidth)) {
3797 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3798 Builder.SetInsertPoint(&SI);
3799 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3800
3801 for (auto Case : SI.cases()) {
3802 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3803 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3804 }
3805 return replaceOperand(SI, 0, NewCond);
3806 }
3807
3808 if (isa<UndefValue>(Cond)) {
3809 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
3810 return nullptr;
3811 }
3812 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
3813 handlePotentiallyDeadSuccessors(SI.getParent(),
3814 SI.findCaseValue(CI)->getCaseSuccessor());
3815 return nullptr;
3816 }
3817
3818 return nullptr;
3819}
3820
3821Instruction *
3822InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3823 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3824 if (!WO)
3825 return nullptr;
3826
3827 Intrinsic::ID OvID = WO->getIntrinsicID();
3828 const APInt *C = nullptr;
3829 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
3830 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3831 OvID == Intrinsic::umul_with_overflow)) {
3832 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3833 if (C->isAllOnes())
3834 return BinaryOperator::CreateNeg(WO->getLHS());
3835 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3836 if (C->isPowerOf2()) {
3837 return BinaryOperator::CreateShl(
3838 WO->getLHS(),
3839 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3840 }
3841 }
3842 }
3843
3844 // We're extracting from an overflow intrinsic. See if we're the only user.
3845 // That allows us to simplify multiple result intrinsics to simpler things
3846 // that just get one value.
3847 if (!WO->hasOneUse())
3848 return nullptr;
3849
3850 // Check if we're grabbing only the result of a 'with overflow' intrinsic
3851 // and replace it with a traditional binary instruction.
3852 if (*EV.idx_begin() == 0) {
3853 Instruction::BinaryOps BinOp = WO->getBinaryOp();
3854 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3855 // Replace the old instruction's uses with poison.
3856 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3857 eraseInstFromFunction(*WO);
3858 return BinaryOperator::Create(BinOp, LHS, RHS);
3859 }
3860
3861 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3862
3863 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3864 if (OvID == Intrinsic::usub_with_overflow)
3865 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3866
3867 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3868 // +1 is not possible because we assume signed values.
3869 if (OvID == Intrinsic::smul_with_overflow &&
3870 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3871 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3872
3873 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
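// E.g. for i32, X * X overflows (unsigned) exactly when X > 65535.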
3874 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
3875 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
3876 // Only handle even bitwidths for performance reasons.
3877 if (BitWidth % 2 == 0)
3878 return new ICmpInst(
3879 ICmpInst::ICMP_UGT, WO->getLHS(),
3880 ConstantInt::get(WO->getLHS()->getType(),
3881 APInt::getLowBitsSet(BitWidth, BitWidth / 2)));
3882 }
3883
3884 // If only the overflow result is used, and the right hand side is a
3885 // constant (or constant splat), we can remove the intrinsic by directly
3886 // checking for overflow.
3887 if (C) {
3888 // Compute the no-wrap range for LHS given RHS=C, then construct an
3889 // equivalent icmp, potentially using an offset.
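// For example, the overflow bit of "uadd.with.overflow(i8 X, 42)" becomes
// "icmp uge i8 X, 214", since the add wraps exactly when X >= 256 - 42.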
3890 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3891 WO->getBinaryOp(), *C, WO->getNoWrapKind());
3892
3893 CmpInst::Predicate Pred;
3894 APInt NewRHSC, Offset;
3895 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3896 auto *OpTy = WO->getRHS()->getType();
3897 auto *NewLHS = WO->getLHS();
3898 if (Offset != 0)
3899 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3900 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3901 ConstantInt::get(OpTy, NewRHSC));
3902 }
3903
3904 return nullptr;
3905}
3906
3907Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3908 Value *Agg = EV.getAggregateOperand();
3909
3910 if (!EV.hasIndices())
3911 return replaceInstUsesWith(EV, Agg);
3912
3913 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3914 SQ.getWithInstruction(&EV)))
3915 return replaceInstUsesWith(EV, V);
3916
3917 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3918 // We're extracting from an insertvalue instruction, compare the indices
3919 const unsigned *exti, *exte, *insi, *inse;
3920 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3921 exte = EV.idx_end(), inse = IV->idx_end();
3922 exti != exte && insi != inse;
3923 ++exti, ++insi) {
3924 if (*insi != *exti)
3925 // The insert and extract both reference distinctly different elements.
3926 // This means the extract is not influenced by the insert, and we can
3927 // replace the aggregate operand of the extract with the aggregate
3928 // operand of the insert. i.e., replace
3929 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3930 // %E = extractvalue { i32, { i32 } } %I, 0
3931 // with
3932 // %E = extractvalue { i32, { i32 } } %A, 0
3933 return ExtractValueInst::Create(IV->getAggregateOperand(),
3934 EV.getIndices());
3935 }
3936 if (exti == exte && insi == inse)
3937 // Both iterators are at the end: Index lists are identical. Replace
3938 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3939 // %C = extractvalue { i32, { i32 } } %B, 1, 0
3940 // with "i32 42"
3941 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3942 if (exti == exte) {
3943 // The extract list is a prefix of the insert list. i.e. replace
3944 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3945 // %E = extractvalue { i32, { i32 } } %I, 1
3946 // with
3947 // %X = extractvalue { i32, { i32 } } %A, 1
3948 // %E = insertvalue { i32 } %X, i32 42, 0
3949 // by switching the order of the insert and extract (though the
3950 // insertvalue should be left in, since it may have other uses).
3951 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3952 EV.getIndices());
3953 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3954 ArrayRef(insi, inse));
3955 }
3956 if (insi == inse)
3957 // The insert list is a prefix of the extract list
3958 // We can simply remove the common indices from the extract and make it
3959 // operate on the inserted value instead of the insertvalue result.
3960 // i.e., replace
3961 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3962 // %E = extractvalue { i32, { i32 } } %I, 1, 0
3963 // with
3964 // %E extractvalue { i32 } { i32 42 }, 0
3965 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3966 ArrayRef(exti, exte));
3967 }
3968
3969 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3970 return R;
3971
3972 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3973 // Bail out if the aggregate contains scalable vector type
3974 if (auto *STy = dyn_cast<StructType>(Agg->getType());
3975 STy && STy->containsScalableVectorType())
3976 return nullptr;
3977
3978 // If the (non-volatile) load only has one use, we can rewrite this to a
3979 // load from a GEP. This reduces the size of the load. If a load is used
3980 // only by extractvalue instructions then this either must have been
3981 // optimized before, or it is a struct with padding, in which case we
3982 // don't want to do the transformation as it loses padding knowledge.
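// For example
//   %agg = load { i32, i64 }, ptr %p
//   %x   = extractvalue { i32, i64 } %agg, 1
// becomes a load of just the second member through a GEP:
//   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
//   %x   = load i64, ptr %gep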
3983 if (L->isSimple() && L->hasOneUse()) {
3984 // extractvalue has integer indices, getelementptr has Value*s. Convert.
3985 SmallVector<Value*, 4> Indices;
3986 // Prefix an i32 0 since we need the first element.
3987 Indices.push_back(Builder.getInt32(0));
3988 for (unsigned Idx : EV.indices())
3989 Indices.push_back(Builder.getInt32(Idx));
3990
3991 // We need to insert these at the location of the old load, not at that of
3992 // the extractvalue.
3993 Builder.SetInsertPoint(L);
3994 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3995 L->getPointerOperand(), Indices);
3996 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3997 // Whatever aliasing information we had for the original load must also
3998 // hold for the smaller load, so propagate the annotations.
3999 NL->setAAMetadata(L->getAAMetadata());
4000 // Returning the load directly will cause the main loop to insert it in
4001 // the wrong spot, so use replaceInstUsesWith().
4002 return replaceInstUsesWith(EV, NL);
4003 }
4004 }
4005
4006 if (auto *PN = dyn_cast<PHINode>(Agg))
4007 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4008 return Res;
4009
4010 // Canonicalize extract (select Cond, TV, FV)
4011 // -> select cond, (extract TV), (extract FV)
4012 if (auto *SI = dyn_cast<SelectInst>(Agg))
4013 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4014 return R;
4015
4016 // We could simplify extracts from other values. Note that nested extracts may
4017 // already be simplified implicitly by the above: extract (extract (insert) )
4018 // will be translated into extract ( insert ( extract ) ) first and then just
4019 // the value inserted, if appropriate. Similarly for extracts from single-use
4020 // loads: extract (extract (load)) will be translated to extract (load (gep))
4021 // and if again single-use then via load (gep (gep)) to load (gep).
4022 // However, double extracts from e.g. function arguments or return values
4023 // aren't handled yet.
4024 return nullptr;
4025}
4026
4027/// Return 'true' if the given typeinfo will match anything.
4028static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4029 switch (Personality) {
4030 case EHPersonality::GNU_C:
4031 case EHPersonality::GNU_C_SjLj:
4032 case EHPersonality::Rust:
4033 // The GCC C EH and Rust personalities only exist to support cleanups, so
4034 // it's not clear what the semantics of catch clauses are.
4035 return false;
4036 case EHPersonality::Unknown:
4037 return false;
4038 case EHPersonality::GNU_Ada:
4039 // While __gnat_all_others_value will match any Ada exception, it doesn't
4040 // match foreign exceptions (or didn't, before gcc-4.7).
4041 return false;
4052 return TypeInfo->isNullValue();
4053 }
4054 llvm_unreachable("invalid enum");
4055}
4056
4057static bool shorter_filter(const Value *LHS, const Value *RHS) {
4058 return
4059 cast<ArrayType>(LHS->getType())->getNumElements()
4060 <
4061 cast<ArrayType>(RHS->getType())->getNumElements();
4062}
4063
4064Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4065 // The logic here should be correct for any real-world personality function.
4066 // However if that turns out not to be true, the offending logic can always
4067 // be conditioned on the personality function, like the catch-all logic is.
4068 EHPersonality Personality =
4069 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4070
4071 // Simplify the list of clauses, e.g., by removing repeated catch clauses
4072 // (these are often created by inlining).
4073 bool MakeNewInstruction = false; // If true, recreate using the following:
4074 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4075 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4076
4077 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4078 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4079 bool isLastClause = i + 1 == e;
4080 if (LI.isCatch(i)) {
4081 // A catch clause.
4082 Constant *CatchClause = LI.getClause(i);
4083 Constant *TypeInfo = CatchClause->stripPointerCasts();
4084
4085 // If we already saw this clause, there is no point in having a second
4086 // copy of it.
4087 if (AlreadyCaught.insert(TypeInfo).second) {
4088 // This catch clause was not already seen.
4089 NewClauses.push_back(CatchClause);
4090 } else {
4091 // Repeated catch clause - drop the redundant copy.
4092 MakeNewInstruction = true;
4093 }
4094
4095 // If this is a catch-all then there is no point in keeping any following
4096 // clauses or marking the landingpad as having a cleanup.
4097 if (isCatchAll(Personality, TypeInfo)) {
4098 if (!isLastClause)
4099 MakeNewInstruction = true;
4100 CleanupFlag = false;
4101 break;
4102 }
4103 } else {
4104 // A filter clause. If any of the filter elements were already caught
4105 // then they can be dropped from the filter. It is tempting to try to
4106 // exploit the filter further by saying that any typeinfo that does not
4107 // occur in the filter can't be caught later (and thus can be dropped).
4108 // However this would be wrong, since typeinfos can match without being
4109 // equal (for example if one represents a C++ class, and the other some
4110 // class derived from it).
4111 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4112 Constant *FilterClause = LI.getClause(i);
4113 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4114 unsigned NumTypeInfos = FilterType->getNumElements();
4115
4116 // An empty filter catches everything, so there is no point in keeping any
4117 // following clauses or marking the landingpad as having a cleanup. By
4118 // dealing with this case here the following code is made a bit simpler.
4119 if (!NumTypeInfos) {
4120 NewClauses.push_back(FilterClause);
4121 if (!isLastClause)
4122 MakeNewInstruction = true;
4123 CleanupFlag = false;
4124 break;
4125 }
4126
4127 bool MakeNewFilter = false; // If true, make a new filter.
4128 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4129 if (isa<ConstantAggregateZero>(FilterClause)) {
4130 // Not an empty filter - it contains at least one null typeinfo.
4131 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4132 Constant *TypeInfo =
4133 Constant::getNullValue(FilterType->getElementType());
4134 // If this typeinfo is a catch-all then the filter can never match.
4135 if (isCatchAll(Personality, TypeInfo)) {
4136 // Throw the filter away.
4137 MakeNewInstruction = true;
4138 continue;
4139 }
4140
4141 // There is no point in having multiple copies of this typeinfo, so
4142 // discard all but the first copy if there is more than one.
4143 NewFilterElts.push_back(TypeInfo);
4144 if (NumTypeInfos > 1)
4145 MakeNewFilter = true;
4146 } else {
4147 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4148 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4149 NewFilterElts.reserve(NumTypeInfos);
4150
4151 // Remove any filter elements that were already caught or that already
4152 // occurred in the filter. While there, see if any of the elements are
4153 // catch-alls. If so, the filter can be discarded.
4154 bool SawCatchAll = false;
4155 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4156 Constant *Elt = Filter->getOperand(j);
4157 Constant *TypeInfo = Elt->stripPointerCasts();
4158 if (isCatchAll(Personality, TypeInfo)) {
4159 // This element is a catch-all. Bail out, noting this fact.
4160 SawCatchAll = true;
4161 break;
4162 }
4163
4164 // Even if we've seen a type in a catch clause, we don't want to
4165 // remove it from the filter. An unexpected type handler may be
4166 // set up for a call site which throws an exception of the same
4167 // type caught. In order for the exception thrown by the unexpected
4168 // handler to propagate correctly, the filter must be correctly
4169 // described for the call site.
4170 //
4171 // Example:
4172 //
4173 // void unexpected() { throw 1;}
4174 // void foo() throw (int) {
4175 // std::set_unexpected(unexpected);
4176 // try {
4177 // throw 2.0;
4178 // } catch (int i) {}
4179 // }
4180
4181 // There is no point in having multiple copies of the same typeinfo in
4182 // a filter, so only add it if we didn't already.
4183 if (SeenInFilter.insert(TypeInfo).second)
4184 NewFilterElts.push_back(cast<Constant>(Elt));
4185 }
4186 // A filter containing a catch-all cannot match anything by definition.
4187 if (SawCatchAll) {
4188 // Throw the filter away.
4189 MakeNewInstruction = true;
4190 continue;
4191 }
4192
4193 // If we dropped something from the filter, make a new one.
4194 if (NewFilterElts.size() < NumTypeInfos)
4195 MakeNewFilter = true;
4196 }
4197 if (MakeNewFilter) {
4198 FilterType = ArrayType::get(FilterType->getElementType(),
4199 NewFilterElts.size());
4200 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
4201 MakeNewInstruction = true;
4202 }
4203
4204 NewClauses.push_back(FilterClause);
4205
4206 // If the new filter is empty then it will catch everything so there is
4207 // no point in keeping any following clauses or marking the landingpad
4208 // as having a cleanup. The case of the original filter being empty was
4209 // already handled above.
4210 if (MakeNewFilter && !NewFilterElts.size()) {
4211 assert(MakeNewInstruction && "New filter but not a new instruction!");
4212 CleanupFlag = false;
4213 break;
4214 }
4215 }
4216 }
4217
4218 // If several filters occur in a row then reorder them so that the shortest
4219 // filters come first (those with the smallest number of elements). This is
4220 // advantageous because shorter filters are more likely to match, speeding up
4221 // unwinding, but mostly because it increases the effectiveness of the other
4222 // filter optimizations below.
4223 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4224 unsigned j;
4225 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4226 for (j = i; j != e; ++j)
4227 if (!isa<ArrayType>(NewClauses[j]->getType()))
4228 break;
4229
4230 // Check whether the filters are already sorted by length. We need to know
4231 // if sorting them is actually going to do anything so that we only make a
4232 // new landingpad instruction if it does.
4233 for (unsigned k = i; k + 1 < j; ++k)
4234 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
4235 // Not sorted, so sort the filters now. Doing an unstable sort would be
4236 // correct too but reordering filters pointlessly might confuse users.
4237 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
4238 shorter_filter);
4239 MakeNewInstruction = true;
4240 break;
4241 }
4242
4243 // Look for the next batch of filters.
4244 i = j + 1;
4245 }
4246
4247 // If typeinfos matched if and only if equal, then the elements of a filter L
4248 // that occurs later than a filter F could be replaced by the intersection of
4249 // the elements of F and L. In reality two typeinfos can match without being
4250 // equal (for example if one represents a C++ class, and the other some class
4251 // derived from it) so it would be wrong to perform this transform in general.
4252 // However the transform is correct and useful if F is a subset of L. In that
4253 // case L can be replaced by F, and thus removed altogether since repeating a
4254 // filter is pointless. So here we look at all pairs of filters F and L where
4255 // L follows F in the list of clauses, and remove L if every element of F is
4256 // an element of L. This can occur when inlining C++ functions with exception
4257 // specifications.
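// For example, if an earlier clause is "filter [1 x ptr] [ptr @ti.A]" and a
// later one is "filter [2 x ptr] [ptr @ti.A, ptr @ti.B]" (names illustrative),
// the earlier filter is a subset of the later one, so the later, longer filter
// is deleted.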
4258 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
4259 // Examine each filter in turn.
4260 Value *Filter = NewClauses[i];
4261 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
4262 if (!FTy)
4263 // Not a filter - skip it.
4264 continue;
4265 unsigned FElts = FTy->getNumElements();
4266 // Examine each filter following this one. Doing this backwards means that
4267 // we don't have to worry about filters disappearing under us when removed.
4268 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
4269 Value *LFilter = NewClauses[j];
4270 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
4271 if (!LTy)
4272 // Not a filter - skip it.
4273 continue;
4274 // If Filter is a subset of LFilter, i.e. every element of Filter is also
4275 // an element of LFilter, then discard LFilter.
4276 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
4277 // If Filter is empty then it is a subset of LFilter.
4278 if (!FElts) {
4279 // Discard LFilter.
4280 NewClauses.erase(J);
4281 MakeNewInstruction = true;
4282 // Move on to the next filter.
4283 continue;
4284 }
4285 unsigned LElts = LTy->getNumElements();
4286 // If Filter is longer than LFilter then it cannot be a subset of it.
4287 if (FElts > LElts)
4288 // Move on to the next filter.
4289 continue;
4290 // At this point we know that LFilter has at least one element.
4291 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
4292 // Filter is a subset of LFilter iff Filter contains only zeros (as we
4293 // already know that Filter is not longer than LFilter).
4294 if (isa<ConstantAggregateZero>(Filter)) {
4295 assert(FElts <= LElts && "Should have handled this case earlier!");
4296 // Discard LFilter.
4297 NewClauses.erase(J);
4298 MakeNewInstruction = true;
4299 }
4300 // Move on to the next filter.
4301 continue;
4302 }
4303 ConstantArray *LArray = cast<ConstantArray>(LFilter);
4304 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
4305 // Since Filter is non-empty and contains only zeros, it is a subset of
4306 // LFilter iff LFilter contains a zero.
4307 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
4308 for (unsigned l = 0; l != LElts; ++l)
4309 if (LArray->getOperand(l)->isNullValue()) {
4310 // LFilter contains a zero - discard it.
4311 NewClauses.erase(J);
4312 MakeNewInstruction = true;
4313 break;
4314 }
4315 // Move on to the next filter.
4316 continue;
4317 }
4318 // At this point we know that both filters are ConstantArrays. Loop over
4319 // operands to see whether every element of Filter is also an element of
4320 // LFilter. Since filters tend to be short this is probably faster than
4321 // using a method that scales nicely.
4322 ConstantArray *FArray = cast<ConstantArray>(Filter);
4323 bool AllFound = true;
4324 for (unsigned f = 0; f != FElts; ++f) {
4325 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
4326 AllFound = false;
4327 for (unsigned l = 0; l != LElts; ++l) {
4328 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
4329 if (LTypeInfo == FTypeInfo) {
4330 AllFound = true;
4331 break;
4332 }
4333 }
4334 if (!AllFound)
4335 break;
4336 }
4337 if (AllFound) {
4338 // Discard LFilter.
4339 NewClauses.erase(J);
4340 MakeNewInstruction = true;
4341 }
4342 // Move on to the next filter.
4343 }
4344 }
4345
4346 // If we changed any of the clauses, replace the old landingpad instruction
4347 // with a new one.
4348 if (MakeNewInstruction) {
4349 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
4350 NewClauses.size());
4351 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
4352 NLI->addClause(NewClauses[i]);
4353 // A landing pad with no clauses must have the cleanup flag set. It is
4354 // theoretically possible, though highly unlikely, that we eliminated all
4355 // clauses. If so, force the cleanup flag to true.
4356 if (NewClauses.empty())
4357 CleanupFlag = true;
4358 NLI->setCleanup(CleanupFlag);
4359 return NLI;
4360 }
4361
4362 // Even if none of the clauses changed, we may nonetheless have understood
4363 // that the cleanup flag is pointless. Clear it if so.
4364 if (LI.isCleanup() != CleanupFlag) {
4365 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
4366 LI.setCleanup(CleanupFlag);
4367 return &LI;
4368 }
4369
4370 return nullptr;
4371}
4372
4373Value *
4374InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
4375 // Try to push freeze through instructions that propagate but don't produce
4376 // poison as far as possible. If an operand of the freeze satisfies three
4377 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
4378 // but one of its operands are guaranteed non-poison, then push the freeze
4379 // through to the one operand that is not guaranteed non-poison. The actual
4380 // transform is as follows.
4381 // Op1 = ...                        ; Op1 can be poison
4382 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only one
4383 //                                  ; operand that may be poison
4384 // ... = Freeze(Op0)
4385 // =>
4386 // Op1 = ...
4387 // Op1.fr = Freeze(Op1)
4388 // ... = Inst(Op1.fr, NonPoisonOps...)
4389 auto *OrigOp = OrigFI.getOperand(0);
4390 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
4391
4392 // While we could change the other users of OrigOp to use freeze(OrigOp), that
4393 // potentially reduces their optimization potential, so let's only do this iff
4394 // the OrigOp is only used by the freeze.
4395 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
4396 return nullptr;
4397
4398 // We can't push the freeze through an instruction which can itself create
4399 // poison. If the only source of new poison is flags, we can simply
4400 // strip them (since we know the only use is the freeze and nothing can
4401 // benefit from them.)
4402 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
4403 /*ConsiderFlagsAndMetadata*/ false))
4404 return nullptr;
4405
4406 // If operand is guaranteed not to be poison, there is no need to add freeze
4407 // to the operand. So we first find the operand that is not guaranteed to be
4408 // poison.
4409 Use *MaybePoisonOperand = nullptr;
4410 for (Use &U : OrigOpInst->operands()) {
4411 if (isa<MetadataAsValue>(U.get()) ||
4412 isGuaranteedNotToBeUndefOrPoison(U.get()))
4413 continue;
4414 if (!MaybePoisonOperand)
4415 MaybePoisonOperand = &U;
4416 else
4417 return nullptr;
4418 }
4419
4420 OrigOpInst->dropPoisonGeneratingAnnotations();
4421
4422 // If all operands are guaranteed to be non-poison, we can drop freeze.
4423 if (!MaybePoisonOperand)
4424 return OrigOp;
4425
4426 Builder.SetInsertPoint(OrigOpInst);
4427 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
4428 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
4429
4430 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
4431 return OrigOp;
4432}
4433
4434Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
4435 PHINode *PN) {
4436 // Detect whether this is a recurrence with a start value and some number of
4437 // backedge values. We'll check whether we can push the freeze through the
4438 // backedge values (possibly dropping poison flags along the way) until we
4439 // reach the phi again. In that case, we can move the freeze to the start
4440 // value.
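// For example, given
//   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nuw i32 %iv, 1
// a "freeze %iv" can be folded away by freezing %start instead and dropping
// the nuw flag from the add (the only remaining source of poison on the
// backedge).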
4441 Use *StartU = nullptr;
4442 SmallVector<Value *> Worklist;
4443 for (Use &U : PN->incoming_values()) {
4444 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
4445 // Add backedge value to worklist.
4446 Worklist.push_back(U.get());
4447 continue;
4448 }
4449
4450 // Don't bother handling multiple start values.
4451 if (StartU)
4452 return nullptr;
4453 StartU = &U;
4454 }
4455
4456 if (!StartU || Worklist.empty())
4457 return nullptr; // Not a recurrence.
4458
4459 Value *StartV = StartU->get();
4460 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
4461 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
4462 // We can't insert freeze if the start value is the result of the
4463 // terminator (e.g. an invoke).
4464 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
4465 return nullptr;
4466
4467 SmallPtrSet<Value *, 32> Visited;
4468 SmallVector<Instruction *> DropFlags;
4469 while (!Worklist.empty()) {
4470 Value *V = Worklist.pop_back_val();
4471 if (!Visited.insert(V).second)
4472 continue;
4473
4474 if (Visited.size() > 32)
4475 return nullptr; // Limit the total number of values we inspect.
4476
4477 // Assume that PN is non-poison, because it will be after the transform.
4478 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
4479 continue;
4480
4481 Instruction *I = dyn_cast<Instruction>(V);
4482 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
4483 /*ConsiderFlagsAndMetadata*/ false))
4484 return nullptr;
4485
4486 DropFlags.push_back(I);
4487 append_range(Worklist, I->operands());
4488 }
4489
4490 for (Instruction *I : DropFlags)
4491 I->dropPoisonGeneratingAnnotations();
4492
4493 if (StartNeedsFreeze) {
4494 Builder.SetInsertPoint(StartBB->getTerminator());
4495 Value *FrozenStartV = Builder.CreateFreeze(StartV,
4496 StartV->getName() + ".fr");
4497 replaceUse(*StartU, FrozenStartV);
4498 }
4499 return replaceInstUsesWith(FI, PN);
4500}
4501
4502bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
4503 Value *Op = FI.getOperand(0);
4504
4505 if (isa<Constant>(Op) || Op->hasOneUse())
4506 return false;
4507
4508 // Move the freeze directly after the definition of its operand, so that
4509 // it dominates the maximum number of uses. Note that it may not dominate
4510 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
4511 // the normal/default destination. This is why the domination check in the
4512 // replacement below is still necessary.
4513 BasicBlock::iterator MoveBefore;
4514 if (isa<Argument>(Op)) {
4515 MoveBefore =
4516 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
4517 } else {
4518 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
4519 if (!MoveBeforeOpt)
4520 return false;
4521 MoveBefore = *MoveBeforeOpt;
4522 }
4523
4524 // Don't move to the position of a debug intrinsic.
4525 if (isa<DbgInfoIntrinsic>(MoveBefore))
4526 MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator();
4527 // Re-point iterator to come after any debug-info records, if we're
4528 // running in "RemoveDIs" mode
4529 MoveBefore.setHeadBit(false);
4530
4531 bool Changed = false;
4532 if (&FI != &*MoveBefore) {
4533 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
4534 Changed = true;
4535 }
4536
4537 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
4538 bool Dominates = DT.dominates(&FI, U);
4539 Changed |= Dominates;
4540 return Dominates;
4541 });
4542
4543 return Changed;
4544}
4545
4546// Check if any direct or bitcast user of this value is a shuffle instruction.
4547static bool isUsedWithinShuffleVector(Value *V) {
4548 for (auto *U : V->users()) {
4549 if (isa<ShuffleVectorInst>(U))
4550 return true;
4551 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
4552 return true;
4553 }
4554 return false;
4555}
4556
4557Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
4558 Value *Op0 = I.getOperand(0);
4559
4560 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
4561 return replaceInstUsesWith(I, V);
4562
4563 // freeze (phi const, x) --> phi const, (freeze x)
4564 if (auto *PN = dyn_cast<PHINode>(Op0)) {
4565 if (Instruction *NV = foldOpIntoPhi(I, PN))
4566 return NV;
4567 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
4568 return NV;
4569 }
4570
4571 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
4572 return replaceInstUsesWith(I, NI);
4573
4574 // If I is freeze(undef), check its uses and fold it to a fixed constant.
4575 // - or: pick -1
4576 // - select's condition: if the true value is constant, choose it by making
4577 // the condition true.
4578 // - default: pick 0
4579 //
4580 // Note that this transform is intentionally done here rather than
4581 // via an analysis in InstSimplify or at individual user sites. That is
4582 // because we must produce the same value for all uses of the freeze -
4583 // it's the reason "freeze" exists!
4584 //
4585 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
4586 // duplicating logic for binops at least.
4587 auto getUndefReplacement = [&I](Type *Ty) {
4588 Constant *BestValue = nullptr;
4589 Constant *NullValue = Constant::getNullValue(Ty);
4590 for (const auto *U : I.users()) {
4591 Constant *C = NullValue;
4592 if (match(U, m_Or(m_Value(), m_Value())))
4593 C = ConstantInt::getAllOnesValue(Ty);
4594 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4595 C = ConstantInt::getTrue(Ty);
4596
4597 if (!BestValue)
4598 BestValue = C;
4599 else if (BestValue != C)
4600 BestValue = NullValue;
4601 }
4602 assert(BestValue && "Must have at least one use");
4603 return BestValue;
4604 };
4605
4606 if (match(Op0, m_Undef())) {
4607 // Don't fold freeze(undef/poison) if it's used as a vector operand in
4608 // a shuffle. This may improve codegen for shuffles that allow
4609 // unspecified inputs.
4610 if (isUsedWithinShuffleVector(&I))
4611 return nullptr;
4612 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4613 }
4614
4615 Constant *C;
4616 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4617 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4618 return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
4619 }
4620
4621 // Replace uses of Op with freeze(Op).
4622 if (freezeOtherUses(I))
4623 return &I;
4624
4625 return nullptr;
4626}
4627
4628/// Check for case where the call writes to an otherwise dead alloca. This
4629/// shows up for unused out-params in idiomatic C/C++ code. Note that this
4630/// helper *only* analyzes the write; doesn't check any other legality aspect.
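/// A typical example is a helper that is only called to fill in a local
/// "result" object, e.g. "call void @fill(ptr %tmp)" where %tmp is an alloca
/// with no other users and the callee's memory effects pin its only write to
/// %tmp (names illustrative).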
4631static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4632 auto *CB = dyn_cast<CallBase>(I);
4633 if (!CB)
4634 // TODO: handle e.g. store to alloca here - only worth doing if we extend
4635 // to allow reload along used path as described below. Otherwise, this
4636 // is simply a store to a dead allocation which will be removed.
4637 return false;
4638 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4639 if (!Dest)
4640 return false;
4641 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4642 if (!AI)
4643 // TODO: allow malloc?
4644 return false;
4645 // TODO: allow memory access dominated by move point? Note that since AI
4646 // could have a reference to itself captured by the call, we would need to
4647 // account for cycles in doing so.
4648 SmallVector<const User *> AllocaUsers;
4649 SmallPtrSet<const User *, 4> Visited;
4650 auto pushUsers = [&](const Instruction &I) {
4651 for (const User *U : I.users()) {
4652 if (Visited.insert(U).second)
4653 AllocaUsers.push_back(U);
4654 }
4655 };
4656 pushUsers(*AI);
4657 while (!AllocaUsers.empty()) {
4658 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4659 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4660 isa<AddrSpaceCastInst>(UserI)) {
4661 pushUsers(*UserI);
4662 continue;
4663 }
4664 if (UserI == CB)
4665 continue;
4666 // TODO: support lifetime.start/end here
4667 return false;
4668 }
4669 return true;
4670}
4671
4672/// Try to move the specified instruction from its current block into the
4673/// beginning of DestBlock, which can only happen if it's safe to move the
4674/// instruction past all of the instructions between it and the end of its
4675/// block.
4676bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
4677 BasicBlock *DestBlock) {
4678 BasicBlock *SrcBlock = I->getParent();
4679
4680 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4681 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4682 I->isTerminator())
4683 return false;
4684
4685 // Do not sink static or dynamic alloca instructions. Static allocas must
4686 // remain in the entry block, and dynamic allocas must not be sunk in between
4687 // a stacksave / stackrestore pair, which would incorrectly shorten its
4688 // lifetime.
4689 if (isa<AllocaInst>(I))
4690 return false;
4691
4692 // Do not sink into catchswitch blocks.
4693 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4694 return false;
4695
4696 // Do not sink convergent call instructions.
4697 if (auto *CI = dyn_cast<CallInst>(I)) {
4698 if (CI->isConvergent())
4699 return false;
4700 }
4701
4702  // Unless we can prove that the memory write isn't visible except on the
4703 // path we're sinking to, we must bail.
4704 if (I->mayWriteToMemory()) {
4705 if (!SoleWriteToDeadLocal(I, TLI))
4706 return false;
4707 }
4708
4709 // We can only sink load instructions if there is nothing between the load and
4710  // the end of the block that could change the value.
4711 if (I->mayReadFromMemory()) {
4712 // We don't want to do any sophisticated alias analysis, so we only check
4713 // the instructions after I in I's parent block if we try to sink to its
4714 // successor block.
4715 if (DestBlock->getUniquePredecessor() != I->getParent())
4716 return false;
4717 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4718 E = I->getParent()->end();
4719 Scan != E; ++Scan)
4720 if (Scan->mayWriteToMemory())
4721 return false;
4722 }
4723
4724 I->dropDroppableUses([&](const Use *U) {
4725 auto *I = dyn_cast<Instruction>(U->getUser());
4726 if (I && I->getParent() != DestBlock) {
4727 Worklist.add(I);
4728 return true;
4729 }
4730 return false;
4731 });
4732 /// FIXME: We could remove droppable uses that are not dominated by
4733 /// the new position.
4734
4735 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4736 I->moveBefore(*DestBlock, InsertPos);
4737 ++NumSunkInst;
4738
4739 // Also sink all related debug uses from the source basic block. Otherwise we
4740  // get a debug use before the def. Attempt to salvage debug uses first, to
4741  // maximise the range over which variables have a location. If we cannot
4742  // salvage, then mark the location undef: we know it was supposed to receive
4743  // a new location here, but that computation has been sunk.
4744  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4745  SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
4746 findDbgUsers(DbgUsers, I, &DbgVariableRecords);
4747 if (!DbgUsers.empty())
4748 tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
4749 if (!DbgVariableRecords.empty())
4750 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
4751 DbgVariableRecords);
4752
4753 // PS: there are numerous flaws with this behaviour, not least that right now
4754 // assignments can be re-ordered past other assignments to the same variable
4755  // if they use different Values. Creating more undef assignments can never be
4756  // undone. And salvaging all users outside of this block can unnecessarily
4757  // alter the lifetime of the live-value that the variable refers to.
4758  // Some of these things can be resolved by tolerating debug use-before-defs in
4759  // LLVM-IR; however, that depends on the instruction-referencing CodeGen
4760  // backend being used for more architectures.
4761
4762 return true;
4763}
4764
4765void InstCombinerImpl::tryToSinkInstructionDbgValues(
4766    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4767    BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
4768  // For all debug values in the destination block, the sunk instruction
4769  // will still be available, so they do not need to be dropped.
4770  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
4771  for (auto &DbgUser : DbgUsers)
4772 if (DbgUser->getParent() != DestBlock)
4773 DbgUsersToSalvage.push_back(DbgUser);
4774
4775 // Process the sinking DbgUsersToSalvage in reverse order, as we only want
4776 // to clone the last appearing debug intrinsic for each given variable.
4777  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4778  for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
4779 if (DVI->getParent() == SrcBlock)
4780 DbgUsersToSink.push_back(DVI);
4781 llvm::sort(DbgUsersToSink,
4782 [](auto *A, auto *B) { return B->comesBefore(A); });
4783
4784  SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4785  SmallSet<DebugVariable, 4> SunkVariables;
4786 for (auto *User : DbgUsersToSink) {
4787 // A dbg.declare instruction should not be cloned, since there can only be
4788 // one per variable fragment. It should be left in the original place
4789 // because the sunk instruction is not an alloca (otherwise we could not be
4790 // here).
4791 if (isa<DbgDeclareInst>(User))
4792 continue;
4793
4794 DebugVariable DbgUserVariable =
4795 DebugVariable(User->getVariable(), User->getExpression(),
4796 User->getDebugLoc()->getInlinedAt());
4797
4798 if (!SunkVariables.insert(DbgUserVariable).second)
4799 continue;
4800
4801 // Leave dbg.assign intrinsics in their original positions and there should
4802 // be no need to insert a clone.
4803 if (isa<DbgAssignIntrinsic>(User))
4804 continue;
4805
4806 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4807 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4808 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4809 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4810 }
4811
4812 // Perform salvaging without the clones, then sink the clones.
4813 if (!DIIClones.empty()) {
4814 salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
4815 // The clones are in reverse order of original appearance, reverse again to
4816 // maintain the original order.
4817 for (auto &DIIClone : llvm::reverse(DIIClones)) {
4818 DIIClone->insertBefore(&*InsertPos);
4819 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4820 }
4821 }
4822}
4823
4824void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
4825    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
4826 BasicBlock *DestBlock,
4827 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
4828 // Implementation of tryToSinkInstructionDbgValues, but for the
4829 // DbgVariableRecord of variable assignments rather than dbg.values.
4830
4831 // Fetch all DbgVariableRecords not already in the destination.
4832 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
4833 for (auto &DVR : DbgVariableRecords)
4834 if (DVR->getParent() != DestBlock)
4835 DbgVariableRecordsToSalvage.push_back(DVR);
4836
4837 // Fetch a second collection, of DbgVariableRecords in the source block that
4838 // we're going to sink.
4839 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
4840 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
4841 if (DVR->getParent() == SrcBlock)
4842 DbgVariableRecordsToSink.push_back(DVR);
4843
4844 // Sort DbgVariableRecords according to their position in the block. This is a
4845 // partial order: DbgVariableRecords attached to different instructions will
4846 // be ordered by the instruction order, but DbgVariableRecords attached to the
4847 // same instruction won't have an order.
4848 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
4849 return B->getInstruction()->comesBefore(A->getInstruction());
4850 };
4851 llvm::stable_sort(DbgVariableRecordsToSink, Order);
4852
4853 // If there are two assignments to the same variable attached to the same
4854 // instruction, the ordering between the two assignments is important. Scan
4855 // for this (rare) case and establish which is the last assignment.
4856 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
4857  SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
4858  if (DbgVariableRecordsToSink.size() > 1) {
4859    SmallDenseMap<InstVarPair, unsigned> CountMap;
4860    // Count how many assignments to each variable there are per instruction.
4861 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4862 DebugVariable DbgUserVariable =
4863 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4864 DVR->getDebugLoc()->getInlinedAt());
4865 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
4866 }
4867
4868 // If there are any instructions with two assignments, add them to the
4869 // FilterOutMap to record that they need extra filtering.
4870    SmallPtrSet<const Instruction *, 4> DupSet;
4871    for (auto It : CountMap) {
4872 if (It.second > 1) {
4873 FilterOutMap[It.first] = nullptr;
4874 DupSet.insert(It.first.first);
4875 }
4876 }
4877
4878 // For all instruction/variable pairs needing extra filtering, find the
4879 // latest assignment.
4880 for (const Instruction *Inst : DupSet) {
4881 for (DbgVariableRecord &DVR :
4882 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
4883 DebugVariable DbgUserVariable =
4884 DebugVariable(DVR.getVariable(), DVR.getExpression(),
4885 DVR.getDebugLoc()->getInlinedAt());
4886 auto FilterIt =
4887 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
4888 if (FilterIt == FilterOutMap.end())
4889 continue;
4890 if (FilterIt->second != nullptr)
4891 continue;
4892 FilterIt->second = &DVR;
4893 }
4894 }
4895 }
4896
4897 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
4898 // out any duplicate assignments identified above.
4899  SmallVector<DbgVariableRecord *, 2> DVRClones;
4900  SmallSet<DebugVariable, 4> SunkVariables;
4901 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
4902    if (DVR->Type == DbgVariableRecord::LocationType::Declare)
4903      continue;
4904
4905 DebugVariable DbgUserVariable =
4906 DebugVariable(DVR->getVariable(), DVR->getExpression(),
4907 DVR->getDebugLoc()->getInlinedAt());
4908
4909 // For any variable where there were multiple assignments in the same place,
4910 // ignore all but the last assignment.
4911 if (!FilterOutMap.empty()) {
4912 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
4913 auto It = FilterOutMap.find(IVP);
4914
4915 // Filter out.
4916 if (It != FilterOutMap.end() && It->second != DVR)
4917 continue;
4918 }
4919
4920 if (!SunkVariables.insert(DbgUserVariable).second)
4921 continue;
4922
4923 if (DVR->isDbgAssign())
4924 continue;
4925
4926 DVRClones.emplace_back(DVR->clone());
4927 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
4928 }
4929
4930 // Perform salvaging without the clones, then sink the clones.
4931 if (DVRClones.empty())
4932 return;
4933
4934 salvageDebugInfoForDbgValues(*I, {}, DbgVariableRecordsToSalvage);
4935
4936 // The clones are in reverse order of original appearance. Assert that the
4937 // head bit is set on the iterator as we _should_ have received it via
4938 // getFirstInsertionPt. Inserting like this will reverse the clone order as
4939 // we'll repeatedly insert at the head, such as:
4940 // DVR-3 (third insertion goes here)
4941 // DVR-2 (second insertion goes here)
4942 // DVR-1 (first insertion goes here)
4943 // Any-Prior-DVRs
4944 // InsertPtInst
4945 assert(InsertPos.getHeadBit());
4946 for (DbgVariableRecord *DVRClone : DVRClones) {
4947 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4948 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
4949 }
4950}
4951
4952bool InstCombinerImpl::run() {
4953  while (!Worklist.isEmpty()) {
4954 // Walk deferred instructions in reverse order, and push them to the
4955 // worklist, which means they'll end up popped from the worklist in-order.
4956 while (Instruction *I = Worklist.popDeferred()) {
4957 // Check to see if we can DCE the instruction. We do this already here to
4958 // reduce the number of uses and thus allow other folds to trigger.
4959 // Note that eraseInstFromFunction() may push additional instructions on
4960 // the deferred worklist, so this will DCE whole instruction chains.
4961      if (isInstructionTriviallyDead(I, &TLI)) {
4962        eraseInstFromFunction(*I);
4963        ++NumDeadInst;
4964 continue;
4965 }
4966
4967 Worklist.push(I);
4968 }
4969
4970    Instruction *I = Worklist.removeOne();
4971    if (I == nullptr) continue;  // skip null values.
4972
4973 // Check to see if we can DCE the instruction.
4974    if (isInstructionTriviallyDead(I, &TLI)) {
4975      eraseInstFromFunction(*I);
4976      ++NumDeadInst;
4977 continue;
4978 }
4979
4980 if (!DebugCounter::shouldExecute(VisitCounter))
4981 continue;
4982
4983 // See if we can trivially sink this instruction to its user if we can
4984 // prove that the successor is not executed more frequently than our block.
4985 // Return the UserBlock if successful.
4986 auto getOptionalSinkBlockForInst =
4987 [this](Instruction *I) -> std::optional<BasicBlock *> {
4988 if (!EnableCodeSinking)
4989 return std::nullopt;
4990
4991 BasicBlock *BB = I->getParent();
4992 BasicBlock *UserParent = nullptr;
4993 unsigned NumUsers = 0;
4994
4995 for (auto *U : I->users()) {
4996 if (U->isDroppable())
4997 continue;
4998 if (NumUsers > MaxSinkNumUsers)
4999 return std::nullopt;
5000
5001 Instruction *UserInst = cast<Instruction>(U);
5002 // Special handling for Phi nodes - get the block the use occurs in.
5003 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
5004 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
5005 if (PN->getIncomingValue(i) == I) {
5006 // Bail out if we have uses in different blocks. We don't do any
5007            // sophisticated analysis (i.e. finding the NearestCommonDominator of
5008 // these use blocks).
5009 if (UserParent && UserParent != PN->getIncomingBlock(i))
5010 return std::nullopt;
5011 UserParent = PN->getIncomingBlock(i);
5012 }
5013 }
5014 assert(UserParent && "expected to find user block!");
5015 } else {
5016 if (UserParent && UserParent != UserInst->getParent())
5017 return std::nullopt;
5018 UserParent = UserInst->getParent();
5019 }
5020
5021        // Make sure these checks are done only once; naturally, we do them the
5022        // first time we get the user parent, which saves compile time.
5023 if (NumUsers == 0) {
5024 // Try sinking to another block. If that block is unreachable, then do
5025 // not bother. SimplifyCFG should handle it.
5026 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5027 return std::nullopt;
5028
5029 auto *Term = UserParent->getTerminator();
5030 // See if the user is one of our successors that has only one
5031 // predecessor, so that we don't have to split the critical edge.
5032 // Another option where we can sink is a block that ends with a
5033            // terminator that does not pass control to another block (such as
5034            // return, unreachable, or resume). In this case:
5035 // - I dominates the User (by SSA form);
5036 // - the User will be executed at most once.
5037 // So sinking I down to User is always profitable or neutral.
5038 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5039 return std::nullopt;
5040
5041 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5042 }
5043
5044 NumUsers++;
5045 }
5046
5047 // No user or only has droppable users.
5048 if (!UserParent)
5049 return std::nullopt;
5050
5051 return UserParent;
5052 };
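    // A minimal illustration (hypothetical IR, not taken from this file):
    //   bb:
    //     %v = add i32 %a, %b
    //     br i1 %c, label %use.bb, label %other.bb
    //   use.bb:                                ; preds = %bb
    //     call void @g(i32 %v)
    // the only non-droppable user of %v lives in %use.bb, whose unique
    // predecessor is %bb, so the lambda above returns %use.bb and the add can
    // be sunk there, keeping it off the %other.bb path.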
5053
5054 auto OptBB = getOptionalSinkBlockForInst(I);
5055 if (OptBB) {
5056 auto *UserParent = *OptBB;
5057 // Okay, the CFG is simple enough, try to sink this instruction.
5058 if (tryToSinkInstruction(I, UserParent)) {
5059 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5060 MadeIRChange = true;
5061          // We'll add uses of the sunk instruction below, but since
5062          // sinking can expose opportunities for its *operands*, add
5063          // them to the worklist.
5064 for (Use &U : I->operands())
5065 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5066 Worklist.push(OpI);
5067 }
5068 }
5069
5070 // Now that we have an instruction, try combining it to simplify it.
5071    Builder.SetInsertPoint(I);
5072    Builder.CollectMetadataToCopy(
5073        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5074
5075#ifndef NDEBUG
5076 std::string OrigI;
5077#endif
5078 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
5079 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5080
5081 if (Instruction *Result = visit(*I)) {
5082 ++NumCombined;
5083 // Should we replace the old instruction with a new one?
5084 if (Result != I) {
5085 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5086 << " New = " << *Result << '\n');
5087
5088 Result->copyMetadata(*I,
5089 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5090 // Everything uses the new instruction now.
5091 I->replaceAllUsesWith(Result);
5092
5093 // Move the name to the new instruction first.
5094 Result->takeName(I);
5095
5096 // Insert the new instruction into the basic block...
5097 BasicBlock *InstParent = I->getParent();
5098 BasicBlock::iterator InsertPos = I->getIterator();
5099
5100          // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5101 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5102 // We need to fix up the insertion point.
5103 if (isa<PHINode>(I)) // PHI -> Non-PHI
5104 InsertPos = InstParent->getFirstInsertionPt();
5105 else // Non-PHI -> PHI
5106 InsertPos = InstParent->getFirstNonPHIIt();
5107 }
5108
5109 Result->insertInto(InstParent, InsertPos);
5110
5111 // Push the new instruction and any users onto the worklist.
5112          Worklist.pushUsersToWorkList(*Result);
5113          Worklist.push(Result);
5114
5115          eraseInstFromFunction(*I);
5116      } else {
5117 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5118 << " New = " << *I << '\n');
5119
5120 // If the instruction was modified, it's possible that it is now dead.
5121        // If so, remove it.
5122        if (isInstructionTriviallyDead(I, &TLI)) {
5123          eraseInstFromFunction(*I);
5124        } else {
5125          Worklist.pushUsersToWorkList(*I);
5126          Worklist.push(I);
5127 }
5128 }
5129 MadeIRChange = true;
5130 }
5131 }
5132
5133 Worklist.zap();
5134 return MadeIRChange;
5135}
5136
5137// Track the scopes used by !alias.scope and !noalias. In a function, a
5138// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5139// by both sets. If not, the declaration of the scope can be safely omitted.
5140// The MDNode of the scope can be omitted as well for the instructions that are
5141// part of this function. We do not do that at this point, as this might become
5142// too time consuming to do.
5143class AliasScopeTracker {
5144  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5145 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5146
5147public:
5148  void analyse(Instruction *I) {
5149    // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5150 if (!I->hasMetadataOtherThanDebugLoc())
5151 return;
5152
5153 auto Track = [](Metadata *ScopeList, auto &Container) {
5154 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5155 if (!MDScopeList || !Container.insert(MDScopeList).second)
5156 return;
5157 for (const auto &MDOperand : MDScopeList->operands())
5158 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5159 Container.insert(MDScope);
5160 };
5161
5162 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5163 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5164 }
5165
5166  bool isNoAliasScopeDeclDead(Instruction *Inst) {
5167    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5168 if (!Decl)
5169 return false;
5170
5171 assert(Decl->use_empty() &&
5172 "llvm.experimental.noalias.scope.decl in use ?");
5173 const MDNode *MDSL = Decl->getScopeList();
5174 assert(MDSL->getNumOperands() == 1 &&
5175 "llvm.experimental.noalias.scope should refer to a single scope");
5176 auto &MDOperand = MDSL->getOperand(0);
5177 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5178 return !UsedAliasScopesAndLists.contains(MD) ||
5179 !UsedNoAliasScopesAndLists.contains(MD);
5180
5181    // Not an MDNode? Throw it away.
5182 return true;
5183 }
5184};
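// For example (hypothetical IR, not taken from this file): a leftover
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// whose scope !2 is no longer attached to both an !alias.scope and a !noalias
// list anywhere in the function is reported dead by isNoAliasScopeDeclDead()
// and erased while the worklist is being prepared below.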
5185
5186/// Populate the IC worklist from a function, by walking it in reverse
5187/// post-order and adding all reachable code to the worklist.
5188///
5189/// This has a couple of tricks to make the code faster and more powerful. In
5190/// particular, we constant fold and DCE instructions as we go, to avoid adding
5191/// them to the worklist (this significantly speeds up instcombine on code where
5192/// many instructions are dead or constant). Additionally, if we find a branch
5193/// whose condition is a known constant, we only visit the reachable successors.
5194bool InstCombinerImpl::prepareWorklist(
5195    Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) {
5196  bool MadeIRChange = false;
5197  SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5198  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5199 DenseMap<Constant *, Constant *> FoldedConstants;
5200 AliasScopeTracker SeenAliasScopes;
5201
5202 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5203 for (BasicBlock *Succ : successors(BB))
5204 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5205 for (PHINode &PN : Succ->phis())
5206 for (Use &U : PN.incoming_values())
5207 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
5208 U.set(PoisonValue::get(PN.getType()));
5209 MadeIRChange = true;
5210 }
5211 };
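  // For instance (hypothetical IR, not taken from this file), for a block
  // ending in
  //   br i1 true, label %taken, label %dead
  // only %taken is treated as live; the edge to %dead is added to DeadEdges
  // and %dead's incoming PHI values from this block are rewritten to poison
  // when HandleOnlyLiveSuccessor is invoked by the constant-branch handling
  // further down.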
5212
5213 for (BasicBlock *BB : RPOT) {
5214 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
5215 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5216 })) {
5217 HandleOnlyLiveSuccessor(BB, nullptr);
5218 continue;
5219 }
5220 LiveBlocks.insert(BB);
5221
5222 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
5223 // ConstantProp instruction if trivially constant.
5224 if (!Inst.use_empty() &&
5225 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
5226 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5227 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5228 << '\n');
5229 Inst.replaceAllUsesWith(C);
5230 ++NumConstProp;
5231 if (isInstructionTriviallyDead(&Inst, &TLI))
5232 Inst.eraseFromParent();
5233 MadeIRChange = true;
5234 continue;
5235 }
5236
5237 // See if we can constant fold its operands.
5238 for (Use &U : Inst.operands()) {
5239 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5240 continue;
5241
5242 auto *C = cast<Constant>(U);
5243 Constant *&FoldRes = FoldedConstants[C];
5244 if (!FoldRes)
5245 FoldRes = ConstantFoldConstant(C, DL, &TLI);
5246
5247 if (FoldRes != C) {
5248 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
5249 << "\n Old = " << *C
5250 << "\n New = " << *FoldRes << '\n');
5251 U = FoldRes;
5252 MadeIRChange = true;
5253 }
5254 }
5255
5256 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
5257      // these call instructions consumes a non-trivial amount of time and
5258 // provides no value for the optimization.
5259 if (!Inst.isDebugOrPseudoInst()) {
5260 InstrsForInstructionWorklist.push_back(&Inst);
5261 SeenAliasScopes.analyse(&Inst);
5262 }
5263 }
5264
5265 // If this is a branch or switch on a constant, mark only the single
5266 // live successor. Otherwise assume all successors are live.
5267 Instruction *TI = BB->getTerminator();
5268 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
5269 if (isa<UndefValue>(BI->getCondition())) {
5270 // Branch on undef is UB.
5271 HandleOnlyLiveSuccessor(BB, nullptr);
5272 continue;
5273 }
5274 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
5275 bool CondVal = Cond->getZExtValue();
5276 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5277 continue;
5278 }
5279 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
5280 if (isa<UndefValue>(SI->getCondition())) {
5281 // Switch on undef is UB.
5282 HandleOnlyLiveSuccessor(BB, nullptr);
5283 continue;
5284 }
5285 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
5286 HandleOnlyLiveSuccessor(BB,
5287 SI->findCaseValue(Cond)->getCaseSuccessor());
5288 continue;
5289 }
5290 }
5291 }
5292
5293 // Remove instructions inside unreachable blocks. This prevents the
5294 // instcombine code from having to deal with some bad special cases, and
5295 // reduces use counts of instructions.
5296 for (BasicBlock &BB : F) {
5297 if (LiveBlocks.count(&BB))
5298 continue;
5299
5300 unsigned NumDeadInstInBB;
5301 unsigned NumDeadDbgInstInBB;
5302 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
5303        removeAllNonTerminatorAndEHPadInstructions(&BB);
5304
5305 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
5306 NumDeadInst += NumDeadInstInBB;
5307 }
5308
5309 // Once we've found all of the instructions to add to instcombine's worklist,
5310 // add them in reverse order. This way instcombine will visit from the top
5311  // of the function down. This jibes well with the way that it adds all uses
5312 // of instructions to the worklist after doing a transformation, thus avoiding
5313 // some N^2 behavior in pathological cases.
5314 Worklist.reserve(InstrsForInstructionWorklist.size());
5315 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5316 // DCE instruction if trivially dead. As we iterate in reverse program
5317 // order here, we will clean up whole chains of dead instructions.
5318 if (isInstructionTriviallyDead(Inst, &TLI) ||
5319 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
5320 ++NumDeadInst;
5321 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
5322 salvageDebugInfo(*Inst);
5323 Inst->eraseFromParent();
5324 MadeIRChange = true;
5325 continue;
5326 }
5327
5328 Worklist.push(Inst);
5329 }
5330
5331 return MadeIRChange;
5332}
5333
5334static bool combineInstructionsOverFunction(
5335    Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
5336    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
5337    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
5338    BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI,
5339    const InstCombineOptions &Opts) {
5340 auto &DL = F.getParent()->getDataLayout();
5341
5342 /// Builder - This is an IRBuilder that automatically inserts new
5343 /// instructions into the worklist when they are created.
5344  InstCombiner::BuilderTy Builder(
5345      F.getContext(), TargetFolder(DL),
5346 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
5347 Worklist.add(I);
5348 if (auto *Assume = dyn_cast<AssumeInst>(I))
5349 AC.registerAssumption(Assume);
5350 }));
5351
5353  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
5354 // Lower dbg.declare intrinsics otherwise their value may be clobbered
5355 // by instcombiner.
5356 bool MadeIRChange = false;
5357  if (ShouldLowerDbgDeclare)
5358    MadeIRChange = LowerDbgDeclare(F);
5359
5360 // Iterate while there is work to do.
5361 unsigned Iteration = 0;
5362 while (true) {
5363 ++Iteration;
5364
5365 if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
5366 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
5367 << " on " << F.getName()
5368 << " reached; stopping without verifying fixpoint\n");
5369 break;
5370 }
5371
5372 ++NumWorklistIterations;
5373 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
5374 << F.getName() << "\n");
5375
5376 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
5377 ORE, BFI, BPI, PSI, DL, LI);
5378    IC.MaxArraySizeForCombine = MaxArraySize;
5379    bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
5380 MadeChangeInThisIteration |= IC.run();
5381 if (!MadeChangeInThisIteration)
5382 break;
5383
5384 MadeIRChange = true;
5385 if (Iteration > Opts.MaxIterations) {
5386      report_fatal_error(
5387          "Instruction Combining did not reach a fixpoint after " +
5388 Twine(Opts.MaxIterations) + " iterations",
5389 /*GenCrashDiag=*/false);
5390 }
5391 }
5392
5393 if (Iteration == 1)
5394 ++NumOneIteration;
5395 else if (Iteration == 2)
5396 ++NumTwoIterations;
5397 else if (Iteration == 3)
5398 ++NumThreeIterations;
5399 else
5400 ++NumFourOrMoreIterations;
5401
5402 return MadeIRChange;
5403}
5404
5406
5407void InstCombinePass::printPipeline(
5408 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
5409 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
5410 OS, MapClassName2PassName);
5411 OS << '<';
5412 OS << "max-iterations=" << Options.MaxIterations << ";";
5413 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
5414 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
5415 OS << '>';
5416}
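// As an illustration, for options with MaxIterations of 1000, loop info
// disabled, and fixpoint verification enabled, the element printed into a
// textual pass pipeline would read:
//   instcombine<max-iterations=1000;no-use-loop-info;verify-fixpoint>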
5417
5418PreservedAnalyses InstCombinePass::run(Function &F,
5419                                       FunctionAnalysisManager &AM) {
5420  auto &AC = AM.getResult<AssumptionAnalysis>(F);
5421 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
5422 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
5423  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
5424  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5425
5426 // TODO: Only use LoopInfo when the option is set. This requires that the
5427 // callers in the pass pipeline explicitly set the option.
5428 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
5429 if (!LI && Options.UseLoopInfo)
5430 LI = &AM.getResult<LoopAnalysis>(F);
5431
5432 auto *AA = &AM.getResult<AAManager>(F);
5433 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
5434 ProfileSummaryInfo *PSI =
5435 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
5436 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
5437      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
5438  auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
5439
5440 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5441 BFI, BPI, PSI, LI, Options))
5442 // No changes, all analyses are preserved.
5443 return PreservedAnalyses::all();
5444
5445 // Mark all the analyses that instcombine updates as preserved.
5446  PreservedAnalyses PA;
5447  PA.preserveSet<CFGAnalyses>();
5448  return PA;
5449}
5450
5451void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
5452  AU.setPreservesCFG();
5453  AU.addRequired<AAResultsWrapperPass>();
5454  AU.addRequired<AssumptionCacheTracker>();
5455  AU.addRequired<TargetLibraryInfoWrapperPass>();
5456  AU.addRequired<TargetTransformInfoWrapperPass>();
5457  AU.addRequired<DominatorTreeWrapperPass>();
5458  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
5459  AU.addPreserved<DominatorTreeWrapperPass>();
5460  AU.addPreserved<AAResultsWrapperPass>();
5461  AU.addPreserved<BasicAAWrapperPass>();
5462  AU.addPreserved<GlobalsAAWrapperPass>();
5463  AU.addRequired<ProfileSummaryInfoWrapperPass>();
5464  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
5465}
5466
5467bool InstructionCombiningPass::runOnFunction(Function &F) {
5468  if (skipFunction(F))
5469 return false;
5470
5471 // Required analyses.
5472 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
5473 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
5474 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
5475 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
5476 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
5477 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
5478
5479 // Optional analyses.
5480 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
5481 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
5482 ProfileSummaryInfo *PSI =
5483 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
5484 BlockFrequencyInfo *BFI =
5485 (PSI && PSI->hasProfileSummary()) ?
5486 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
5487 nullptr;
5488 BranchProbabilityInfo *BPI = nullptr;
5489 if (auto *WrapperPass =
5490 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
5491 BPI = &WrapperPass->getBPI();
5492
5493 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
5494 BFI, BPI, PSI, LI,
5495                                         InstCombineOptions());
5496}
5497
5498char InstructionCombiningPass::ID = 0;
5499
5500InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
5501  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
5502}
5503
5504INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
5505                      "Combine redundant instructions", false, false)
5506INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
5507INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
5508INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
5509INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5510INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
5511INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
5512INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
5513INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
5514INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
5515INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
5516                    "Combine redundant instructions", false, false)
5517
5518// Initialization Routines
5519void llvm::initializeInstCombine(PassRegistry &Registry) {
5520  initializeInstructionCombiningPassPass(Registry);
5521}
5522
5523FunctionPass *llvm::createInstructionCombiningPass() {
5524  return new InstructionCombiningPass();
5525}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
Expand Atomic instructions
static const Function * getParent(const Value *V)
This is the interface for LLVM's primary stateless and local alias analysis.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
#define NL
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
Hexagon Vector Combine
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI)
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Constant * constantFoldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2)
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static bool IsSelect(MachineInstr &MI)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1898
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:805
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1911
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Class to represent array types.
Definition: DerivedTypes.h:371
uint64_t getNumElements() const
Definition: DerivedTypes.h:383
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
Type * getElementType() const
Definition: DerivedTypes.h:384
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
Definition: Attributes.cpp:390
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:193
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:367
const Instruction & front() const
Definition: BasicBlock.h:453
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:564
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
Definition: BasicBlock.cpp:423
size_t size() const
Definition: BasicBlock.h:451
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:392
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
void swapSuccEdgesProbabilities(const BasicBlock *Src)
Swap outgoing edges probabilities for Src with branch terminator.
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:70
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1823
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:2277
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
ConstantArray - Constant Array Declarations.
Definition: Constants.h:423
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:766
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2542
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2529
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2402
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2560
static Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2535
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2596
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:863
This class represents a range of values.
Definition: ConstantRange.h:47
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition: Constants.h:507
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:400
static Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
Definition: Constants.cpp:767
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
const Constant * stripPointerCasts() const
Definition: Constant.h:213
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:432
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
SmallVector< APInt > getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const
Get GEP indices to access Offset inside ElemTy.
Definition: DataLayout.cpp:998
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
unsigned getIndexTypeSizeInBits(Type *Ty) const
Layout size of the index used in GEP calculation.
Definition: DataLayout.cpp:774
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:905
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef< Value * > Indices) const
Returns the offset from the beginning of the type for the specified indices.
Definition: DataLayout.cpp:920
This is the common base class for debug info intrinsics for variables.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
Identifies a unique instance of a variable.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool empty() const
Definition: DenseMap.h:98
iterator end()
Definition: DenseMap.h:84
void registerBranch(BranchInst *BI)
Add a branch condition to the cache.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:201
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
static bool isTargetIntrinsic(Intrinsic::ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition: Function.cpp:883
bool isInBounds() const
Test whether this is an inbounds GEP, as defined by LangRef.html.
Definition: Operator.h:420
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:475
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Create an "inbounds" getelementptr.
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Definition: IRBuilder.cpp:921
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:233
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
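A sketch combining CreateICmp with the CreateSelect entry above to build an unsigned max; X and Y are assumed unsigned integer Values of the same type.
  Value *Cmp  = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Y, "cmp");
  Value *UMax = Builder.CreateSelect(Cmp, X, Y, "umax");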
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1682
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:502
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:76
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr, BasicBlock::iterator InsertBefore)
InstCombinePass(InstCombineOptions Opts={})
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
bool prepareWorklist(Function &F, ReversePostOrderTraversal< BasicBlock * > &RPOT)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Constant * getLosslessTrunc(Constant *C, Type *TruncTy, unsigned ExtOp)
Value * SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask, KnownFPClass &Known, unsigned Depth, Instruction *CxtI)
Attempts to replace V with a simpler value based on the demanded floating-point classes.
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
void tryToSinkInstructionDbgValues(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableIntrinsic * > &DbgUsers)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Instruction * visitBranchInst(BranchInst &BI)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
Definition: InstCombiner.h:157
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
Definition: InstCombiner.h:232
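A sketch of how isFreeToInvert pairs with getFreelyInverted (documented further below); Op is an assumed Value* and Builder the combiner's builder. The DoesConsume flag reports whether the inversion reuses an existing instruction.
  bool Consumes = false;
  if (isFreeToInvert(Op, /*WillInvertAllUses=*/Op->hasOneUse(), Consumes)) {
    Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder, Consumes);
    // ... use NotOp instead of emitting a fresh 'xor Op, -1'.
  }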
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
Definition: InstCombiner.h:139
TargetLibraryInfo & TLI
Definition: InstCombiner.h:73
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:366
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
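A sketch of the usual combiner idiom in a hypothetical visitOr body (not from this file): when the whole instruction folds to an existing value, hand that value back and let the driver erase the dead instruction and requeue its users.
  // 'or X, X' is just X.
  if (I.getOperand(0) == I.getOperand(1))
    return replaceInstUsesWith(I, I.getOperand(0));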
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
Definition: InstCombiner.h:191
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
Definition: InstCombiner.h:418
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
BranchProbabilityInfo * BPI
Definition: InstCombiner.h:79
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DomConditionCache DC
Definition: InstCombiner.h:81
const bool MinimizeSize
Definition: InstCombiner.h:67
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
void addToWorklist(Instruction *I)
Definition: InstCombiner.h:336
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
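A sketch under the assumption that a combine has proved operand 1 of I must be zero: swap it for a constant zero; the old operand goes back on the worklist so it can be deleted if it becomes dead.
  // Needs "llvm/IR/Constants.h".
  return replaceOperand(I, 1, Constant::getNullValue(I.getOperand(1)->getType()));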
DominatorTree & DT
Definition: InstCombiner.h:74
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:284
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
Definition: InstCombiner.h:90
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
Definition: InstCombiner.h:213
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition: InstCombine.h:71
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void add(Instruction *I)
Add instruction to the worklist.
void push(Instruction *I)
Push the instruction onto the worklist stack.
void zap()
Check that the worklist is empty and nuke the backing store for the map.
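A sketch of typical worklist maintenance after creating or mutating an instruction NewI (assumed in scope): both the instruction and its users should be reconsidered on a later iteration.
  Worklist.push(NewI);
  Worklist.pushUsersToWorkList(*NewI);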
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:301
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:83
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
const BasicBlock * getParent() const
Definition: Instruction.h:152
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
bool isTerminator() const
Definition: Instruction.h:255
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:306
bool isShift() const
Definition: Instruction.h:259
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isIntDivRem() const
Definition: Instruction.h:258
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, BasicBlock::iterator InsertBefore)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Definition: Instructions.h:184
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:889
This is the common base class for memset/memcpy/memmove.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition: Metadata.h:62
This class represents min/max intrinsics.
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:756
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:76
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:109
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:103
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:37
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1396
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:144
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
This class represents a cast from signed integer to floating point.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
This class represents a cast from unsigned integer to floating point.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:851
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isZero() const
Definition: TypeSize.h:156
An efficient, type-erasing, non-owning reference to a callable.
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:112
self_iterator getIterator()
Definition: ilist_node.h:109
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNoFPClassCompatibleType(Type *Ty)
Returns true if this is a type legal for the 'nofpclass' attribute.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:485
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
Definition: PatternMatch.h:160
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:933
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
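A sketch of the PatternMatch idiom these entries describe, assuming V is a Value* in scope: bind X to the non-constant operand and C to the constant mask of an 'and'.
  using namespace llvm::PatternMatch;
  Value *X;
  const APInt *C;
  if (match(V, m_And(m_Value(X), m_APInt(C)))) {
    // X and C now refer to the matched operands.
  }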
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:777
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:836
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
Definition: PatternMatch.h:186
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations (lshr and ashr).
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
Definition: PatternMatch.h:521
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
CmpClass_match< LHS, RHS, FCmpInst, FCmpInst::Predicate > m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
Definition: PatternMatch.h:305
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:815
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CastInst_match< OpTy, SIToFPInst > m_SIToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
Exact_match< T > m_Exact(const T &SubPattern)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
Definition: PatternMatch.h:752
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
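A sketch of composing matchers, assuming V is a Value* in scope: recognize a single-use bitwise not in either operand order.
  using namespace llvm::PatternMatch;
  Value *X;
  if (match(V, m_OneUse(m_Not(m_Value(X))))) {
    // X is the value being inverted.
  }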
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:573
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
FunctionPass * createInstructionCombiningPass()
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2801
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic * > Insns, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:2242
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:148
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1652
auto successors(const MachineBasicBlock *BB)
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
gep_type_iterator gep_type_end(const User *GEP)
Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition: Local.cpp:2783
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
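A sketch of calling into InstSimplify from a hypothetical visitAdd body; SQ and getWithInstruction are documented elsewhere in this listing. A null return means nothing folded; a non-null return is an existing equivalent value.
  if (Value *V = simplifyAddInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);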
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1916
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities.
Definition: Local.cpp:1691
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
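A sketch, assuming an LLVMContext Ctx and the module's DataLayout DL are in scope: fold "add i32 2, 3" to the constant 5 without creating any IR. A null result means the operands could not be folded for this opcode.
  // Needs "llvm/Analysis/ConstantFolding.h" and "llvm/IR/Constants.h".
  Constant *Two   = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
  Constant *Three = ConstantInt::get(Type::getInt32Ty(Ctx), 3);
  Constant *Five  = ConstantFoldBinaryOpOperands(Instruction::Add, Two, Three, DL);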
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2711
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Or
Bitwise or logical OR of integers.
DWARFExpression::Operation Op
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
void initializeInstructionCombiningPassPass(PassRegistry &)
std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:231
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
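A sketch of using the combiner's known-bits query (see computeKnownBits above) together with these KnownBits accessors; V and the context instruction CxtI are assumed in scope.
  KnownBits Known(V->getType()->getScalarSizeInBits());
  computeKnownBits(V, Known, /*Depth=*/0, CxtI);
  if (Known.countMinLeadingZeros() >= Known.getBitWidth() / 2) {
    // At least the top half of V is known to be zero.
  }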
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96
SimplifyQuery getWithoutUndef() const