LLVM 17.0.0git
InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
109#include "llvm/Transforms/Utils/InstructionWorklist.h"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117STATISTIC(NumOneIteration, "Number of functions with one iteration");
118STATISTIC(NumTwoIterations, "Number of functions with two iterations");
119STATISTIC(NumThreeIterations, "Number of functions with three iterations");
120STATISTIC(NumFourOrMoreIterations,
121 "Number of functions with four or more iterations");
122
123STATISTIC(NumCombined , "Number of insts combined");
124STATISTIC(NumConstProp, "Number of constant folds");
125STATISTIC(NumDeadInst , "Number of dead inst eliminated");
126STATISTIC(NumSunkInst , "Number of instructions sunk");
127STATISTIC(NumExpand, "Number of expansions");
128STATISTIC(NumFactor , "Number of factorizations");
129STATISTIC(NumReassoc , "Number of reassociations");
130DEBUG_COUNTER(VisitCounter, "instcombine-visit",
131 "Controls which instructions are visited");
132
133// FIXME: these limits eventually should be as low as 2.
134#ifndef NDEBUG
135static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
136#else
137static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
138#endif
139
140static cl::opt<bool>
141EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
142 cl::init(true));
143
145 "instcombine-max-sink-users", cl::init(32),
146 cl::desc("Maximum number of undroppable users for instruction sinking"));
147
149 "instcombine-infinite-loop-threshold",
150 cl::desc("Number of instruction combining iterations considered an "
151 "infinite loop"),
153
154static cl::opt<unsigned>
155MaxArraySize("instcombine-maxarray-size", cl::init(1024),
156 cl::desc("Maximum array size considered when doing a combine"));
157
158// FIXME: Remove this flag when it is no longer necessary to convert
159// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
160// increases variable availability at the cost of accuracy. Variables that
161// cannot be promoted by mem2reg or SROA will be described as living in memory
162// for their entire lifetime. However, passes like DSE and instcombine can
163// delete stores to the alloca, leading to misleading and inaccurate debug
164// information. This flag can be removed when those passes are fixed.
165static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
166 cl::Hidden, cl::init(true));
167
168std::optional<Instruction *>
169InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
170 // Handle target specific intrinsics
171 if (II.getCalledFunction()->isTargetIntrinsic()) {
172 return TTI.instCombineIntrinsic(*this, II);
173 }
174 return std::nullopt;
175}
176
177std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
178 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
179 bool &KnownBitsComputed) {
180 // Handle target specific intrinsics
181 if (II.getCalledFunction()->isTargetIntrinsic()) {
182 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
183 KnownBitsComputed);
184 }
185 return std::nullopt;
186}
187
188std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
189 IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
190 APInt &UndefElts3,
191 std::function<void(Instruction *, unsigned, APInt, APInt &)>
192 SimplifyAndSetOp) {
193 // Handle target specific intrinsics
194 if (II.getCalledFunction()->isTargetIntrinsic()) {
195 return TTI.simplifyDemandedVectorEltsIntrinsic(
196 *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
197 SimplifyAndSetOp);
198 }
199 return std::nullopt;
200}
201
202bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
203 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
204}
205
206Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
207 return llvm::emitGEPOffset(&Builder, DL, GEP);
208}
209
210/// Legal integers and common types are considered desirable. This is used to
211/// avoid creating instructions with types that may not be supported well by
212/// the backend.
213/// NOTE: This treats i8, i16 and i32 specially because they are common
214/// types in frontend languages.
215bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
216 switch (BitWidth) {
217 case 8:
218 case 16:
219 case 32:
220 return true;
221 default:
222 return DL.isLegalInteger(BitWidth);
223 }
224}
225
226/// Return true if it is desirable to convert an integer computation from a
227/// given bit width to a new bit width.
228/// We don't want to convert from a legal or desirable type (like i8) to an
229/// illegal type or from a smaller to a larger illegal type. A width of '1'
230/// is always treated as a desirable type because i1 is a fundamental type in
231/// IR, and there are many specialized optimizations for i1 types.
232/// Common/desirable widths are equally treated as legal to convert to, in
233/// order to open up more combining opportunities.
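/// Illustrative examples (not from the original source; assuming a typical
/// 64-bit DataLayout where i8, i16, i32 and i64 are the legal integer widths):
/// converting i40 -> i32 is accepted because it shrinks to a desirable width,
/// while i32 -> i40 is rejected because it would widen a legal type into an
/// illegal one.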
234bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
235 unsigned ToWidth) const {
236 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
237 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
238
239 // Convert to desirable widths even if they are not legal types.
240 // Only shrink types, to prevent infinite loops.
241 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
242 return true;
243
244 // If this is a legal or desirable integer source type, and the result would be
245 // an illegal type, don't do the transformation.
246 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
247 return false;
248
249 // Otherwise, if both are illegal, do not increase the size of the result. We
250 // do allow things like i160 -> i64, but not i64 -> i160.
251 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
252 return false;
253
254 return true;
255}
256
257/// Return true if it is desirable to convert a computation from 'From' to 'To'.
258/// We don't want to convert from a legal to an illegal type or from a smaller
259/// to a larger illegal type. i1 is always treated as a legal type because it is
260/// a fundamental type in IR, and there are many specialized optimizations for
261/// i1 types.
262bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
263 // TODO: This could be extended to allow vectors. Datalayout changes might be
264 // needed to properly support that.
265 if (!From->isIntegerTy() || !To->isIntegerTy())
266 return false;
267
268 unsigned FromWidth = From->getPrimitiveSizeInBits();
269 unsigned ToWidth = To->getPrimitiveSizeInBits();
270 return shouldChangeType(FromWidth, ToWidth);
271}
272
273// Return true if No Signed Wrap should be maintained for I.
274// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
275// where both B and C should be ConstantInts, results in a constant that does
276// not overflow. This function only handles the Add and Sub opcodes. For
277// all other opcodes, the function conservatively returns false.
278static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
279 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
280 if (!OBO || !OBO->hasNoSignedWrap())
281 return false;
282
283 // We reason about Add and Sub Only.
284 Instruction::BinaryOps Opcode = I.getOpcode();
285 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
286 return false;
287
288 const APInt *BVal, *CVal;
289 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
290 return false;
291
292 bool Overflow = false;
293 if (Opcode == Instruction::Add)
294 (void)BVal->sadd_ov(*CVal, Overflow);
295 else
296 (void)BVal->ssub_ov(*CVal, Overflow);
297
298 return !Overflow;
299}
300
301static bool hasNoUnsignedWrap(BinaryOperator &I) {
302 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
303 return OBO && OBO->hasNoUnsignedWrap();
304}
305
306static bool hasNoSignedWrap(BinaryOperator &I) {
307 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
308 return OBO && OBO->hasNoSignedWrap();
309}
310
311/// Conservatively clears subclassOptionalData after a reassociation or
312/// commutation. Fast-math flags are the exception: when applicable they are
313/// preserved, since reassociation and commutation do not invalidate them.
314static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
315 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
316 if (!FPMO) {
317 I.clearSubclassOptionalData();
318 return;
319 }
320
321 FastMathFlags FMF = I.getFastMathFlags();
322 I.clearSubclassOptionalData();
323 I.setFastMathFlags(FMF);
324}
325
326/// Combine constant operands of associative operations either before or after a
327/// cast to eliminate one of the associative operations:
328/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
329/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
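/// Illustrative example (not from the original source; hypothetical IR, zext
/// case only):
///   %i = and i8 %x, 7
///   %z = zext i8 %i to i32
///   %r = and i32 %z, 12
/// becomes "%z = zext i8 %x to i32 ; %r = and i32 %z, 4", since the constants
/// fold in the destination type: 12 & zext(7) == 4.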
330static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
331 InstCombinerImpl &IC) {
332 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
333 if (!Cast || !Cast->hasOneUse())
334 return false;
335
336 // TODO: Enhance logic for other casts and remove this check.
337 auto CastOpcode = Cast->getOpcode();
338 if (CastOpcode != Instruction::ZExt)
339 return false;
340
341 // TODO: Enhance logic for other BinOps and remove this check.
342 if (!BinOp1->isBitwiseLogicOp())
343 return false;
344
345 auto AssocOpcode = BinOp1->getOpcode();
346 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
347 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
348 return false;
349
350 Constant *C1, *C2;
351 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
352 !match(BinOp2->getOperand(1), m_Constant(C2)))
353 return false;
354
355 // TODO: This assumes a zext cast.
356 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
357 // to the destination type might lose bits.
358
359 // Fold the constants together in the destination type:
360 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
361 Type *DestTy = C1->getType();
362 Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
363 Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
364 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
365 IC.replaceOperand(*BinOp1, 1, FoldedC);
366 return true;
367}
368
369// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
370// inttoptr ( ptrtoint (x) ) --> x
371Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
372 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
373 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
374 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
375 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
376 Type *CastTy = IntToPtr->getDestTy();
377 if (PtrToInt &&
378 CastTy->getPointerAddressSpace() ==
379 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
380 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
381 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
382 return PtrToInt->getOperand(0);
383 }
384 return nullptr;
385}
386
387/// This performs a few simplifications for operators that are associative or
388/// commutative:
389///
390/// Commutative operators:
391///
392/// 1. Order operands such that they are listed from right (least complex) to
393/// left (most complex). This puts constants before unary operators before
394/// binary operators.
395///
396/// Associative operators:
397///
398/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
399/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
400///
401/// Associative and commutative operators:
402///
403/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
404/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
405/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
406/// if C1 and C2 are constants.
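/// As an illustrative example of transform 2 (not from the original source;
/// hypothetical IR):
///   %t = add i32 %x, 5
///   %r = add i32 %t, 7
/// Here "5 + 7" simplifies, so %r becomes "add i32 %x, 12" and %t can later be
/// deleted if it has no other uses.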
407bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
408 Instruction::BinaryOps Opcode = I.getOpcode();
409 bool Changed = false;
410
411 do {
412 // Order operands such that they are listed from right (least complex) to
413 // left (most complex). This puts constants before unary operators before
414 // binary operators.
415 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
416 getComplexity(I.getOperand(1)))
417 Changed = !I.swapOperands();
418
419 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
420 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
421
422 if (I.isAssociative()) {
423 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
424 if (Op0 && Op0->getOpcode() == Opcode) {
425 Value *A = Op0->getOperand(0);
426 Value *B = Op0->getOperand(1);
427 Value *C = I.getOperand(1);
428
429 // Does "B op C" simplify?
430 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
431 // It simplifies to V. Form "A op V".
432 replaceOperand(I, 0, A);
433 replaceOperand(I, 1, V);
434 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
435 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
436
437 // Conservatively clear all optional flags since they may not be
438 // preserved by the reassociation. Reset nsw/nuw based on the above
439 // analysis.
441 ClearSubclassDataAfterReassociation(I);
442 // Note: this is only valid because SimplifyBinOp doesn't look at
443 // the operands to Op0.
444 if (IsNUW)
445 I.setHasNoUnsignedWrap(true);
446
447 if (IsNSW)
448 I.setHasNoSignedWrap(true);
449
450 Changed = true;
451 ++NumReassoc;
452 continue;
453 }
454 }
455
456 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
457 if (Op1 && Op1->getOpcode() == Opcode) {
458 Value *A = I.getOperand(0);
459 Value *B = Op1->getOperand(0);
460 Value *C = Op1->getOperand(1);
461
462 // Does "A op B" simplify?
463 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
464 // It simplifies to V. Form "V op C".
465 replaceOperand(I, 0, V);
466 replaceOperand(I, 1, C);
467 // Conservatively clear the optional flags, since they may not be
468 // preserved by the reassociation.
469 ClearSubclassDataAfterReassociation(I);
470 Changed = true;
471 ++NumReassoc;
472 continue;
473 }
474 }
475 }
476
477 if (I.isAssociative() && I.isCommutative()) {
478 if (simplifyAssocCastAssoc(&I, *this)) {
479 Changed = true;
480 ++NumReassoc;
481 continue;
482 }
483
484 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
485 if (Op0 && Op0->getOpcode() == Opcode) {
486 Value *A = Op0->getOperand(0);
487 Value *B = Op0->getOperand(1);
488 Value *C = I.getOperand(1);
489
490 // Does "C op A" simplify?
491 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
492 // It simplifies to V. Form "V op B".
493 replaceOperand(I, 0, V);
494 replaceOperand(I, 1, B);
495 // Conservatively clear the optional flags, since they may not be
496 // preserved by the reassociation.
497 ClearSubclassDataAfterReassociation(I);
498 Changed = true;
499 ++NumReassoc;
500 continue;
501 }
502 }
503
504 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
505 if (Op1 && Op1->getOpcode() == Opcode) {
506 Value *A = I.getOperand(0);
507 Value *B = Op1->getOperand(0);
508 Value *C = Op1->getOperand(1);
509
510 // Does "C op A" simplify?
511 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
512 // It simplifies to V. Form "B op V".
513 replaceOperand(I, 0, B);
514 replaceOperand(I, 1, V);
515 // Conservatively clear the optional flags, since they may not be
516 // preserved by the reassociation.
517 ClearSubclassDataAfterReassociation(I);
518 Changed = true;
519 ++NumReassoc;
520 continue;
521 }
522 }
523
524 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
525 // if C1 and C2 are constants.
526 Value *A, *B;
527 Constant *C1, *C2, *CRes;
528 if (Op0 && Op1 &&
529 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
530 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
531 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
532 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
533 bool IsNUW = hasNoUnsignedWrap(I) &&
534 hasNoUnsignedWrap(*Op0) &&
535 hasNoUnsignedWrap(*Op1);
536 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
537 BinaryOperator::CreateNUW(Opcode, A, B) :
538 BinaryOperator::Create(Opcode, A, B);
539
540 if (isa<FPMathOperator>(NewBO)) {
541 FastMathFlags Flags = I.getFastMathFlags();
542 Flags &= Op0->getFastMathFlags();
543 Flags &= Op1->getFastMathFlags();
544 NewBO->setFastMathFlags(Flags);
545 }
546 InsertNewInstWith(NewBO, I);
547 NewBO->takeName(Op1);
548 replaceOperand(I, 0, NewBO);
549 replaceOperand(I, 1, CRes);
550 // Conservatively clear the optional flags, since they may not be
551 // preserved by the reassociation.
552 ClearSubclassDataAfterReassociation(I);
553 if (IsNUW)
554 I.setHasNoUnsignedWrap(true);
555
556 Changed = true;
557 continue;
558 }
559 }
560
561 // No further simplifications.
562 return Changed;
563 } while (true);
564}
565
566/// Return whether "X LOp (Y ROp Z)" is always equal to
567/// "(X LOp Y) ROp (X LOp Z)".
568static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
569 Instruction::BinaryOps ROp) {
570 // X & (Y | Z) <--> (X & Y) | (X & Z)
571 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
572 if (LOp == Instruction::And)
573 return ROp == Instruction::Or || ROp == Instruction::Xor;
574
575 // X | (Y & Z) <--> (X | Y) & (X | Z)
576 if (LOp == Instruction::Or)
577 return ROp == Instruction::And;
578
579 // X * (Y + Z) <--> (X * Y) + (X * Z)
580 // X * (Y - Z) <--> (X * Y) - (X * Z)
581 if (LOp == Instruction::Mul)
582 return ROp == Instruction::Add || ROp == Instruction::Sub;
583
584 return false;
585}
586
587/// Return whether "(X LOp Y) ROp Z" is always equal to
588/// "(X ROp Z) LOp (Y ROp Z)".
589static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
590 Instruction::BinaryOps ROp) {
591 if (Instruction::isCommutative(ROp))
592 return leftDistributesOverRight(ROp, LOp);
593
594 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
595 return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
596
597 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
598 // but this requires knowing that the addition does not overflow and other
599 // such subtleties.
600}
601
602/// This function returns identity value for given opcode, which can be used to
603/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
604static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
605 if (isa<Constant>(V))
606 return nullptr;
607
608 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
609}
610
611/// This function predicates factorization using distributive laws. By default,
612/// it just returns the 'Op' inputs. But for special-cases like
613/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
614/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
615/// allow more factorization opportunities.
616static Instruction::BinaryOps
617getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
618 Value *&LHS, Value *&RHS) {
619 assert(Op && "Expected a binary operator");
620 LHS = Op->getOperand(0);
621 RHS = Op->getOperand(1);
622 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
623 Constant *C;
624 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
625 // X << C --> X * (1 << C)
626 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
627 return Instruction::Mul;
628 }
629 // TODO: We can add other conversions e.g. shr => div etc.
630 }
631 return Op->getOpcode();
632}
633
634/// This tries to simplify binary operations by factorizing out common terms
635/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
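/// Illustrative example (not from the original source; hypothetical IR):
///   %m1 = mul i32 %a, %b
///   %m2 = mul i32 %a, %c
///   %r  = add i32 %m1, %m2
/// can be rewritten as "%s = add i32 %b, %c ; %r = mul i32 %a, %s" when one of
/// the original multiplies becomes dead as a result.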
636static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
637 InstCombiner::BuilderTy &Builder,
638 Instruction::BinaryOps InnerOpcode, Value *A,
639 Value *B, Value *C, Value *D) {
640 assert(A && B && C && D && "All values must be provided");
641
642 Value *V = nullptr;
643 Value *RetVal = nullptr;
644 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
645 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
646
647 // Does "X op' Y" always equal "Y op' X"?
648 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
649
650 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
651 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
652 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
653 // commutative case, "(A op' B) op (C op' A)"?
654 if (A == C || (InnerCommutative && A == D)) {
655 if (A != C)
656 std::swap(C, D);
657 // Consider forming "A op' (B op D)".
658 // If "B op D" simplifies then it can be formed with no cost.
659 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
660
661 // If "B op D" doesn't simplify then only go on if one of the existing
662 // operations "A op' B" and "C op' D" will be zapped as no longer used.
663 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
664 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
665 if (V)
666 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
667 }
668 }
669
670 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
671 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
672 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
673 // commutative case, "(A op' B) op (B op' D)"?
674 if (B == D || (InnerCommutative && B == C)) {
675 if (B != D)
676 std::swap(C, D);
677 // Consider forming "(A op C) op' B".
678 // If "A op C" simplifies then it can be formed with no cost.
679 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
680
681 // If "A op C" doesn't simplify then only go on if one of the existing
682 // operations "A op' B" and "C op' D" will be zapped as no longer used.
683 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
684 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
685 if (V)
686 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
687 }
688 }
689
690 if (!RetVal)
691 return nullptr;
692
693 ++NumFactor;
694 RetVal->takeName(&I);
695
696 // Try to add no-overflow flags to the final value.
697 if (isa<OverflowingBinaryOperator>(RetVal)) {
698 bool HasNSW = false;
699 bool HasNUW = false;
700 if (isa<OverflowingBinaryOperator>(&I)) {
701 HasNSW = I.hasNoSignedWrap();
702 HasNUW = I.hasNoUnsignedWrap();
703 }
704 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
705 HasNSW &= LOBO->hasNoSignedWrap();
706 HasNUW &= LOBO->hasNoUnsignedWrap();
707 }
708
709 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
710 HasNSW &= ROBO->hasNoSignedWrap();
711 HasNUW &= ROBO->hasNoUnsignedWrap();
712 }
713
714 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
715 // We can propagate 'nsw' if we know that
716 // %Y = mul nsw i16 %X, C
717 // %Z = add nsw i16 %Y, %X
718 // =>
719 // %Z = mul nsw i16 %X, C+1
720 //
721 // iff C+1 isn't INT_MIN
722 const APInt *CInt;
723 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
724 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
725
726 // nuw can be propagated with any constant or nuw value.
727 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
728 }
729 }
730 return RetVal;
731}
732
733Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
734 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
735 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
736 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
737 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
738 Value *A, *B, *C, *D;
739 Instruction::BinaryOps LHSOpcode, RHSOpcode;
740
741 if (Op0)
742 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
743 if (Op1)
744 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
745
746 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
747 // a common term.
748 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
749 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
750 return V;
751
752 // The instruction has the form "(A op' B) op (C)". Try to factorize common
753 // term.
754 if (Op0)
755 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
756 if (Value *V =
757 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
758 return V;
759
760 // The instruction has the form "(B) op (C op' D)". Try to factorize common
761 // term.
762 if (Op1)
763 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
764 if (Value *V =
765 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
766 return V;
767
768 return nullptr;
769}
770
771/// This tries to simplify binary operations which some other binary operation
772/// distributes over either by factorizing out common terms
773/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
774/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
775/// Returns the simplified value, or null if it didn't simplify.
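/// Illustrative expansion example (not from the original source; hypothetical
/// IR): given
///   %o = or i32 %b, %c
///   %r = and i32 %a, %o
/// the expansion "(%a & %b) | (%a & %c)" is only formed when both halves
/// simplify (or one simplifies to the inner opcode's identity value), so no
/// extra instructions are created.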
776Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
777 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
778 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
779 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
780 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
781
782 // Factorization.
783 if (Value *R = tryFactorizationFolds(I))
784 return R;
785
786 // Expansion.
787 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
788 // The instruction has the form "(A op' B) op C". See if expanding it out
789 // to "(A op C) op' (B op C)" results in simplifications.
790 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
791 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
792
793 // Disable the use of undef because it's not safe to distribute undef.
794 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
795 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
796 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
797
798 // Do "A op C" and "B op C" both simplify?
799 if (L && R) {
800 // They do! Return "L op' R".
801 ++NumExpand;
802 C = Builder.CreateBinOp(InnerOpcode, L, R);
803 C->takeName(&I);
804 return C;
805 }
806
807 // Does "A op C" simplify to the identity value for the inner opcode?
808 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
809 // They do! Return "B op C".
810 ++NumExpand;
811 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
812 C->takeName(&I);
813 return C;
814 }
815
816 // Does "B op C" simplify to the identity value for the inner opcode?
817 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
818 // They do! Return "A op C".
819 ++NumExpand;
820 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
821 C->takeName(&I);
822 return C;
823 }
824 }
825
826 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
827 // The instruction has the form "A op (B op' C)". See if expanding it out
828 // to "(A op B) op' (A op C)" results in simplifications.
829 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
830 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
831
832 // Disable the use of undef because it's not safe to distribute undef.
833 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
834 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
835 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
836
837 // Do "A op B" and "A op C" both simplify?
838 if (L && R) {
839 // They do! Return "L op' R".
840 ++NumExpand;
841 A = Builder.CreateBinOp(InnerOpcode, L, R);
842 A->takeName(&I);
843 return A;
844 }
845
846 // Does "A op B" simplify to the identity value for the inner opcode?
847 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
848 // They do! Return "A op C".
849 ++NumExpand;
850 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
851 A->takeName(&I);
852 return A;
853 }
854
855 // Does "A op C" simplify to the identity value for the inner opcode?
856 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
857 // They do! Return "A op B".
858 ++NumExpand;
859 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
860 A->takeName(&I);
861 return A;
862 }
863 }
864
865 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
866}
867
868Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
869 Value *LHS,
870 Value *RHS) {
871 Value *A, *B, *C, *D, *E, *F;
872 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
873 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
874 if (!LHSIsSelect && !RHSIsSelect)
875 return nullptr;
876
877 FastMathFlags FMF;
878 BuilderTy::FastMathFlagGuard Guard(Builder);
879 if (isa<FPMathOperator>(&I)) {
880 FMF = I.getFastMathFlags();
881 Builder.setFastMathFlags(FMF);
882 }
883
884 Instruction::BinaryOps Opcode = I.getOpcode();
885 SimplifyQuery Q = SQ.getWithInstruction(&I);
886
887 Value *Cond, *True = nullptr, *False = nullptr;
888
889 // Special-case for add/negate combination. Replace the zero in the negation
890 // with the trailing add operand:
891 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
892 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
893 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
894 // We need an 'add' and exactly 1 arm of the select to have been simplified.
895 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
896 return nullptr;
897
898 Value *N;
899 if (True && match(FVal, m_Neg(m_Value(N)))) {
900 Value *Sub = Builder.CreateSub(Z, N);
901 return Builder.CreateSelect(Cond, True, Sub, I.getName());
902 }
903 if (False && match(TVal, m_Neg(m_Value(N)))) {
904 Value *Sub = Builder.CreateSub(Z, N);
905 return Builder.CreateSelect(Cond, Sub, False, I.getName());
906 }
907 return nullptr;
908 };
909
910 if (LHSIsSelect && RHSIsSelect && A == D) {
911 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
912 Cond = A;
913 True = simplifyBinOp(Opcode, B, E, FMF, Q);
914 False = simplifyBinOp(Opcode, C, F, FMF, Q);
915
916 if (LHS->hasOneUse() && RHS->hasOneUse()) {
917 if (False && !True)
918 True = Builder.CreateBinOp(Opcode, B, E);
919 else if (True && !False)
920 False = Builder.CreateBinOp(Opcode, C, F);
921 }
922 } else if (LHSIsSelect && LHS->hasOneUse()) {
923 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
924 Cond = A;
925 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
926 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
927 if (Value *NewSel = foldAddNegate(B, C, RHS))
928 return NewSel;
929 } else if (RHSIsSelect && RHS->hasOneUse()) {
930 // X op (D ? E : F) -> D ? (X op E) : (X op F)
931 Cond = D;
932 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
933 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
934 if (Value *NewSel = foldAddNegate(E, F, LHS))
935 return NewSel;
936 }
937
938 if (!True || !False)
939 return nullptr;
940
941 Value *SI = Builder.CreateSelect(Cond, True, False);
942 SI->takeName(&I);
943 return SI;
944}
945
946/// Freely adapt every user of V as-if V was changed to !V.
947/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
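/// For example (illustrative, not from the original source): if V is only used
/// as a select condition and as a branch condition, "inverting" V amounts to
/// swapping the select's arms and the branch's successors, so no explicit 'xor'
/// ever needs to be materialized.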
948void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
949 for (User *U : make_early_inc_range(I->users())) {
950 if (U == IgnoredUser)
951 continue; // Don't consider this user.
952 switch (cast<Instruction>(U)->getOpcode()) {
953 case Instruction::Select: {
954 auto *SI = cast<SelectInst>(U);
955 SI->swapValues();
956 SI->swapProfMetadata();
957 break;
958 }
959 case Instruction::Br:
960 cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
961 break;
962 case Instruction::Xor:
963 replaceInstUsesWith(cast<Instruction>(*U), I);
964 break;
965 default:
966 llvm_unreachable("Got unexpected user - out of sync with "
967 "canFreelyInvertAllUsersOf() ?");
968 }
969 }
970}
971
972/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
973/// constant zero (which is the 'negate' form).
974Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
975 Value *NegV;
976 if (match(V, m_Neg(m_Value(NegV))))
977 return NegV;
978
979 // Constants can be considered to be negated values if they can be folded.
980 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
981 return ConstantExpr::getNeg(C);
982
983 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
984 if (C->getType()->getElementType()->isIntegerTy())
985 return ConstantExpr::getNeg(C);
986
987 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
988 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
989 Constant *Elt = CV->getAggregateElement(i);
990 if (!Elt)
991 return nullptr;
992
993 if (isa<UndefValue>(Elt))
994 continue;
995
996 if (!isa<ConstantInt>(Elt))
997 return nullptr;
998 }
999 return ConstantExpr::getNeg(CV);
1000 }
1001
1002 // Negate integer vector splats.
1003 if (auto *CV = dyn_cast<Constant>(V))
1004 if (CV->getType()->isVectorTy() &&
1005 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1006 return ConstantExpr::getNeg(CV);
1007
1008 return nullptr;
1009}
1010
1011/// A binop with a constant operand and a sign-extended boolean operand may be
1012/// converted into a select of constants by applying the binary operation to
1013/// the constant with the two possible values of the extended boolean (0 or -1).
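/// Illustrative example (not from the original source; hypothetical IR):
/// "add (sext i1 %b to i32), 5" becomes "select i1 %b, i32 4, i32 5", since
/// add(-1, 5) == 4 and add(0, 5) == 5.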
1014Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1015 // TODO: Handle non-commutative binop (constant is operand 0).
1016 // TODO: Handle zext.
1017 // TODO: Peek through 'not' of cast.
1018 Value *BO0 = BO.getOperand(0);
1019 Value *BO1 = BO.getOperand(1);
1020 Value *X;
1021 Constant *C;
1022 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1023 !X->getType()->isIntOrIntVectorTy(1))
1024 return nullptr;
1025
1026 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1027 Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
1028 Constant *Zero = ConstantInt::getNullValue(BO.getType());
1029 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1030 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1031 return SelectInst::Create(X, TVal, FVal);
1032}
1033
1034static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
1035 SelectInst *SI,
1036 bool IsTrueArm) {
1037 SmallVector<Constant *> ConstOps;
1038 for (Value *Op : I.operands()) {
1039 CmpInst::Predicate Pred;
1040 Constant *C = nullptr;
1041 if (Op == SI) {
1042 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1043 : SI->getFalseValue());
1044 } else if (match(SI->getCondition(),
1045 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1046 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1047 isGuaranteedNotToBeUndefOrPoison(C)) {
1048 // Pass
1049 } else {
1050 C = dyn_cast<Constant>(Op);
1051 }
1052 if (C == nullptr)
1053 return nullptr;
1054
1055 ConstOps.push_back(C);
1056 }
1057
1058 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1059}
1060
1061static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1062 Value *NewOp, InstCombiner &IC) {
1063 Instruction *Clone = I.clone();
1064 Clone->replaceUsesOfWith(SI, NewOp);
1065 IC.InsertNewInstBefore(Clone, *SI);
1066 return Clone;
1067}
1068
1069Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1070 bool FoldWithMultiUse) {
1071 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1072 if (!SI->hasOneUse() && !FoldWithMultiUse)
1073 return nullptr;
1074
1075 Value *TV = SI->getTrueValue();
1076 Value *FV = SI->getFalseValue();
1077 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1078 return nullptr;
1079
1080 // Bool selects with constant operands can be folded to logical ops.
1081 if (SI->getType()->isIntOrIntVectorTy(1))
1082 return nullptr;
1083
1084 // If it's a bitcast involving vectors, make sure it has the same number of
1085 // elements on both sides.
1086 if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1087 VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1088 VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1089
1090 // Verify that either both or neither are vectors.
1091 if ((SrcTy == nullptr) != (DestTy == nullptr))
1092 return nullptr;
1093
1094 // If vectors, verify that they have the same number of elements.
1095 if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1096 return nullptr;
1097 }
1098
1099 // Make sure that one of the select arms constant folds successfully.
1100 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1101 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1102 if (!NewTV && !NewFV)
1103 return nullptr;
1104
1105 // Create an instruction for the arm that did not fold.
1106 if (!NewTV)
1107 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1108 if (!NewFV)
1109 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1110 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1111}
1112
1113Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1114 unsigned NumPHIValues = PN->getNumIncomingValues();
1115 if (NumPHIValues == 0)
1116 return nullptr;
1117
1118 // We normally only transform phis with a single use. However, if a PHI has
1119 // multiple uses and they are all the same operation, we can fold *all* of the
1120 // uses into the PHI.
1121 if (!PN->hasOneUse()) {
1122 // Walk the use list for the instruction, comparing them to I.
1123 for (User *U : PN->users()) {
1124 Instruction *UI = cast<Instruction>(U);
1125 if (UI != &I && !I.isIdenticalTo(UI))
1126 return nullptr;
1127 }
1128 // Otherwise, we can replace *all* users with the new PHI we form.
1129 }
1130
1131 // Check to see whether the instruction can be folded into each phi operand.
1132 // If there is one operand that does not fold, remember the BB it is in.
1133 // If there is more than one or if *it* is a PHI, bail out.
1134 SmallVector<Value *> NewPhiValues;
1135 BasicBlock *NonSimplifiedBB = nullptr;
1136 Value *NonSimplifiedInVal = nullptr;
1137 for (unsigned i = 0; i != NumPHIValues; ++i) {
1138 Value *InVal = PN->getIncomingValue(i);
1139 BasicBlock *InBB = PN->getIncomingBlock(i);
1140
1141 // NB: It is a precondition of this transform that the operands be
1142 // phi translatable! This is usually trivially satisfied by limiting it
1143 // to constant ops, and for selects we do a more sophisticated check.
1144 SmallVector<Value *> Ops;
1145 for (Value *Op : I.operands()) {
1146 if (Op == PN)
1147 Ops.push_back(InVal);
1148 else
1149 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1150 }
1151
1152 // Don't consider the simplification successful if we get back a constant
1153 // expression. That's just an instruction in hiding.
1154 // Also reject the case where we simplify back to the phi node. We wouldn't
1155 // be able to remove it in that case.
1156 Value *NewVal = simplifyInstructionWithOperands(
1157 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1158 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
1159 NewPhiValues.push_back(NewVal);
1160 continue;
1161 }
1162
1163 if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1164 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1165
1166 NonSimplifiedBB = InBB;
1167 NonSimplifiedInVal = InVal;
1168 NewPhiValues.push_back(nullptr);
1169
1170 // If the InVal is an invoke at the end of the pred block, then we can't
1171 // insert a computation after it without breaking the edge.
1172 if (isa<InvokeInst>(InVal))
1173 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1174 return nullptr;
1175
1176 // If the incoming non-constant value is reachable from the phis block,
1177 // we'll push the operation across a loop backedge. This could result in
1178 // an infinite combine loop, and is generally non-profitable (especially
1179 // if the operation was originally outside the loop).
1180 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1181 LI))
1182 return nullptr;
1183 }
1184
1185 // If there is exactly one non-simplified value, we can insert a copy of the
1186 // operation in that block. However, if this is a critical edge, we would be
1187 // inserting the computation on some other paths (e.g. inside a loop). Only
1188 // do this if the pred block is unconditionally branching into the phi block.
1189 // Also, make sure that the pred block is not dead code.
1190 if (NonSimplifiedBB != nullptr) {
1191 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1192 if (!BI || !BI->isUnconditional() ||
1193 !DT.isReachableFromEntry(NonSimplifiedBB))
1194 return nullptr;
1195 }
1196
1197 // Okay, we can do the transformation: create the new PHI node.
1198 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1199 InsertNewInstBefore(NewPN, *PN);
1200 NewPN->takeName(PN);
1201 NewPN->setDebugLoc(PN->getDebugLoc());
1202
1203 // If we are going to have to insert a new computation, do so right before the
1204 // predecessor's terminator.
1205 Instruction *Clone = nullptr;
1206 if (NonSimplifiedBB) {
1207 Clone = I.clone();
1208 for (Use &U : Clone->operands()) {
1209 if (U == PN)
1210 U = NonSimplifiedInVal;
1211 else
1212 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1213 }
1214 InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
1215 }
1216
1217 for (unsigned i = 0; i != NumPHIValues; ++i) {
1218 if (NewPhiValues[i])
1219 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1220 else
1221 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1222 }
1223
1224 for (User *U : make_early_inc_range(PN->users())) {
1225 Instruction *User = cast<Instruction>(U);
1226 if (User == &I) continue;
1227 replaceInstUsesWith(*User, NewPN);
1229 }
1230
1231 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1232 const_cast<PHINode &>(*NewPN),
1233 const_cast<PHINode &>(*PN), DT);
1234 return replaceInstUsesWith(I, NewPN);
1235}
1236
1237Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1238 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1239 // we are guarding against replicating the binop in >1 predecessor.
1240 // This could miss matching a phi with 2 constant incoming values.
1241 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1242 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1243 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1244 Phi0->getNumOperands() != Phi1->getNumOperands())
1245 return nullptr;
1246
1247 // TODO: Remove the restriction for binop being in the same block as the phis.
1248 if (BO.getParent() != Phi0->getParent() ||
1249 BO.getParent() != Phi1->getParent())
1250 return nullptr;
1251
1252 // Fold if there is at least one specific constant value in phi0 or phi1's
1253 // incoming values that comes from the same block and this specific constant
1254 // value can be used to do optimization for specific binary operator.
1255 // For example:
1256 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1257 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1258 // %add = add i32 %phi0, %phi1
1259 // ==>
1260 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1261 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1262 /*AllowRHSConstant*/ false);
1263 if (C) {
1264 SmallVector<Value *, 4> NewIncomingValues;
1265 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1266 auto &Phi0Use = std::get<0>(T);
1267 auto &Phi1Use = std::get<1>(T);
1268 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1269 return false;
1270 Value *Phi0UseV = Phi0Use.get();
1271 Value *Phi1UseV = Phi1Use.get();
1272 if (Phi0UseV == C)
1273 NewIncomingValues.push_back(Phi1UseV);
1274 else if (Phi1UseV == C)
1275 NewIncomingValues.push_back(Phi0UseV);
1276 else
1277 return false;
1278 return true;
1279 };
1280
1281 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1282 CanFoldIncomingValuePair)) {
1283 PHINode *NewPhi =
1284 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1285 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1286 "The number of collected incoming values should equal the number "
1287 "of the original PHINode operands!");
1288 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1289 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1290 return NewPhi;
1291 }
1292 }
1293
1294 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1295 return nullptr;
1296
1297 // Match a pair of incoming constants for one of the predecessor blocks.
1298 BasicBlock *ConstBB, *OtherBB;
1299 Constant *C0, *C1;
1300 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1301 ConstBB = Phi0->getIncomingBlock(0);
1302 OtherBB = Phi0->getIncomingBlock(1);
1303 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1304 ConstBB = Phi0->getIncomingBlock(1);
1305 OtherBB = Phi0->getIncomingBlock(0);
1306 } else {
1307 return nullptr;
1308 }
1309 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1310 return nullptr;
1311
1312 // The block that we are hoisting to must reach here unconditionally.
1313 // Otherwise, we could be speculatively executing an expensive or
1314 // non-speculative op.
1315 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1316 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1317 !DT.isReachableFromEntry(OtherBB))
1318 return nullptr;
1319
1320 // TODO: This check could be tightened to only apply to binops (div/rem) that
1321 // are not safe to speculatively execute. But that could allow hoisting
1322 // potentially expensive instructions (fdiv for example).
1323 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1324 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1325 return nullptr;
1326
1327 // Fold constants for the predecessor block with constant incoming values.
1328 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1329 if (!NewC)
1330 return nullptr;
1331
1332 // Make a new binop in the predecessor block with the non-constant incoming
1333 // values.
1334 Builder.SetInsertPoint(PredBlockBranch);
1335 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1336 Phi0->getIncomingValueForBlock(OtherBB),
1337 Phi1->getIncomingValueForBlock(OtherBB));
1338 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1339 NotFoldedNewBO->copyIRFlags(&BO);
1340
1341 // Replace the binop with a phi of the new values. The old phis are dead.
1342 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1343 NewPhi->addIncoming(NewBO, OtherBB);
1344 NewPhi->addIncoming(NewC, ConstBB);
1345 return NewPhi;
1346}
1347
1348Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1349 if (!isa<Constant>(I.getOperand(1)))
1350 return nullptr;
1351
1352 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1353 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1354 return NewSel;
1355 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1356 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1357 return NewPhi;
1358 }
1359 return nullptr;
1360}
1361
1363 // If this GEP has only 0 indices, it is the same pointer as
1364 // Src. If Src is not a trivial GEP too, don't combine
1365 // the indices.
1366 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1367 !Src.hasOneUse())
1368 return false;
1369 return true;
1370}
1371
1372Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1373 if (!isa<VectorType>(Inst.getType()))
1374 return nullptr;
1375
1376 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1377 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1378 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1379 cast<VectorType>(Inst.getType())->getElementCount());
1380 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1381 cast<VectorType>(Inst.getType())->getElementCount());
1382
1383 // If both operands of the binop are vector concatenations, then perform the
1384 // narrow binop on each pair of the source operands followed by concatenation
1385 // of the results.
1386 Value *L0, *L1, *R0, *R1;
1387 ArrayRef<int> Mask;
1388 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1389 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1390 LHS->hasOneUse() && RHS->hasOneUse() &&
1391 cast<ShuffleVectorInst>(LHS)->isConcat() &&
1392 cast<ShuffleVectorInst>(RHS)->isConcat()) {
1393 // This transform does not have the speculative execution constraint as
1394 // below because the shuffle is a concatenation. The new binops are
1395 // operating on exactly the same elements as the existing binop.
1396 // TODO: We could ease the mask requirement to allow different undef lanes,
1397 // but that requires an analysis of the binop-with-undef output value.
1398 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1399 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1400 BO->copyIRFlags(&Inst);
1401 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1402 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1403 BO->copyIRFlags(&Inst);
1404 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1405 }
1406
1407 auto createBinOpReverse = [&](Value *X, Value *Y) {
1408 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
1409 if (auto *BO = dyn_cast<BinaryOperator>(V))
1410 BO->copyIRFlags(&Inst);
1411 Module *M = Inst.getModule();
1412 Function *F = Intrinsic::getDeclaration(
1413 M, Intrinsic::experimental_vector_reverse, V->getType());
1414 return CallInst::Create(F, V);
1415 };
1416
1417 // NOTE: Reverse shuffles don't require the speculative execution protection
1418 // below because they don't affect which lanes take part in the computation.
1419
1420 Value *V1, *V2;
1421 if (match(LHS, m_VecReverse(m_Value(V1)))) {
1422 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
1423 if (match(RHS, m_VecReverse(m_Value(V2))) &&
1424 (LHS->hasOneUse() || RHS->hasOneUse() ||
1425 (LHS == RHS && LHS->hasNUses(2))))
1426 return createBinOpReverse(V1, V2);
1427
1428 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
1429 if (LHS->hasOneUse() && isSplatValue(RHS))
1430 return createBinOpReverse(V1, RHS);
1431 }
1432 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
1433 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
1434 return createBinOpReverse(LHS, V2);
1435
1436 // It may not be safe to reorder shuffles and things like div, urem, etc.
1437 // because we may trap when executing those ops on unknown vector elements.
1438 // See PR20059.
1439 if (!isSafeToSpeculativelyExecute(&Inst))
1440 return nullptr;
1441
1442 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1443 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1444 if (auto *BO = dyn_cast<BinaryOperator>(XY))
1445 BO->copyIRFlags(&Inst);
1446 return new ShuffleVectorInst(XY, M);
1447 };
1448
1449 // If both arguments of the binary operation are shuffles that use the same
1450 // mask and shuffle within a single vector, move the shuffle after the binop.
1451 if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1452 match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1453 V1->getType() == V2->getType() &&
1454 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1455 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1456 return createBinOpShuffle(V1, V2, Mask);
1457 }
1458
1459 // If both arguments of a commutative binop are select-shuffles that use the
1460 // same mask with commuted operands, the shuffles are unnecessary.
1461 if (Inst.isCommutative() &&
1462 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1463 match(RHS,
1464 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1465 auto *LShuf = cast<ShuffleVectorInst>(LHS);
1466 auto *RShuf = cast<ShuffleVectorInst>(RHS);
1467 // TODO: Allow shuffles that contain undefs in the mask?
1468 // That is legal, but it reduces undef knowledge.
1469 // TODO: Allow arbitrary shuffles by shuffling after binop?
1470 // That might be legal, but we have to deal with poison.
1471 if (LShuf->isSelect() &&
1472 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
1473 RShuf->isSelect() &&
1474 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
1475 // Example:
1476 // LHS = shuffle V1, V2, <0, 5, 6, 3>
1477 // RHS = shuffle V2, V1, <0, 5, 6, 3>
1478 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1479 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1480 NewBO->copyIRFlags(&Inst);
1481 return NewBO;
1482 }
1483 }
1484
1485 // If one argument is a shuffle within one vector and the other is a constant,
1486 // try moving the shuffle after the binary operation. This canonicalization
1487 // intends to move shuffles closer to other shuffles and binops closer to
1488 // other binops, so they can be folded. It may also enable demanded elements
1489 // transforms.
1490 Constant *C;
1491 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1492 if (InstVTy &&
1493 match(&Inst,
1495 m_ImmConstant(C))) &&
1496 cast<FixedVectorType>(V1->getType())->getNumElements() <=
1497 InstVTy->getNumElements()) {
1498 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1499 "Shuffle should not change scalar type");
1500
1501 // Find constant NewC that has property:
1502 // shuffle(NewC, ShMask) = C
1503 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1504 // reorder is not possible. A 1-to-1 mapping is not required. Example:
1505 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1506 bool ConstOp1 = isa<Constant>(RHS);
1507 ArrayRef<int> ShMask = Mask;
1508 unsigned SrcVecNumElts =
1509 cast<FixedVectorType>(V1->getType())->getNumElements();
1510 UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1511 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1512 bool MayChange = true;
1513 unsigned NumElts = InstVTy->getNumElements();
1514 for (unsigned I = 0; I < NumElts; ++I) {
1515 Constant *CElt = C->getAggregateElement(I);
1516 if (ShMask[I] >= 0) {
1517 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1518 Constant *NewCElt = NewVecC[ShMask[I]];
1519 // Bail out if:
1520 // 1. The constant vector contains a constant expression.
1521 // 2. The shuffle needs an element of the constant vector that can't
1522 // be mapped to a new constant vector.
1523 // 3. This is a widening shuffle that copies elements of V1 into the
1524 // extended elements (extending with undef is allowed).
1525 if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1526 I >= SrcVecNumElts) {
1527 MayChange = false;
1528 break;
1529 }
1530 NewVecC[ShMask[I]] = CElt;
1531 }
1532 // If this is a widening shuffle, we must be able to extend with undef
1533 // elements. If the original binop does not produce an undef in the high
1534 // lanes, then this transform is not safe.
1535 // Similarly for undef lanes due to the shuffle mask, we can only
1536 // transform binops that preserve undef.
1537 // TODO: We could shuffle those non-undef constant values into the
1538 // result by using a constant vector (rather than an undef vector)
1539 // as operand 1 of the new binop, but that might be too aggressive
1540 // for target-independent shuffle creation.
1541 if (I >= SrcVecNumElts || ShMask[I] < 0) {
1542 Constant *MaybeUndef =
1543 ConstOp1
1544 ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1545 : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1546 if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1547 MayChange = false;
1548 break;
1549 }
1550 }
1551 }
1552 if (MayChange) {
1553 Constant *NewC = ConstantVector::get(NewVecC);
1554 // It may not be safe to execute a binop on a vector with undef elements
1555 // because the entire instruction can be folded to undef or create poison
1556 // that did not exist in the original code.
1557 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1558 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1559
1560 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1561 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1562 Value *NewLHS = ConstOp1 ? V1 : NewC;
1563 Value *NewRHS = ConstOp1 ? NewC : V1;
1564 return createBinOpShuffle(NewLHS, NewRHS, Mask);
1565 }
1566 }
1567
1568 // Try to reassociate to sink a splat shuffle after a binary operation.
1569 if (Inst.isAssociative() && Inst.isCommutative()) {
1570 // Canonicalize shuffle operand as LHS.
1571 if (isa<ShuffleVectorInst>(RHS))
1572 std::swap(LHS, RHS);
1573
1574 Value *X;
1575 ArrayRef<int> MaskC;
1576 int SplatIndex;
1577 Value *Y, *OtherOp;
1578 if (!match(LHS,
1579 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1580 !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1581 X->getType() != Inst.getType() ||
1582 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1583 return nullptr;
1584
1585 // FIXME: This may not be safe if the analysis allows undef elements. By
1586 // moving 'Y' before the splat shuffle, we are implicitly assuming
1587 // that it is not undef/poison at the splat index.
1588 if (isSplatValue(OtherOp, SplatIndex)) {
1589 std::swap(Y, OtherOp);
1590 } else if (!isSplatValue(Y, SplatIndex)) {
1591 return nullptr;
1592 }
1593
1594 // X and Y are splatted values, so perform the binary operation on those
1595 // values followed by a splat followed by the 2nd binary operation:
1596 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1597 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1598 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1599 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1600 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1601
1602 // Intersect FMF on both new binops. Other (poison-generating) flags are
1603 // dropped to be safe.
1604 if (isa<FPMathOperator>(R)) {
1605 R->copyFastMathFlags(&Inst);
1606 R->andIRFlags(RHS);
1607 }
1608 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1609 NewInstBO->copyIRFlags(R);
1610 return R;
1611 }
1612
1613 return nullptr;
1614}
1615
1616/// Try to narrow the width of a binop if at least 1 operand is an extend
1617/// of a value. This requires a potentially expensive known bits check to make
1618/// sure the narrow op does not overflow.
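/// For example (with i8 %x and %y standing in for the narrow source values):
///   add (zext i8 %x to i32), (zext i8 %y to i32) --> zext (add nuw i8 %x, %y) to i32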
1619Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1620 // We need at least one extended operand.
1621 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1622
1623 // If this is a sub, we swap the operands since we always want an extension
1624 // on the RHS. The LHS can be an extension or a constant.
1625 if (BO.getOpcode() == Instruction::Sub)
1626 std::swap(Op0, Op1);
1627
1628 Value *X;
1629 bool IsSext = match(Op0, m_SExt(m_Value(X)));
1630 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1631 return nullptr;
1632
1633 // If both operands are the same extension from the same source type and we
1634 // can eliminate at least one (hasOneUse), this might work.
1635 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1636 Value *Y;
1637 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1638 cast<Operator>(Op1)->getOpcode() == CastOpc &&
1639 (Op0->hasOneUse() || Op1->hasOneUse()))) {
1640 // If that did not match, see if we have a suitable constant operand.
1641 // Truncating and extending must produce the same constant.
1642 Constant *WideC;
1643 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1644 return nullptr;
1645 Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1646 if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1647 return nullptr;
1648 Y = NarrowC;
1649 }
1650
1651 // Swap back now that we found our operands.
1652 if (BO.getOpcode() == Instruction::Sub)
1653 std::swap(X, Y);
1654
1655 // Both operands have narrow versions. Last step: the math must not overflow
1656 // in the narrow width.
1657 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1658 return nullptr;
1659
1660 // bo (ext X), (ext Y) --> ext (bo X, Y)
1661 // bo (ext X), C --> ext (bo X, C')
1662 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1663 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1664 if (IsSext)
1665 NewBinOp->setHasNoSignedWrap();
1666 else
1667 NewBinOp->setHasNoUnsignedWrap();
1668 }
1669 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1670}
1671
1672static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1673  // At least one GEP must be inbounds.
1674 if (!GEP1.isInBounds() && !GEP2.isInBounds())
1675 return false;
1676
1677 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1678 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1679}
1680
1681/// Thread a GEP operation with constant indices through the constant true/false
1682/// arms of a select.
1683static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1684                                  InstCombiner::BuilderTy &Builder) {
1685 if (!GEP.hasAllConstantIndices())
1686 return nullptr;
1687
1688 Instruction *Sel;
1689 Value *Cond;
1690 Constant *TrueC, *FalseC;
1691 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1692 !match(Sel,
1693 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1694 return nullptr;
1695
1696 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1697 // Propagate 'inbounds' and metadata from existing instructions.
1698 // Note: using IRBuilder to create the constants for efficiency.
1699 SmallVector<Value *, 4> IndexC(GEP.indices());
1700 bool IsInBounds = GEP.isInBounds();
1701 Type *Ty = GEP.getSourceElementType();
1702 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1703 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1704 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1705}
1706
1707Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1708                                             GEPOperator *Src) {
1709 // Combine Indices - If the source pointer to this getelementptr instruction
1710 // is a getelementptr instruction with matching element type, combine the
1711 // indices of the two getelementptr instructions into a single instruction.
1712 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1713 return nullptr;
1714
1715 // For constant GEPs, use a more general offset-based folding approach.
1716 Type *PtrTy = Src->getType()->getScalarType();
1717 if (GEP.hasAllConstantIndices() &&
1718 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
1719 // Split Src into a variable part and a constant suffix.
1720    gep_type_iterator GTI = gep_type_begin(*Src);
1721    Type *BaseType = GTI.getIndexedType();
1722 bool IsFirstType = true;
1723 unsigned NumVarIndices = 0;
1724 for (auto Pair : enumerate(Src->indices())) {
1725 if (!isa<ConstantInt>(Pair.value())) {
1726 BaseType = GTI.getIndexedType();
1727 IsFirstType = false;
1728 NumVarIndices = Pair.index() + 1;
1729 }
1730 ++GTI;
1731 }
1732
1733 // Determine the offset for the constant suffix of Src.
1735 if (NumVarIndices != Src->getNumIndices()) {
1736      // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
1737 if (isa<ScalableVectorType>(BaseType))
1738 return nullptr;
1739
1740 SmallVector<Value *> ConstantIndices;
1741 if (!IsFirstType)
1742 ConstantIndices.push_back(
1744 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
1745 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
1746 }
1747
1748 // Add the offset for GEP (which is fully constant).
1749 if (!GEP.accumulateConstantOffset(DL, Offset))
1750 return nullptr;
1751
1752 APInt OffsetOld = Offset;
1753 // Convert the total offset back into indices.
1754 SmallVector<APInt> ConstIndices =
1756 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
1757 // If both GEP are constant-indexed, and cannot be merged in either way,
1758 // convert them to a GEP of i8.
1759 if (Src->hasAllConstantIndices())
1760 return replaceInstUsesWith(
1762 Builder.getInt8Ty(), Src->getOperand(0),
1763 Builder.getInt(OffsetOld), "",
1764 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
1765 return nullptr;
1766 }
1767
1768 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
1769 SmallVector<Value *> Indices;
1770 append_range(Indices, drop_end(Src->indices(),
1771 Src->getNumIndices() - NumVarIndices));
1772 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
1773 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
1774 // Even if the total offset is inbounds, we may end up representing it
1775 // by first performing a larger negative offset, and then a smaller
1776 // positive one. The large negative offset might go out of bounds. Only
1777 // preserve inbounds if all signs are the same.
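      // (For instance, a net offset of +4 could be expressed as a -8 index
      // followed by a +12 index; the intermediate -8 step may step outside
      // the allocated object, so inbounds would no longer be guaranteed.)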
1778 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
1779 }
1780
1781 return replaceInstUsesWith(
1782 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
1783 Indices, "", IsInBounds));
1784 }
1785
1786 if (Src->getResultElementType() != GEP.getSourceElementType())
1787 return nullptr;
1788
1789 SmallVector<Value*, 8> Indices;
1790
1791 // Find out whether the last index in the source GEP is a sequential idx.
1792 bool EndsWithSequential = false;
1793 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
1794 I != E; ++I)
1795 EndsWithSequential = I.isSequential();
1796
1797 // Can we combine the two pointer arithmetics offsets?
1798 if (EndsWithSequential) {
1799 // Replace: gep (gep %P, long B), long A, ...
1800 // With: T = long A+B; gep %P, T, ...
1801 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
1802 Value *GO1 = GEP.getOperand(1);
1803
1804 // If they aren't the same type, then the input hasn't been processed
1805 // by the loop above yet (which canonicalizes sequential index types to
1806 // intptr_t). Just avoid transforming this until the input has been
1807 // normalized.
1808 if (SO1->getType() != GO1->getType())
1809 return nullptr;
1810
1811 Value *Sum =
1812 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
1813 // Only do the combine when we are sure the cost after the
1814 // merge is never more than that before the merge.
1815 if (Sum == nullptr)
1816 return nullptr;
1817
1818 // Update the GEP in place if possible.
1819 if (Src->getNumOperands() == 2) {
1820 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
1821 replaceOperand(GEP, 0, Src->getOperand(0));
1822 replaceOperand(GEP, 1, Sum);
1823 return &GEP;
1824 }
1825 Indices.append(Src->op_begin()+1, Src->op_end()-1);
1826 Indices.push_back(Sum);
1827 Indices.append(GEP.op_begin()+2, GEP.op_end());
1828 } else if (isa<Constant>(*GEP.idx_begin()) &&
1829 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
1830 Src->getNumOperands() != 1) {
1831 // Otherwise we can do the fold if the first index of the GEP is a zero
1832 Indices.append(Src->op_begin()+1, Src->op_end());
1833 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
1834 }
1835
1836 if (!Indices.empty())
1837 return replaceInstUsesWith(
1839 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
1840 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
1841
1842 return nullptr;
1843}
1844
1845Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
1846  Value *PtrOp = GEP.getOperand(0);
1847 SmallVector<Value *, 8> Indices(GEP.indices());
1848 Type *GEPType = GEP.getType();
1849 Type *GEPEltType = GEP.getSourceElementType();
1850 bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
1851 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
1853 return replaceInstUsesWith(GEP, V);
1854
1855 // For vector geps, use the generic demanded vector support.
1856 // Skip if GEP return type is scalable. The number of elements is unknown at
1857 // compile-time.
1858 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
1859 auto VWidth = GEPFVTy->getNumElements();
1860 APInt UndefElts(VWidth, 0);
1861 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1862 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
1863 UndefElts)) {
1864 if (V != &GEP)
1865 return replaceInstUsesWith(GEP, V);
1866 return &GEP;
1867 }
1868
1869 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
1870 // possible (decide on canonical form for pointer broadcast), 3) exploit
1871 // undef elements to decrease demanded bits
1872 }
1873
1874 // Eliminate unneeded casts for indices, and replace indices which displace
1875 // by multiples of a zero size type with zero.
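  // For example, an index that steps over a zero-size element type, such as an
  // empty struct ({}) or [0 x i32], moves the pointer by zero bytes, so it can
  // be replaced with a zero index.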
1876 bool MadeChange = false;
1877
1878 // Index width may not be the same width as pointer width.
1879 // Data layout chooses the right type based on supported integer types.
1880 Type *NewScalarIndexTy =
1881 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
1882
1883  gep_type_iterator GTI = gep_type_begin(GEP);
1884  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
1885 ++I, ++GTI) {
1886 // Skip indices into struct types.
1887 if (GTI.isStruct())
1888 continue;
1889
1890 Type *IndexTy = (*I)->getType();
1891 Type *NewIndexType =
1892 IndexTy->isVectorTy()
1893 ? VectorType::get(NewScalarIndexTy,
1894 cast<VectorType>(IndexTy)->getElementCount())
1895 : NewScalarIndexTy;
1896
1897 // If the element type has zero size then any index over it is equivalent
1898 // to an index of zero, so replace it with zero if it is not zero already.
1899 Type *EltTy = GTI.getIndexedType();
1900 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
1901 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
1902 *I = Constant::getNullValue(NewIndexType);
1903 MadeChange = true;
1904 }
1905
1906 if (IndexTy != NewIndexType) {
1907 // If we are using a wider index than needed for this platform, shrink
1908 // it to what we need. If narrower, sign-extend it to what we need.
1909 // This explicit cast can make subsequent optimizations more obvious.
1910 *I = Builder.CreateIntCast(*I, NewIndexType, true);
1911 MadeChange = true;
1912 }
1913 }
1914 if (MadeChange)
1915 return &GEP;
1916
1917 // Check to see if the inputs to the PHI node are getelementptr instructions.
1918 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
1919 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
1920 if (!Op1)
1921 return nullptr;
1922
1923 // Don't fold a GEP into itself through a PHI node. This can only happen
1924 // through the back-edge of a loop. Folding a GEP into itself means that
1925 // the value of the previous iteration needs to be stored in the meantime,
1926 // thus requiring an additional register variable to be live, but not
1927 // actually achieving anything (the GEP still needs to be executed once per
1928 // loop iteration).
1929 if (Op1 == &GEP)
1930 return nullptr;
1931
1932 int DI = -1;
1933
1934 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
1935 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
1936 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
1937 Op1->getSourceElementType() != Op2->getSourceElementType())
1938 return nullptr;
1939
1940 // As for Op1 above, don't try to fold a GEP into itself.
1941 if (Op2 == &GEP)
1942 return nullptr;
1943
1944 // Keep track of the type as we walk the GEP.
1945 Type *CurTy = nullptr;
1946
1947 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
1948 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
1949 return nullptr;
1950
1951 if (Op1->getOperand(J) != Op2->getOperand(J)) {
1952 if (DI == -1) {
1953            // We have not seen any differences in the GEPs feeding the
1954            // PHI yet, so we record this one if it is allowed to be a
1955 // variable.
1956
1957 // The first two arguments can vary for any GEP, the rest have to be
1958 // static for struct slots
1959 if (J > 1) {
1960 assert(CurTy && "No current type?");
1961 if (CurTy->isStructTy())
1962 return nullptr;
1963 }
1964
1965 DI = J;
1966 } else {
1967 // The GEP is different by more than one input. While this could be
1968 // extended to support GEPs that vary by more than one variable it
1969 // doesn't make sense since it greatly increases the complexity and
1970 // would result in an R+R+R addressing mode which no backend
1971 // directly supports and would need to be broken into several
1972 // simpler instructions anyway.
1973 return nullptr;
1974 }
1975 }
1976
1977 // Sink down a layer of the type for the next iteration.
1978 if (J > 0) {
1979 if (J == 1) {
1980 CurTy = Op1->getSourceElementType();
1981 } else {
1982 CurTy =
1983 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
1984 }
1985 }
1986 }
1987 }
1988
1989 // If not all GEPs are identical we'll have to create a new PHI node.
1990 // Check that the old PHI node has only one use so that it will get
1991 // removed.
1992 if (DI != -1 && !PN->hasOneUse())
1993 return nullptr;
1994
1995 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
1996 if (DI == -1) {
1997 // All the GEPs feeding the PHI are identical. Clone one down into our
1998 // BB so that it can be merged with the current GEP.
1999 } else {
2000 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2001 // into the current block so it can be merged, and create a new PHI to
2002 // set that index.
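      // For example, a phi of (gep %p, i64 1) and (gep %p, i64 2) can become
      // a single (gep %p, i64 %idx), where %idx is a new phi of 1 and 2.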
2003 PHINode *NewPN;
2004 {
2007 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2008 PN->getNumOperands());
2009 }
2010
2011 for (auto &I : PN->operands())
2012 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2013 PN->getIncomingBlock(I));
2014
2015 NewGEP->setOperand(DI, NewPN);
2016 }
2017
2018 NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2019 return replaceOperand(GEP, 0, NewGEP);
2020 }
2021
2022 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2023 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2024 return I;
2025
2026 // Skip if GEP source element type is scalable. The type alloc size is unknown
2027 // at compile-time.
2028 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2029 unsigned AS = GEP.getPointerAddressSpace();
2030 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2031 DL.getIndexSizeInBits(AS)) {
2032 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2033
2034 bool Matched = false;
2035 uint64_t C;
2036 Value *V = nullptr;
2037 if (TyAllocSize == 1) {
2038 V = GEP.getOperand(1);
2039 Matched = true;
2040 } else if (match(GEP.getOperand(1),
2041 m_AShr(m_Value(V), m_ConstantInt(C)))) {
2042 if (TyAllocSize == 1ULL << C)
2043 Matched = true;
2044 } else if (match(GEP.getOperand(1),
2045 m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2046 if (TyAllocSize == C)
2047 Matched = true;
2048 }
2049
2050 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2051 // only if both point to the same underlying object (otherwise provenance
2052 // is not necessarily retained).
2053 Value *Y;
2054 Value *X = GEP.getOperand(0);
2055 if (Matched &&
2059 }
2060 }
2061
2062 // We do not handle pointer-vector geps here.
2063 if (GEPType->isVectorTy())
2064 return nullptr;
2065
2066 if (!GEP.isInBounds()) {
2067 unsigned IdxWidth =
2069 APInt BasePtrOffset(IdxWidth, 0);
2070 Value *UnderlyingPtrOp =
2072 BasePtrOffset);
2073 if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2074 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2075 BasePtrOffset.isNonNegative()) {
2076 APInt AllocSize(
2077 IdxWidth,
2078 DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
2079 if (BasePtrOffset.ule(AllocSize)) {
2081 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2082 }
2083 }
2084 }
2085 }
2086
2087  if (Instruction *R = foldSelectGEP(GEP, Builder))
2088    return R;
2089
2090 return nullptr;
2091}
2092
2093static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2094                                         Instruction *AI) {
2095 if (isa<ConstantPointerNull>(V))
2096 return true;
2097 if (auto *LI = dyn_cast<LoadInst>(V))
2098 return isa<GlobalVariable>(LI->getPointerOperand());
2099 // Two distinct allocations will never be equal.
2100 return isAllocLikeFn(V, &TLI) && V != AI;
2101}
2102
2103/// Given a call CB which uses an address UsedV, return true if we can prove the
2104/// call's only possible effect is storing to UsedV.
2105static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2106 const TargetLibraryInfo &TLI) {
2107 if (!CB.use_empty())
2108 // TODO: add recursion if returned attribute is present
2109 return false;
2110
2111 if (CB.isTerminator())
2112 // TODO: remove implementation restriction
2113 return false;
2114
2115 if (!CB.willReturn() || !CB.doesNotThrow())
2116 return false;
2117
2118 // If the only possible side effect of the call is writing to the alloca,
2119 // and the result isn't used, we can safely remove any reads implied by the
2120 // call including those which might read the alloca itself.
2121 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2122 return Dest && Dest->Ptr == UsedV;
2123}
2124
2125static bool isAllocSiteRemovable(Instruction *AI,
2126                                 SmallVectorImpl<WeakTrackingVH> &Users,
2127                                 const TargetLibraryInfo &TLI) {
2128  SmallVector<Instruction*, 4> Worklist;
2129  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2130 Worklist.push_back(AI);
2131
2132 do {
2133 Instruction *PI = Worklist.pop_back_val();
2134 for (User *U : PI->users()) {
2135 Instruction *I = cast<Instruction>(U);
2136 switch (I->getOpcode()) {
2137 default:
2138 // Give up the moment we see something we can't handle.
2139 return false;
2140
2141 case Instruction::AddrSpaceCast:
2142 case Instruction::BitCast:
2143 case Instruction::GetElementPtr:
2144 Users.emplace_back(I);
2145 Worklist.push_back(I);
2146 continue;
2147
2148 case Instruction::ICmp: {
2149 ICmpInst *ICI = cast<ICmpInst>(I);
2150 // We can fold eq/ne comparisons with null to false/true, respectively.
2151 // We also fold comparisons in some conditions provided the alloc has
2152 // not escaped (see isNeverEqualToUnescapedAlloc).
2153 if (!ICI->isEquality())
2154 return false;
2155 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2156 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2157 return false;
2158 Users.emplace_back(I);
2159 continue;
2160 }
2161
2162 case Instruction::Call:
2163 // Ignore no-op and store intrinsics.
2164 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2165 switch (II->getIntrinsicID()) {
2166 default:
2167 return false;
2168
2169 case Intrinsic::memmove:
2170 case Intrinsic::memcpy:
2171 case Intrinsic::memset: {
2172 MemIntrinsic *MI = cast<MemIntrinsic>(II);
2173 if (MI->isVolatile() || MI->getRawDest() != PI)
2174 return false;
2175 [[fallthrough]];
2176 }
2177 case Intrinsic::assume:
2178 case Intrinsic::invariant_start:
2179 case Intrinsic::invariant_end:
2180 case Intrinsic::lifetime_start:
2181 case Intrinsic::lifetime_end:
2182 case Intrinsic::objectsize:
2183 Users.emplace_back(I);
2184 continue;
2185 case Intrinsic::launder_invariant_group:
2186 case Intrinsic::strip_invariant_group:
2187 Users.emplace_back(I);
2188 Worklist.push_back(I);
2189 continue;
2190 }
2191 }
2192
2193 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2194 Users.emplace_back(I);
2195 continue;
2196 }
2197
2198 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2199 getAllocationFamily(I, &TLI) == Family) {
2200 assert(Family);
2201 Users.emplace_back(I);
2202 continue;
2203 }
2204
2205 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
2206 getAllocationFamily(I, &TLI) == Family) {
2207 assert(Family);
2208 Users.emplace_back(I);
2209 Worklist.push_back(I);
2210 continue;
2211 }
2212
2213 return false;
2214
2215 case Instruction::Store: {
2216 StoreInst *SI = cast<StoreInst>(I);
2217 if (SI->isVolatile() || SI->getPointerOperand() != PI)
2218 return false;
2219 Users.emplace_back(I);
2220 continue;
2221 }
2222 }
2223 llvm_unreachable("missing a return?");
2224 }
2225 } while (!Worklist.empty());
2226 return true;
2227}
2228
2229Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2230  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2231
2232  // If we have a malloc call which is only used in comparisons to null and in
2233  // free calls, delete the calls and replace the comparisons with true
2234 // or false as appropriate.
2235
2236 // This is based on the principle that we can substitute our own allocation
2237 // function (which will never return null) rather than knowledge of the
2238 // specific function being called. In some sense this can change the permitted
2239 // outputs of a program (when we convert a malloc to an alloca, the fact that
2240 // the allocation is now on the stack is potentially visible, for example),
2241  // but we believe it does so in a permissible manner.
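  // For example (illustrative IR, with the malloc result otherwise unused):
  //   %p = call ptr @malloc(i64 16)
  //   %c = icmp eq ptr %p, null
  //   call void @free(ptr %p)
  // can be reduced to "%c = false", since the substituted allocation never
  // returns null.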
2243
2244 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2245 // before each store.
2247 std::unique_ptr<DIBuilder> DIB;
2248 if (isa<AllocaInst>(MI)) {
2249 findDbgUsers(DVIs, &MI);
2250 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2251 }
2252
2253 if (isAllocSiteRemovable(&MI, Users, TLI)) {
2254 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2255      // Lower all @llvm.objectsize calls first because they may
2256 // use a bitcast/GEP of the alloca we are removing.
2257 if (!Users[i])
2258 continue;
2259
2260 Instruction *I = cast<Instruction>(&*Users[i]);
2261
2262 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2263 if (II->getIntrinsicID() == Intrinsic::objectsize) {
2264 Value *Result =
2265 lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2266 replaceInstUsesWith(*I, Result);
2267          eraseInstFromFunction(*I);
2268          Users[i] = nullptr; // Skip examining in the next loop.
2269 }
2270 }
2271 }
2272 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2273 if (!Users[i])
2274 continue;
2275
2276 Instruction *I = cast<Instruction>(&*Users[i]);
2277
2278 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2279        replaceInstUsesWith(*C,
2280                            ConstantInt::get(Type::getInt1Ty(C->getContext()),
2281 C->isFalseWhenEqual()));
2282 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2283 for (auto *DVI : DVIs)
2284 if (DVI->isAddressOfVariable())
2286 } else {
2287 // Casts, GEP, or anything else: we're about to delete this instruction,
2288 // so it can not have any valid uses.
2289 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2290 }
2291      eraseInstFromFunction(*I);
2292    }
2293
2294 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2295 // Replace invoke with a NOP intrinsic to maintain the original CFG
2296 Module *M = II->getModule();
2297 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2298 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2299 std::nullopt, "", II->getParent());
2300 }
2301
2302 // Remove debug intrinsics which describe the value contained within the
2303 // alloca. In addition to removing dbg.{declare,addr} which simply point to
2304 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2305 //
2306 // ```
2307 // define void @foo(i32 %0) {
2308 // %a = alloca i32 ; Deleted.
2309 // store i32 %0, i32* %a
2310 // dbg.value(i32 %0, "arg0") ; Not deleted.
2311 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2312 // call void @trivially_inlinable_no_op(i32* %a)
2313 // ret void
2314 // }
2315 // ```
2316 //
2317 // This may not be required if we stop describing the contents of allocas
2318 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2319 // the LowerDbgDeclare utility.
2320 //
2321 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2322 // "arg0" dbg.value may be stale after the call. However, failing to remove
2323 // the DW_OP_deref dbg.value causes large gaps in location coverage.
2324 for (auto *DVI : DVIs)
2325 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2326 DVI->eraseFromParent();
2327
2328 return eraseInstFromFunction(MI);
2329 }
2330 return nullptr;
2331}
2332
2333/// Move the call to free before a NULL test.
2334///
2335/// Check if this free is executed only after its argument has been tested
2336/// against NULL (property 0).
2337/// If yes, it is legal to move this call into its predecessor block.
2338///
2339/// The move is performed only if the block containing the call to free
2340/// will be removed, i.e.:
2341/// 1. it has only one predecessor P, and P has two successors
2342/// 2. it contains the call, noops, and an unconditional branch
2343/// 3. its successor is the same as its predecessor's successor
2344///
2345/// Profitability is not a concern here; this function should
2346/// be called only if the caller knows this transformation would be
2347/// profitable (e.g., for code size).
2348static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2349                                                const DataLayout &DL) {
2350 Value *Op = FI.getArgOperand(0);
2351 BasicBlock *FreeInstrBB = FI.getParent();
2352 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2353
2354 // Validate part of constraint #1: Only one predecessor
2355  // FIXME: We can extend the number of predecessors, but in that case, we
2356 // would duplicate the call to free in each predecessor and it may
2357 // not be profitable even for code size.
2358 if (!PredBB)
2359 return nullptr;
2360
2361  // Validate constraint #2: Does this block contain only the call to
2362 // free, noops, and an unconditional branch?
2363 BasicBlock *SuccBB;
2364 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2365 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2366 return nullptr;
2367
2368  // If there are only 2 instructions in the block, at this point,
2369  // these are the call to free and the unconditional branch.
2370 // If there are more than 2 instructions, check that they are noops
2371 // i.e., they won't hurt the performance of the generated code.
2372 if (FreeInstrBB->size() != 2) {
2373 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2374 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2375 continue;
2376 auto *Cast = dyn_cast<CastInst>(&Inst);
2377 if (!Cast || !Cast->isNoopCast(DL))
2378 return nullptr;
2379 }
2380 }
2381 // Validate the rest of constraint #1 by matching on the pred branch.
2382 Instruction *TI = PredBB->getTerminator();
2383 BasicBlock *TrueBB, *FalseBB;
2384  ICmpInst::Predicate Pred;
2385  if (!match(TI, m_Br(m_ICmp(Pred,
2386                             m_CombineOr(m_Specific(Op),
2387                                         m_Specific(Op->stripPointerCasts())),
2388 m_Zero()),
2389 TrueBB, FalseBB)))
2390 return nullptr;
2391 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2392 return nullptr;
2393
2394 // Validate constraint #3: Ensure the null case just falls through.
2395 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2396 return nullptr;
2397 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2398 "Broken CFG: missing edge from predecessor to successor");
2399
2400 // At this point, we know that everything in FreeInstrBB can be moved
2401 // before TI.
2402 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2403 if (&Instr == FreeInstrBBTerminator)
2404 break;
2405 Instr.moveBefore(TI);
2406 }
2407 assert(FreeInstrBB->size() == 1 &&
2408 "Only the branch instruction should remain");
2409
2410 // Now that we've moved the call to free before the NULL check, we have to
2411 // remove any attributes on its parameter that imply it's non-null, because
2412 // those attributes might have only been valid because of the NULL check, and
2413 // we can get miscompiles if we keep them. This is conservative if non-null is
2414 // also implied by something other than the NULL check, but it's guaranteed to
2415 // be correct, and the conservativeness won't matter in practice, since the
2416 // attributes are irrelevant for the call to free itself and the pointer
2417 // shouldn't be used after the call.
2418 AttributeList Attrs = FI.getAttributes();
2419 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
2420 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
2421 if (Dereferenceable.isValid()) {
2422 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
2423 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
2424 Attribute::Dereferenceable);
2425 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
2426 }
2427 FI.setAttributes(Attrs);
2428
2429 return &FI;
2430}
2431
2432Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
2433  // free undef -> unreachable.
2434 if (isa<UndefValue>(Op)) {
2435 // Leave a marker since we can't modify the CFG here.
2437 return eraseInstFromFunction(FI);
2438 }
2439
2440 // If we have 'free null' delete the instruction. This can happen in stl code
2441 // when lots of inlining happens.
2442 if (isa<ConstantPointerNull>(Op))
2443 return eraseInstFromFunction(FI);
2444
2445 // If we had free(realloc(...)) with no intervening uses, then eliminate the
2446 // realloc() entirely.
2447 CallInst *CI = dyn_cast<CallInst>(Op);
2448 if (CI && CI->hasOneUse())
2449 if (Value *ReallocatedOp = getReallocatedOperand(CI))
2450 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
2451
2452 // If we optimize for code size, try to move the call to free before the null
2453 // test so that simplify cfg can remove the empty block and dead code
2454 // elimination the branch. I.e., helps to turn something like:
2455 // if (foo) free(foo);
2456 // into
2457 // free(foo);
2458 //
2459 // Note that we can only do this for 'free' and not for any flavor of
2460 // 'operator delete'; there is no 'operator delete' symbol for which we are
2461 // permitted to invent a call, even if we're passing in a null pointer.
2462 if (MinimizeSize) {
2463 LibFunc Func;
2464 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
2465      if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
2466        return I;
2467 }
2468
2469 return nullptr;
2470}
2471
2472Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
2473  // Nothing for now.
2474 return nullptr;
2475}
2476
2477// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
2478Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
2479  // Try to remove the previous instruction if it must lead to unreachable.
2480 // This includes instructions like stores and "llvm.assume" that may not get
2481 // removed by simple dead code elimination.
2482 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
2483 // While we theoretically can erase EH, that would result in a block that
2484 // used to start with an EH no longer starting with EH, which is invalid.
2485 // To make it valid, we'd need to fixup predecessors to no longer refer to
2486 // this block, but that changes CFG, which is not allowed in InstCombine.
2487 if (Prev->isEHPad())
2488 return nullptr; // Can not drop any more instructions. We're done here.
2489
2490    if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
2491      return nullptr; // Can not drop any more instructions. We're done here.
2492 // Otherwise, this instruction can be freely erased,
2493 // even if it is not side-effect free.
2494
2495 // A value may still have uses before we process it here (for example, in
2496 // another unreachable block), so convert those to poison.
2497 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
2498 eraseInstFromFunction(*Prev);
2499 }
2500 assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
2501 // FIXME: recurse into unconditional predecessors?
2502 return nullptr;
2503}
2504
2505Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
2506  assert(BI.isUnconditional() && "Only for unconditional branches.");
2507
2508 // If this store is the second-to-last instruction in the basic block
2509 // (excluding debug info and bitcasts of pointers) and if the block ends with
2510 // an unconditional branch, try to move the store to the successor block.
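  // For example, a store that both predecessors of the successor perform to
  // the same address may be sunk into the successor as a single store of a
  // phi'd value.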
2511
2512 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
2513 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
2514 return BBI->isDebugOrPseudoInst() ||
2515 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
2516 };
2517
2518 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
2519 do {
2520 if (BBI != FirstInstr)
2521 --BBI;
2522 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
2523
2524 return dyn_cast<StoreInst>(BBI);
2525 };
2526
2527 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
2528    if (mergeStoreIntoSuccessor(*SI))
2529      return &BI;
2530
2531 return nullptr;
2532}
2533
2534/// If a block is dead due to a known branch condition, remove instructions
2535/// in it.
2536static bool handlePotentiallyDeadBlock(BasicBlock *BB, InstCombiner &IC) {
2537  // We only know one edge to this block is dead, but there may be others.
2538 // TODO: We could track dead edges globally.
2539 if (!BB->getSinglePredecessor())
2540 return false;
2541
2542 bool Changed = false;
2544 std::next(BB->getTerminator()->getReverseIterator()), BB->rend()))) {
2545 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
2546 IC.replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
2547 Changed = true;
2548 }
2549 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
2550 continue;
2551 IC.eraseInstFromFunction(Inst);
2552 Changed = true;
2553 }
2554
2555 // TODO: Successor blocks may also be dead.
2556 return Changed;
2557}
2558
2559static bool handlePotentiallyDeadSuccessors(BasicBlock *BB,
2560                                            BasicBlock *LiveSucc,
2561 InstCombiner &IC) {
2562 bool Changed = false;
2563 for (BasicBlock *Succ : successors(BB))
2564 if (Succ != LiveSucc)
2565 Changed |= handlePotentiallyDeadBlock(Succ, IC);
2566 return Changed;
2567}
2568
2569Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
2570  if (BI.isUnconditional())
2571    return visitUnconditionalBranchInst(BI);
2572
2573 // Change br (not X), label True, label False to: br X, label False, True
2574 Value *Cond = BI.getCondition();
2575 Value *X;
2576 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
2577 // Swap Destinations and condition...
2578 BI.swapSuccessors();
2579 return replaceOperand(BI, 0, X);
2580 }
2581
2582 // Canonicalize logical-and-with-invert as logical-or-with-invert.
2583 // This is done by inverting the condition and swapping successors:
2584 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
2585 Value *Y;
2586 if (isa<SelectInst>(Cond) &&
2587 match(Cond,
2589 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
2590 Value *Or = Builder.CreateLogicalOr(NotX, Y);
2591 BI.swapSuccessors();
2592 return replaceOperand(BI, 0, Or);
2593 }
2594
2595 // If the condition is irrelevant, remove the use so that other
2596 // transforms on the condition become more effective.
2597 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
2598 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
2599
2600 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
2601 CmpInst::Predicate Pred;
2602 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
2603 !isCanonicalPredicate(Pred)) {
2604 // Swap destinations and condition.
2605 auto *Cmp = cast<CmpInst>(Cond);
2606 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
2607 BI.swapSuccessors();
2608 Worklist.push(Cmp);
2609 return &BI;
2610 }
2611
2612 if (isa<UndefValue>(Cond) && handlePotentiallyDeadSuccessors(
2613 BI.getParent(), /*LiveSucc*/ nullptr, *this))
2614 return &BI;
2615 if (auto *CI = dyn_cast<ConstantInt>(Cond))
2617 BI.getParent(), BI.getSuccessor(!CI->getZExtValue()), *this))
2618 return &BI;
2619
2620 return nullptr;
2621}
2622
2623Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
2624  Value *Cond = SI.getCondition();
2625 Value *Op0;
2626 ConstantInt *AddRHS;
2627 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
2628 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
2629 for (auto Case : SI.cases()) {
2630 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
2631 assert(isa<ConstantInt>(NewCase) &&
2632 "Result of expression should be constant");
2633 Case.setValue(cast<ConstantInt>(NewCase));
2634 }
2635 return replaceOperand(SI, 0, Op0);
2636 }
2637
2638 if (isa<UndefValue>(Cond) && handlePotentiallyDeadSuccessors(
2639 SI.getParent(), /*LiveSucc*/ nullptr, *this))
2640 return &SI;
2641 if (auto *CI = dyn_cast<ConstantInt>(Cond))
2643 SI.getParent(), SI.findCaseValue(CI)->getCaseSuccessor(), *this))
2644 return &SI;
2645
2646 KnownBits Known = computeKnownBits(Cond, 0, &SI);
2647 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
2648 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
2649
2650 // Compute the number of leading bits we can ignore.
2651 // TODO: A better way to determine this would use ComputeNumSignBits().
2652 for (const auto &C : SI.cases()) {
2653 LeadingKnownZeros =
2654 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
2655 LeadingKnownOnes =
2656 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
2657 }
2658
2659 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
2660
2661 // Shrink the condition operand if the new type is smaller than the old type.
2662 // But do not shrink to a non-standard type, because backend can't generate
2663 // good code for that yet.
2664 // TODO: We can make it aggressive again after fixing PR39569.
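  // For example, if the known bits and all case values show that only the low
  // 8 bits of the condition matter, a switch on an i32 condition can be
  // rewritten as a switch on the truncated i8 value.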
2665 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
2666 shouldChangeType(Known.getBitWidth(), NewWidth)) {
2667 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
2669 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
2670
2671 for (auto Case : SI.cases()) {
2672 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
2673 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
2674 }
2675 return replaceOperand(SI, 0, NewCond);
2676 }
2677
2678 return nullptr;
2679}
2680
2681Instruction *
2682InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
2683 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
2684 if (!WO)
2685 return nullptr;
2686
2687 Intrinsic::ID OvID = WO->getIntrinsicID();
2688 const APInt *C = nullptr;
2689 if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
2690 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
2691 OvID == Intrinsic::umul_with_overflow)) {
2692 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
2693 if (C->isAllOnes())
2694 return BinaryOperator::CreateNeg(WO->getLHS());
2695 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
2696 if (C->isPowerOf2()) {
2697 return BinaryOperator::CreateShl(
2698 WO->getLHS(),
2699 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
2700 }
2701 }
2702 }
2703
2704 // We're extracting from an overflow intrinsic. See if we're the only user.
2705 // That allows us to simplify multiple result intrinsics to simpler things
2706 // that just get one value.
2707 if (!WO->hasOneUse())
2708 return nullptr;
2709
2710 // Check if we're grabbing only the result of a 'with overflow' intrinsic
2711 // and replace it with a traditional binary instruction.
2712 if (*EV.idx_begin() == 0) {
2713 Instruction::BinaryOps BinOp = WO->getBinaryOp();
2714 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
2715 // Replace the old instruction's uses with poison.
2716 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
2717    eraseInstFromFunction(*WO);
2718    return BinaryOperator::Create(BinOp, LHS, RHS);
2719 }
2720
2721 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
2722
2723 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
2724 if (OvID == Intrinsic::usub_with_overflow)
2725 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
2726
2727 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
2728 // +1 is not possible because we assume signed values.
2729 if (OvID == Intrinsic::smul_with_overflow &&
2730 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
2731 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
2732
2733 // If only the overflow result is used, and the right hand side is a
2734 // constant (or constant splat), we can remove the intrinsic by directly
2735 // checking for overflow.
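  // For example, extractvalue (uadd.with.overflow i8 %x, 200), 1 is equivalent
  // to icmp ugt i8 %x, 55, since the add overflows exactly when %x > 255 - 200.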
2736 if (C) {
2737 // Compute the no-wrap range for LHS given RHS=C, then construct an
2738 // equivalent icmp, potentially using an offset.
2739    ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
2740        WO->getBinaryOp(), *C, WO->getNoWrapKind());
2741
2742 CmpInst::Predicate Pred;
2743 APInt NewRHSC, Offset;
2744 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
2745 auto *OpTy = WO->getRHS()->getType();
2746 auto *NewLHS = WO->getLHS();
2747 if (Offset != 0)
2748 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
2749 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
2750 ConstantInt::get(OpTy, NewRHSC));
2751 }
2752
2753 return nullptr;
2754}
2755
2756Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
2757  Value *Agg = EV.getAggregateOperand();
2758
2759 if (!EV.hasIndices())
2760 return replaceInstUsesWith(EV, Agg);
2761
2762 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
2763 SQ.getWithInstruction(&EV)))
2764 return replaceInstUsesWith(EV, V);
2765
2766 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
2767 // We're extracting from an insertvalue instruction, compare the indices
2768 const unsigned *exti, *exte, *insi, *inse;
2769 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
2770 exte = EV.idx_end(), inse = IV->idx_end();
2771 exti != exte && insi != inse;
2772 ++exti, ++insi) {
2773 if (*insi != *exti)
2774 // The insert and extract both reference distinctly different elements.
2775 // This means the extract is not influenced by the insert, and we can
2776 // replace the aggregate operand of the extract with the aggregate
2777 // operand of the insert. i.e., replace
2778 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2779 // %E = extractvalue { i32, { i32 } } %I, 0
2780 // with
2781 // %E = extractvalue { i32, { i32 } } %A, 0
2782 return ExtractValueInst::Create(IV->getAggregateOperand(),
2783 EV.getIndices());
2784 }
2785 if (exti == exte && insi == inse)
2786 // Both iterators are at the end: Index lists are identical. Replace
2787 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2788 // %C = extractvalue { i32, { i32 } } %B, 1, 0
2789 // with "i32 42"
2790 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
2791 if (exti == exte) {
2792 // The extract list is a prefix of the insert list. i.e. replace
2793 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2794 // %E = extractvalue { i32, { i32 } } %I, 1
2795 // with
2796 // %X = extractvalue { i32, { i32 } } %A, 1
2797 // %E = insertvalue { i32 } %X, i32 42, 0
2798 // by switching the order of the insert and extract (though the
2799 // insertvalue should be left in, since it may have other uses).
2800 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
2801 EV.getIndices());
2802 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
2803 ArrayRef(insi, inse));
2804 }
2805 if (insi == inse)
2806 // The insert list is a prefix of the extract list
2807 // We can simply remove the common indices from the extract and make it
2808 // operate on the inserted value instead of the insertvalue result.
2809 // i.e., replace
2810 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2811 // %E = extractvalue { i32, { i32 } } %I, 1, 0
2812 // with
2813 // %E extractvalue { i32 } { i32 42 }, 0
2814 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
2815 ArrayRef(exti, exte));
2816 }
2817
2818 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
2819 return R;
2820
2821 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
2822 // Bail out if the aggregate contains scalable vector type
2823 if (auto *STy = dyn_cast<StructType>(Agg->getType());
2824 STy && STy->containsScalableVectorType())
2825 return nullptr;
2826
2827 // If the (non-volatile) load only has one use, we can rewrite this to a
2828 // load from a GEP. This reduces the size of the load. If a load is used
2829 // only by extractvalue instructions then this either must have been
2830 // optimized before, or it is a struct with padding, in which case we
2831 // don't want to do the transformation as it loses padding knowledge.
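    // For example, extracting field 1 from a single-use load of {i32, i64}
    // becomes a load of an inbounds GEP to that field, so only the i64 is
    // loaded.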
2832 if (L->isSimple() && L->hasOneUse()) {
2833 // extractvalue has integer indices, getelementptr has Value*s. Convert.
2834 SmallVector<Value*, 4> Indices;
2835 // Prefix an i32 0 since we need the first element.
2836 Indices.push_back(Builder.getInt32(0));
2837 for (unsigned Idx : EV.indices())
2838 Indices.push_back(Builder.getInt32(Idx));
2839
2840 // We need to insert these at the location of the old load, not at that of
2841 // the extractvalue.
2842      Builder.SetInsertPoint(L);
2843      Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
2844                                             L->getPointerOperand(), Indices);
2845      Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
2846      // Whatever aliasing information we had for the original load must also
2847 // hold for the smaller load, so propagate the annotations.
2848 NL->setAAMetadata(L->getAAMetadata());
2849 // Returning the load directly will cause the main loop to insert it in
2850 // the wrong spot, so use replaceInstUsesWith().
2851 return replaceInstUsesWith(EV, NL);
2852 }
2853 }
2854
2855 if (auto *PN = dyn_cast<PHINode>(Agg))
2856 if (Instruction *Res = foldOpIntoPhi(EV, PN))
2857 return Res;
2858
2859 // We could simplify extracts from other values. Note that nested extracts may
2860 // already be simplified implicitly by the above: extract (extract (insert) )
2861 // will be translated into extract ( insert ( extract ) ) first and then just
2862 // the value inserted, if appropriate. Similarly for extracts from single-use
2863 // loads: extract (extract (load)) will be translated to extract (load (gep))
2864 // and if again single-use then via load (gep (gep)) to load (gep).
2865 // However, double extracts from e.g. function arguments or return values
2866 // aren't handled yet.
2867 return nullptr;
2868}
2869
2870/// Return 'true' if the given typeinfo will match anything.
2871static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
2872 switch (Personality) {
2876 // The GCC C EH and Rust personality only exists to support cleanups, so
2877 // it's not clear what the semantics of catch clauses are.
2878 return false;
2880 return false;
2882 // While __gnat_all_others_value will match any Ada exception, it doesn't
2883 // match foreign exceptions (or didn't, before gcc-4.7).
2884 return false;
2894 return TypeInfo->isNullValue();
2895 }
2896 llvm_unreachable("invalid enum");
2897}
2898
2899static bool shorter_filter(const Value *LHS, const Value *RHS) {
2900 return
2901 cast<ArrayType>(LHS->getType())->getNumElements()
2902 <
2903 cast<ArrayType>(RHS->getType())->getNumElements();
2904}
2905
2906Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
2907  // The logic here should be correct for any real-world personality function.
2908 // However if that turns out not to be true, the offending logic can always
2909 // be conditioned on the personality function, like the catch-all logic is.
2910 EHPersonality Personality =
2911 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
2912
2913 // Simplify the list of clauses, eg by removing repeated catch clauses
2914 // (these are often created by inlining).
2915 bool MakeNewInstruction = false; // If true, recreate using the following:
2916 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
2917 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
2918
2919 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
2920 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
2921 bool isLastClause = i + 1 == e;
2922 if (LI.isCatch(i)) {
2923 // A catch clause.
2924 Constant *CatchClause = LI.getClause(i);
2925 Constant *TypeInfo = CatchClause->stripPointerCasts();
2926
2927 // If we already saw this clause, there is no point in having a second
2928 // copy of it.
2929 if (AlreadyCaught.insert(TypeInfo).second) {
2930 // This catch clause was not already seen.
2931 NewClauses.push_back(CatchClause);
2932 } else {
2933 // Repeated catch clause - drop the redundant copy.
2934 MakeNewInstruction = true;
2935 }
2936
2937 // If this is a catch-all then there is no point in keeping any following
2938 // clauses or marking the landingpad as having a cleanup.
2939 if (isCatchAll(Personality, TypeInfo)) {
2940 if (!isLastClause)
2941 MakeNewInstruction = true;
2942 CleanupFlag = false;
2943 break;
2944 }
2945 } else {
2946 // A filter clause. If any of the filter elements were already caught
2947 // then they can be dropped from the filter. It is tempting to try to
2948 // exploit the filter further by saying that any typeinfo that does not
2949 // occur in the filter can't be caught later (and thus can be dropped).
2950 // However this would be wrong, since typeinfos can match without being
2951 // equal (for example if one represents a C++ class, and the other some
2952 // class derived from it).
2953 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
2954 Constant *FilterClause = LI.getClause(i);
2955 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
2956 unsigned NumTypeInfos = FilterType->getNumElements();
2957
2958 // An empty filter catches everything, so there is no point in keeping any
2959 // following clauses or marking the landingpad as having a cleanup. By
2960 // dealing with this case here the following code is made a bit simpler.
2961 if (!NumTypeInfos) {
2962 NewClauses.push_back(FilterClause);
2963 if (!isLastClause)
2964 MakeNewInstruction = true;
2965 CleanupFlag = false;
2966 break;
2967 }
2968
2969 bool MakeNewFilter = false; // If true, make a new filter.
2970 SmallVector<Constant *, 16> NewFilterElts; // New elements.
2971 if (isa<ConstantAggregateZero>(FilterClause)) {
2972 // Not an empty filter - it contains at least one null typeinfo.
2973 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
2974 Constant *TypeInfo =
2975            Constant::getNullValue(FilterType->getElementType());
2976        // If this typeinfo is a catch-all then the filter can never match.
2977 if (isCatchAll(Personality, TypeInfo)) {
2978 // Throw the filter away.
2979 MakeNewInstruction = true;
2980 continue;
2981 }
2982
2983 // There is no point in having multiple copies of this typeinfo, so
2984 // discard all but the first copy if there is more than one.
2985 NewFilterElts.push_back(TypeInfo);
2986 if (NumTypeInfos > 1)
2987 MakeNewFilter = true;
2988 } else {
2989 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
2990 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
2991 NewFilterElts.reserve(NumTypeInfos);
2992
2993 // Remove any filter elements that were already caught or that already
2994 // occurred in the filter. While there, see if any of the elements are
2995 // catch-alls. If so, the filter can be discarded.
2996 bool SawCatchAll = false;
2997 for (unsigned j = 0; j != NumTypeInfos; ++j) {
2998 Constant *Elt = Filter->getOperand(j);
2999 Constant *TypeInfo = Elt->stripPointerCasts();
3000 if (isCatchAll(Personality, TypeInfo)) {
3001 // This element is a catch-all. Bail out, noting this fact.
3002 SawCatchAll = true;
3003 break;
3004 }
3005
3006 // Even if we've seen a type in a catch clause, we don't want to
3007 // remove it from the filter. An unexpected type handler may be
3008 // set up for a call site which throws an exception of the same
3009 // type caught. In order for the exception thrown by the unexpected
3010 // handler to propagate correctly, the filter must be correctly
3011 // described for the call site.
3012 //
3013 // Example:
3014 //
3015 // void unexpected() { throw 1;}
3016 // void foo() throw (int) {
3017 // std::set_unexpected(unexpected);
3018 // try {
3019 // throw 2.0;
3020 // } catch (int i) {}
3021 // }
3022
3023 // There is no point in having multiple copies of the same typeinfo in
3024 // a filter, so only add it if we didn't already.
3025 if (SeenInFilter.insert(TypeInfo).second)
3026 NewFilterElts.push_back(cast<Constant>(Elt));
3027 }
3028 // A filter containing a catch-all cannot match anything by definition.
3029 if (SawCatchAll) {
3030 // Throw the filter away.
3031 MakeNewInstruction = true;
3032 continue;
3033 }
3034
3035 // If we dropped something from the filter, make a new one.
3036 if (NewFilterElts.size() < NumTypeInfos)
3037 MakeNewFilter = true;
3038 }
3039 if (MakeNewFilter) {
3040 FilterType = ArrayType::get(FilterType->getElementType(),
3041 NewFilterElts.size());
3042 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3043 MakeNewInstruction = true;
3044 }
3045
3046 NewClauses.push_back(FilterClause);
3047
3048 // If the new filter is empty then it will catch everything so there is
3049 // no point in keeping any following clauses or marking the landingpad
3050 // as having a cleanup. The case of the original filter being empty was
3051 // already handled above.
3052 if (MakeNewFilter && !NewFilterElts.size()) {
3053 assert(MakeNewInstruction && "New filter but not a new instruction!");
3054 CleanupFlag = false;
3055 break;
3056 }
3057 }
3058 }
3059
3060 // If several filters occur in a row then reorder them so that the shortest
3061 // filters come first (those with the smallest number of elements). This is
3062 // advantageous because shorter filters are more likely to match, speeding up
3063 // unwinding, but mostly because it increases the effectiveness of the other
3064 // filter optimizations below.
3065 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3066 unsigned j;
3067 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3068 for (j = i; j != e; ++j)
3069 if (!isa<ArrayType>(NewClauses[j]->getType()))
3070 break;
3071
3072 // Check whether the filters are already sorted by length. We need to know
3073 // if sorting them is actually going to do anything so that we only make a
3074 // new landingpad instruction if it does.
3075 for (unsigned k = i; k + 1 < j; ++k)
3076 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3077 // Not sorted, so sort the filters now. Doing an unstable sort would be
3078 // correct too but reordering filters pointlessly might confuse users.
3079 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3080                         shorter_filter);
3081        MakeNewInstruction = true;
3082 break;
3083 }
3084
3085 // Look for the next batch of filters.
3086 i = j + 1;
3087 }
3088
3089 // If typeinfos matched if and only if equal, then the elements of a filter L
3090 // that occurs later than a filter F could be replaced by the intersection of
3091 // the elements of F and L. In reality two typeinfos can match without being
3092 // equal (for example if one represents a C++ class, and the other some class
3093 // derived from it) so it would be wrong to perform this transform in general.
3094 // However the transform is correct and useful if F is a subset of L. In that
3095 // case L can be replaced by F, and thus removed altogether since repeating a
3096 // filter is pointless. So here we look at all pairs of filters F and L where
3097 // L follows F in the list of clauses, and remove L if every element of F is
3098 // an element of L. This can occur when inlining C++ functions with exception
3099 // specifications.
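// For example (illustration with assumed typeinfo names): if the clause list
// contains
//   filter [1 x ptr] [ptr @_ZTIi]
// followed later by
//   filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId]
// then every element of the first filter is also in the second, so the later
// filter is redundant and is erased.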
3100 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3101 // Examine each filter in turn.
3102 Value *Filter = NewClauses[i];
3103 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3104 if (!FTy)
3105 // Not a filter - skip it.
3106 continue;
3107 unsigned FElts = FTy->getNumElements();
3108 // Examine each filter following this one. Doing this backwards means that
3109 // we don't have to worry about filters disappearing under us when removed.
3110 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3111 Value *LFilter = NewClauses[j];
3112 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3113 if (!LTy)
3114 // Not a filter - skip it.
3115 continue;
3116 // If Filter is a subset of LFilter, i.e. every element of Filter is also
3117 // an element of LFilter, then discard LFilter.
3118 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3119 // If Filter is empty then it is a subset of LFilter.
3120 if (!FElts) {
3121 // Discard LFilter.
3122 NewClauses.erase(J);
3123 MakeNewInstruction = true;
3124 // Move on to the next filter.
3125 continue;
3126 }
3127 unsigned LElts = LTy->getNumElements();
3128 // If Filter is longer than LFilter then it cannot be a subset of it.
3129 if (FElts > LElts)
3130 // Move on to the next filter.
3131 continue;
3132 // At this point we know that LFilter has at least one element.
3133 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3134 // Filter is a subset of LFilter iff Filter contains only zeros (as we
3135 // already know that Filter is not longer than LFilter).
3136 if (isa<ConstantAggregateZero>(Filter)) {
3137 assert(FElts <= LElts && "Should have handled this case earlier!");
3138 // Discard LFilter.
3139 NewClauses.erase(J);
3140 MakeNewInstruction = true;
3141 }
3142 // Move on to the next filter.
3143 continue;
3144 }
3145 ConstantArray *LArray = cast<ConstantArray>(LFilter);
3146 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3147 // Since Filter is non-empty and contains only zeros, it is a subset of
3148 // LFilter iff LFilter contains a zero.
3149 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3150 for (unsigned l = 0; l != LElts; ++l)
3151 if (LArray->getOperand(l)->isNullValue()) {
3152 // LFilter contains a zero - discard it.
3153 NewClauses.erase(J);
3154 MakeNewInstruction = true;
3155 break;
3156 }
3157 // Move on to the next filter.
3158 continue;
3159 }
3160 // At this point we know that both filters are ConstantArrays. Loop over
3161 // operands to see whether every element of Filter is also an element of
3162 // LFilter. Since filters tend to be short this is probably faster than
3163 // using a method that scales nicely.
3164 ConstantArray *FArray = cast<ConstantArray>(Filter);
3165 bool AllFound = true;
3166 for (unsigned f = 0; f != FElts; ++f) {
3167 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3168 AllFound = false;
3169 for (unsigned l = 0; l != LElts; ++l) {
3170 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3171 if (LTypeInfo == FTypeInfo) {
3172 AllFound = true;
3173 break;
3174 }
3175 }
3176 if (!AllFound)
3177 break;
3178 }
3179 if (AllFound) {
3180 // Discard LFilter.
3181 NewClauses.erase(J);
3182 MakeNewInstruction = true;
3183 }
3184 // Move on to the next filter.
3185 }
3186 }
3187
3188 // If we changed any of the clauses, replace the old landingpad instruction
3189 // with a new one.
3190 if (MakeNewInstruction) {
3191 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3192 NewClauses.size());
3193 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3194 NLI->addClause(NewClauses[i]);
3195 // A landing pad with no clauses must have the cleanup flag set. It is
3196 // theoretically possible, though highly unlikely, that we eliminated all
3197 // clauses. If so, force the cleanup flag to true.
3198 if (NewClauses.empty())
3199 CleanupFlag = true;
3200 NLI->setCleanup(CleanupFlag);
3201 return NLI;
3202 }
3203
3204 // Even if none of the clauses changed, we may nonetheless have understood
3205 // that the cleanup flag is pointless. Clear it if so.
3206 if (LI.isCleanup() != CleanupFlag) {
3207 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3208 LI.setCleanup(CleanupFlag);
3209 return &LI;
3210 }
3211
3212 return nullptr;
3213}
3214
3215Value *
3217 // Try to push freeze through instructions that propagate but don't produce
3218 // poison as far as possible. If the operand of the freeze satisfies three
3219 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
3220 // but one of its operands are guaranteed non-poison, then push the freeze
3221 // through to the one operand that is not guaranteed non-poison. The actual
3222 // transform is as follows.
3223 // Op1 = ... ; Op1 can be poison
3224 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
3225 // ; only maybe-poison operand
3226 // ... = Freeze(Op0)
3227 // =>
3228 // Op1 = ...
3229 // Op1.fr = Freeze(Op1)
3230 // ... = Inst(Op1.fr, NonPoisonOps...)
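// A concrete IR sketch of the same rewrite (assumed example):
//   %a = add nsw i32 %x, 1        ; %x is the single maybe-poison operand
//   %f = freeze i32 %a
// =>
//   %x.fr = freeze i32 %x
//   %a = add i32 %x.fr, 1         ; nsw dropped so the add cannot create poison
// and %f is then replaced by %a.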
3231 auto *OrigOp = OrigFI.getOperand(0);
3232 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3233
3234 // While we could change the other users of OrigOp to use freeze(OrigOp), that
3235 // potentially reduces their optimization potential, so let's only do this iff
3236 // the OrigOp is only used by the freeze.
3237 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3238 return nullptr;
3239
3240 // We can't push the freeze through an instruction which can itself create
3241 // poison. If the only source of new poison is flags, we can simply
3242 // strip them (since we know the only use is the freeze and nothing can
3243 // benefit from them.)
3244 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
3245 /*ConsiderFlagsAndMetadata*/ false))
3246 return nullptr;
3247
3248 // If operand is guaranteed not to be poison, there is no need to add freeze
3249 // to the operand. So we first find the operand that is not guaranteed to be
3250 // poison.
3251 Use *MaybePoisonOperand = nullptr;
3252 for (Use &U : OrigOpInst->operands()) {
3253 if (isa<MetadataAsValue>(U.get()) ||
3254 isGuaranteedNotToBeUndefOrPoison(U.get()))
3255 continue;
3256 if (!MaybePoisonOperand)
3257 MaybePoisonOperand = &U;
3258 else
3259 return nullptr;
3260 }
3261
3262 OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
3263
3264 // If all operands are guaranteed to be non-poison, we can drop freeze.
3265 if (!MaybePoisonOperand)
3266 return OrigOp;
3267
3268 Builder.SetInsertPoint(OrigOpInst);
3269 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3270 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3271
3272 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3273 return OrigOp;
3274}
3275
3277 PHINode *PN) {
3278 // Detect whether this is a recurrence with a start value and some number of
3279 // backedge values. We'll check whether we can push the freeze through the
3280 // backedge values (possibly dropping poison flags along the way) until we
3281 // reach the phi again. In that case, we can move the freeze to the start
3282 // value.
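// Rough IR sketch (assumed example):
//   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//   %iv.fr = freeze i32 %iv
//   %iv.next = add nsw i32 %iv, 1
// =>
//   %start.fr = freeze i32 %start                  ; inserted on the entry edge
//   %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
//   %iv.next = add i32 %iv, 1                      ; nsw dropped
// with %iv.fr replaced by %iv.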
3283 Use *StartU = nullptr;
3285 for (Use &U : PN->incoming_values()) {
3286 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3287 // Add backedge value to worklist.
3288 Worklist.push_back(U.get());
3289 continue;
3290 }
3291
3292 // Don't bother handling multiple start values.
3293 if (StartU)
3294 return nullptr;
3295 StartU = &U;
3296 }
3297
3298 if (!StartU || Worklist.empty())
3299 return nullptr; // Not a recurrence.
3300
3301 Value *StartV = StartU->get();
3302 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3303 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3304 // We can't insert freeze if the start value is the result of the
3305 // terminator (e.g. an invoke).
3306 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3307 return nullptr;
3308
3311 while (!Worklist.empty()) {
3312 Value *V = Worklist.pop_back_val();
3313 if (!Visited.insert(V).second)
3314 continue;
3315
3316 if (Visited.size() > 32)
3317 return nullptr; // Limit the total number of values we inspect.
3318
3319 // Assume that PN is non-poison, because it will be after the transform.
3320 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3321 continue;
3322
3323 Instruction *I = dyn_cast<Instruction>(V);
3324 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3325 /*ConsiderFlagsAndMetadata*/ false))
3326 return nullptr;
3327
3328 DropFlags.push_back(I);
3329 append_range(Worklist, I->operands());
3330 }
3331
3332 for (Instruction *I : DropFlags)
3333 I->dropPoisonGeneratingFlagsAndMetadata();
3334
3335 if (StartNeedsFreeze) {
3337 Value *FrozenStartV = Builder.CreateFreeze(StartV,
3338 StartV->getName() + ".fr");
3339 replaceUse(*StartU, FrozenStartV);
3340 }
3341 return replaceInstUsesWith(FI, PN);
3342}
3343
3345 Value *Op = FI.getOperand(0);
3346
3347 if (isa<Constant>(Op) || Op->hasOneUse())
3348 return false;
3349
3350 // Move the freeze directly after the definition of its operand, so that
3351 // it dominates the maximum number of uses. Note that it may not dominate
3352 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3353 // the normal/default destination. This is why the domination check in the
3354 // replacement below is still necessary.
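// Sketch of the intent (assumed example):
//   %x = call i32 @foo()
//   ... several uses of %x ...
//   %x.fr = freeze i32 %x
// The freeze is moved to just after the call, and every use of %x that the
// freeze now dominates is rewritten to use %x.fr instead.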
3355 Instruction *MoveBefore;
3356 if (isa<Argument>(Op)) {
3357 MoveBefore =
3359 } else {
3360 MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3361 if (!MoveBefore)
3362 return false;
3363 }
3364
3365 bool Changed = false;
3366 if (&FI != MoveBefore) {
3367 FI.moveBefore(MoveBefore);
3368 Changed = true;
3369 }
3370
3371 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3372 bool Dominates = DT.dominates(&FI, U);
3373 Changed |= Dominates;
3374 return Dominates;
3375 });
3376
3377 return Changed;
3378}
3379
3380// Check if any direct or bitcast user of this value is a shuffle instruction.
3382 for (auto *U : V->users()) {
3383 if (isa<ShuffleVectorInst>(U))
3384 return true;
3385 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
3386 return true;
3387 }
3388 return false;
3389}
3390
3392 Value *Op0 = I.getOperand(0);
3393
3395 return replaceInstUsesWith(I, V);
3396
3397 // freeze (phi const, x) --> phi const, (freeze x)
3398 if (auto *PN = dyn_cast<PHINode>(Op0)) {
3399 if (Instruction *NV = foldOpIntoPhi(I, PN))
3400 return NV;
3401 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3402 return NV;
3403 }
3404
3406 return replaceInstUsesWith(I, NI);
3407
3408 // If I is freeze(undef), check its uses and fold it to a fixed constant.
3409 // - or: pick -1
3410 // - select's condition: if the true value is constant, choose it by making
3411 // the condition true.
3412 // - default: pick 0
3413 //
3414 // Note that this transform is intentionally done here rather than
3415 // via an analysis in InstSimplify or at individual user sites. That is
3416 // because we must produce the same value for all uses of the freeze -
3417 // it's the reason "freeze" exists!
3418 //
3419 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3420 // duplicating logic for binops at least.
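// For instance (assumed single use): if the only user of freeze(undef) is
//   %o = or i32 %f, %x
// then -1 is picked so the 'or' can later fold to -1; when different users
// want different constants, the loop below falls back to the null value.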
3421 auto getUndefReplacement = [&I](Type *Ty) {
3422 Constant *BestValue = nullptr;
3423 Constant *NullValue = Constant::getNullValue(Ty);
3424 for (const auto *U : I.users()) {
3425 Constant *C = NullValue;
3426 if (match(U, m_Or(m_Value(), m_Value())))
3428 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
3429 C = ConstantInt::getTrue(Ty);
3430
3431 if (!BestValue)
3432 BestValue = C;
3433 else if (BestValue != C)
3434 BestValue = NullValue;
3435 }
3436 assert(BestValue && "Must have at least one use");
3437 return BestValue;
3438 };
3439
3440 if (match(Op0, m_Undef())) {
3441 // Don't fold freeze(undef/poison) if it's used as a vector operand in
3442 // a shuffle. This may improve codegen for shuffles that allow
3443 // unspecified inputs.
3444 if (isUsedWithinShuffleVector(&I))
3445 return nullptr;
3446 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
3447 }
3448
3449 Constant *C;
3450 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
3451 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
3453 }
3454
3455 // Replace uses of Op with freeze(Op).
3456 if (freezeOtherUses(I))
3457 return &I;
3458
3459 return nullptr;
3460}
3461
3462 /// Check for the case where the call writes to an otherwise dead alloca. This
3463 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
3464 /// helper *only* analyzes the write; it doesn't check any other legality aspect.
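/// A typical source-level shape (illustrative, assumed helper name):
///   int Unused;
///   getValue(&Unused);   // out-param that is never read afterwards
/// Here the call's destination is the dead alloca backing 'Unused'.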
3466 auto *CB = dyn_cast<CallBase>(I);
3467 if (!CB)
3468 // TODO: handle e.g. store to alloca here - only worth doing if we extend
3469 // to allow reload along used path as described below. Otherwise, this
3470 // is simply a store to a dead allocation which will be removed.
3471 return false;
3472 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
3473 if (!Dest)
3474 return false;
3475 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
3476 if (!AI)
3477 // TODO: allow malloc?
3478 return false;
3479 // TODO: allow memory access dominated by move point? Note that since AI
3480 // could have a reference to itself captured by the call, we would need to
3481 // account for cycles in doing so.
3482 SmallVector<const User *> AllocaUsers;
3484 auto pushUsers = [&](const Instruction &I) {
3485 for (const User *U : I.users()) {
3486 if (Visited.insert(U).second)
3487 AllocaUsers.push_back(U);
3488 }
3489 };
3490 pushUsers(*AI);
3491 while (!AllocaUsers.empty()) {
3492 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
3493 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
3494 isa<AddrSpaceCastInst>(UserI)) {
3495 pushUsers(*UserI);
3496 continue;
3497 }
3498 if (UserI == CB)
3499 continue;
3500 // TODO: support lifetime.start/end here
3501 return false;
3502 }
3503 return true;
3504}
3505
3506/// Try to move the specified instruction from its current block into the
3507/// beginning of DestBlock, which can only happen if it's safe to move the
3508/// instruction past all of the instructions between it and the end of its
3509/// block.
3511 BasicBlock *DestBlock) {
3512 BasicBlock *SrcBlock = I->getParent();
3513
3514 // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
3515 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
3516 I->isTerminator())
3517 return false;
3518
3519 // Do not sink static or dynamic alloca instructions. Static allocas must
3520 // remain in the entry block, and dynamic allocas must not be sunk in between
3521 // a stacksave / stackrestore pair, which would incorrectly shorten its
3522 // lifetime.
3523 if (isa<AllocaInst>(I))
3524 return false;
3525
3526 // Do not sink into catchswitch blocks.
3527 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3528 return false;
3529
3530 // Do not sink convergent call instructions.
3531 if (auto *CI = dyn_cast<CallInst>(I)) {
3532 if (CI->isConvergent())
3533 return false;
3534 }
3535
3536 // Unless we can prove that the memory write isn't visible except on the
3537 // path we're sinking to, we must bail.
3538 if (I->mayWriteToMemory()) {
3539 if (!SoleWriteToDeadLocal(I, TLI))
3540 return false;
3541 }
3542
3543 // We can only sink load instructions if there is nothing between the load and
3544 // the end of block that could change the value.
3545 if (I->mayReadFromMemory()) {
3546 // We don't want to do any sophisticated alias analysis, so we only check
3547 // the instructions after I in I's parent block if we try to sink to its
3548 // successor block.
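// E.g. (assumed): a load is not sunk past a later store in its own block,
// since that store might clobber the loaded memory; scanning only this one
// block keeps the check cheap.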
3549 if (DestBlock->getUniquePredecessor() != I->getParent())
3550 return false;
3551 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
3552 E = I->getParent()->end();
3553 Scan != E; ++Scan)
3554 if (Scan->mayWriteToMemory())
3555 return false;
3556 }
3557
3558 I->dropDroppableUses([&](const Use *U) {
3559 auto *I = dyn_cast<Instruction>(U->getUser());
3560 if (I && I->getParent() != DestBlock) {
3561 Worklist.add(I);
3562 return true;
3563 }
3564 return false;
3565 });
3566 /// FIXME: We could remove droppable uses that are not dominated by
3567 /// the new position.
3568
3569 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
3570 I->moveBefore(&*InsertPos);
3571 ++NumSunkInst;
3572
3573 // Also sink all related debug uses from the source basic block. Otherwise we
3574 // get a debug use before the def. Attempt to salvage debug uses first, to
3575 // maximise the range over which variables have a location. If we cannot
3576 // salvage, then mark the location undef: we know it was supposed to receive
3577 // a new location here, but that computation has been sunk.
3579 findDbgUsers(DbgUsers, I);
3580 // Process the sinking DbgUsers in reverse order, as we only want to clone the
3581 // last appearing debug intrinsic for each given variable.
3583 for (DbgVariableIntrinsic *DVI : DbgUsers)
3584 if (DVI->getParent() == SrcBlock)
3585 DbgUsersToSink.push_back(DVI);
3586 llvm::sort(DbgUsersToSink,
3587 [](auto *A, auto *B) { return B->comesBefore(A); });
3588
3590 SmallSet<DebugVariable, 4> SunkVariables;
3591 for (auto *User : DbgUsersToSink) {
3592 // A dbg.declare instruction should not be cloned, since there can only be
3593 // one per variable fragment. It should be left in the original place
3594 // because the sunk instruction is not an alloca (otherwise we could not be
3595 // here).
3596 if (isa<DbgDeclareInst>(User))
3597 continue;
3598
3599 DebugVariable DbgUserVariable =
3600 DebugVariable(User->getVariable(), User->getExpression(),
3601 User->getDebugLoc()->getInlinedAt());
3602
3603 if (!SunkVariables.insert(DbgUserVariable).second)
3604 continue;
3605
3606 // Leave dbg.assign intrinsics in their original positions and there should
3607 // be no need to insert a clone.
3608 if (isa<DbgAssignIntrinsic>(User))
3609 continue;
3610
3611 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
3612 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
3613 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
3614 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
3615 }
3616
3617 // Perform salvaging without the clones, then sink the clones.
3618 if (!DIIClones.empty()) {
3619 salvageDebugInfoForDbgValues(*I, DbgUsers);
3620 // The clones are in reverse order of original appearance, reverse again to
3621 // maintain the original order.
3622 for (auto &DIIClone : llvm::reverse(DIIClones)) {
3623 DIIClone->insertBefore(&*InsertPos);
3624 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
3625 }
3626 }
3627
3628 return true;
3629}
3630
3632 while (!Worklist.isEmpty()) {
3633 // Walk deferred instructions in reverse order, and push them to the
3634 // worklist, which means they'll end up popped from the worklist in-order.
3635 while (Instruction *I = Worklist.popDeferred()) {
3636 // Check to see if we can DCE the instruction. We already do this here to
3637 // reduce the number of uses and thus allow other folds to trigger.
3638 // Note that eraseInstFromFunction() may push additional instructions on
3639 // the deferred worklist, so this will DCE whole instruction chains.
3642 ++NumDeadInst;
3643 continue;
3644 }
3645
3646 Worklist.push(I);
3647 }
3648
3650 if (I == nullptr) continue; // skip null values.
3651
3652 // Check to see if we can DCE the instruction.
3655 ++NumDeadInst;
3656 continue;
3657 }
3658
3659 if (!DebugCounter::shouldExecute(VisitCounter))
3660 continue;
3661
3662 // See if we can trivially sink this instruction to its user if we can
3663 // prove that the successor is not executed more frequently than our block.
3664 // Return the UserBlock if successful.
3665 auto getOptionalSinkBlockForInst =
3666 [this](Instruction *I) -> std::optional<BasicBlock *> {
3667 if (!EnableCodeSinking)
3668 return std::nullopt;
3669
3670 BasicBlock *BB = I->getParent();
3671 BasicBlock *UserParent = nullptr;
3672 unsigned NumUsers = 0;
3673
3674 for (auto *U : I->users()) {
3675 if (U->isDroppable())
3676 continue;
3677 if (NumUsers > MaxSinkNumUsers)
3678 return std::nullopt;
3679
3680 Instruction *UserInst = cast<Instruction>(U);
3681 // Special handling for Phi nodes - get the block the use occurs in.
3682 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
3683 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
3684 if (PN->getIncomingValue(i) == I) {
3685 // Bail out if we have uses in different blocks. We don't do any
3686 // sophisticated analysis (i.e., finding the NearestCommonDominator of
3687 // these use blocks).
3688 if (UserParent && UserParent != PN->getIncomingBlock(i))
3689 return std::nullopt;
3690 UserParent = PN->getIncomingBlock(i);
3691 }
3692 }
3693 assert(UserParent && "expected to find user block!");
3694 } else {
3695 if (UserParent && UserParent != UserInst->getParent())
3696 return std::nullopt;
3697 UserParent = UserInst->getParent();
3698 }
3699
3700 // Make sure these checks are done only once; naturally we do them the
3701 // first time we get the UserParent, which saves compile time.
3702 if (NumUsers == 0) {
3703 // Try sinking to another block. If that block is unreachable, then do
3704 // not bother. SimplifyCFG should handle it.
3705 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
3706 return std::nullopt;
3707
3708 auto *Term = UserParent->getTerminator();
3709 // See if the user is one of our successors that has only one
3710 // predecessor, so that we don't have to split the critical edge.
3711 // Another option where we can sink is a block that ends with a
3712 // terminator that does not pass control to another block (such as
3713 // return or unreachable or resume). In this case:
3714 // - I dominates the User (by SSA form);
3715 // - the User will be executed at most once.
3716 // So sinking I down to User is always profitable or neutral.
3717 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
3718 return std::nullopt;
3719
3720 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
3721 }
3722
3723 NumUsers++;
3724 }
3725
3726 // No users, or only droppable users.
3727 if (!UserParent)
3728 return std::nullopt;
3729
3730 return UserParent;
3731 };
3732
3733 auto OptBB = getOptionalSinkBlockForInst(I);
3734 if (OptBB) {
3735 auto *UserParent = *OptBB;
3736 // Okay, the CFG is simple enough, try to sink this instruction.
3737 if (tryToSinkInstruction(I, UserParent)) {
3738 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
3739 MadeIRChange = true;
3740 // We'll add uses of the sunk instruction below, but since
3741 // sinking can expose opportunities for its *operands*, add
3742 // them to the worklist.
3743 for (Use &U : I->operands())
3744 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
3745 Worklist.push(OpI);
3746 }
3747 }
3748
3749 // Now that we have an instruction, try combining it to simplify it.
3752 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
3753
3754#ifndef NDEBUG
3755 std::string OrigI;
3756#endif
3757 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
3758 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
3759
3760 if (Instruction *Result = visit(*I)) {
3761 ++NumCombined;
3762 // Should we replace the old instruction with a new one?
3763 if (Result != I) {
3764 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
3765 << " New = " << *Result << '\n');
3766
3767 Result->copyMetadata(*I,
3768 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
3769 // Everything uses the new instruction now.
3770 I->replaceAllUsesWith(Result);
3771
3772 // Move the name to the new instruction first.
3773 Result->takeName(I);
3774
3775 // Insert the new instruction into the basic block...
3776 BasicBlock *InstParent = I->getParent();
3777 BasicBlock::iterator InsertPos = I->getIterator();
3778
3779 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
3780 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
3781 // We need to fix up the insertion point.
3782 if (isa<PHINode>(I)) // PHI -> Non-PHI
3783 InsertPos = InstParent->getFirstInsertionPt();
3784 else // Non-PHI -> PHI
3785 InsertPos = InstParent->getFirstNonPHI()->getIterator();
3786 }
3787
3788 Result->insertInto(InstParent, InsertPos);
3789
3790 // Push the new instruction and any users onto the worklist.
3792 Worklist.push(Result);
3793
3795 } else {
3796 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
3797 << " New = " << *I << '\n');
3798
3799 // If the instruction was modified, it's possible that it is now dead.
3800 // If so, remove it.
3803 } else {
3805 Worklist.push(I);
3806 }
3807 }
3808 MadeIRChange = true;
3809 }
3810 }
3811
3812 Worklist.zap();
3813 return MadeIRChange;
3814}
3815
3816// Track the scopes used by !alias.scope and !noalias. In a function, a
3817// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
3818// by both sets. If not, the declaration of the scope can be safely omitted.
3819// The MDNode of the scope can be omitted as well for the instructions that are
3820// part of this function. We do not do that at this point, as this might become
3821// too time consuming to do.
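// Illustrative IR shape (assumed): a
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// is only worth keeping if the scope in !2 appears both in some instruction's
// !alias.scope list and in some instruction's !noalias list in this function.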
3823 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
3824 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
3825
3826public:
3828 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
3829 if (!I->hasMetadataOtherThanDebugLoc())
3830 return;
3831
3832 auto Track = [](Metadata *ScopeList, auto &Container) {
3833 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
3834 if (!MDScopeList || !Container.insert(MDScopeList).second)
3835 return;
3836 for (const auto &MDOperand : MDScopeList->operands())
3837 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
3838 Container.insert(MDScope);
3839 };
3840
3841 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
3842 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
3843 }
3844
3846 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
3847 if (!Decl)
3848 return false;
3849
3850 assert(Decl->use_empty() &&
3851 "llvm.experimental.noalias.scope.decl in use ?");
3852 const MDNode *MDSL = Decl->getScopeList();
3853 assert(MDSL->getNumOperands() == 1 &&
3854 "llvm.experimental.noalias.scope should refer to a single scope");
3855 auto &MDOperand = MDSL->getOperand(0);
3856 if (auto *MD = dyn_cast<MDNode>(MDOperand))
3857 return !UsedAliasScopesAndLists.contains(MD) ||
3858 !UsedNoAliasScopesAndLists.contains(MD);
3859
3860 // Not an MDNode? Throw it away.
3861 return true;
3862 }
3863};
3864
3865/// Populate the IC worklist from a function, by walking it in depth-first
3866/// order and adding all reachable code to the worklist.
3867///
3868/// This has a couple of tricks to make the code faster and more powerful. In
3869/// particular, we constant fold and DCE instructions as we go, to avoid adding
3870/// them to the worklist (this significantly speeds up instcombine on code where
3871/// many instructions are dead or constant). Additionally, if we find a branch
3872/// whose condition is a known constant, we only visit the reachable successors.
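/// For example (assumed IR): if a block ends in
///   br i1 true, label %then, label %else
/// only %then is added to the walk, so code reachable solely through %else is
/// never put on the worklist and is later deleted as unreachable.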
3874 const TargetLibraryInfo *TLI,
3875 InstructionWorklist &ICWorklist) {
3876 bool MadeIRChange = false;
3879 Worklist.push_back(&F.front());
3880
3881 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
3882 DenseMap<Constant *, Constant *> FoldedConstants;
3883 AliasScopeTracker SeenAliasScopes;
3884
3885 do {
3886 BasicBlock *BB = Worklist.pop_back_val();
3887
3888 // We have now visited this block! If we've already been here, ignore it.
3889 if (!Visited.insert(BB).second)
3890 continue;
3891
3892 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
3893 // Constant-fold the instruction if it is trivially constant.
3894 if (!Inst.use_empty() &&
3895 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
3896 if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
3897 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
3898 << '\n');
3899 Inst.replaceAllUsesWith(C);
3900 ++NumConstProp;
3901 if (isInstructionTriviallyDead(&Inst, TLI))
3902 Inst.eraseFromParent();
3903 MadeIRChange = true;
3904 continue;
3905 }
3906
3907 // See if we can constant fold its operands.
3908 for (Use &U : Inst.operands()) {
3909 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
3910 continue;
3911
3912 auto *C = cast<Constant>(U);
3913 Constant *&FoldRes = FoldedConstants[C];
3914 if (!FoldRes)
3915 FoldRes = ConstantFoldConstant(C, DL, TLI);
3916
3917 if (FoldRes != C) {
3918 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
3919 << "\n Old = " << *C
3920 << "\n New = " << *FoldRes << '\n');
3921 U = FoldRes;
3922 MadeIRChange = true;
3923 }
3924 }
3925
3926 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
3927 // these call instructions consumes a non-trivial amount of time and
3928 // provides no value for the optimization.
3929 if (!Inst.isDebugOrPseudoInst()) {
3930 InstrsForInstructionWorklist.push_back(&Inst);
3931 SeenAliasScopes.analyse(&Inst);
3932 }
3933 }
3934
3935 // Recursively visit successors. If this is a branch or switch on a
3936 // constant, only visit the reachable successor.
3937 Instruction *TI = BB->getTerminator();
3938 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
3939 if (isa<UndefValue>(BI->getCondition()))
3940 // Branch on undef is UB.
3941 continue;
3942 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
3943 bool CondVal = Cond->getZExtValue();
3944 BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
3945 Worklist.push_back(ReachableBB);
3946 continue;
3947 }
3948 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3949 if (isa<UndefValue>(SI->getCondition()))
3950 // Switch on undef is UB.
3951 continue;
3952 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
3953 Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
3954 continue;
3955 }
3956 }
3957
3958 append_range(Worklist, successors(TI));
3959 } while (!Worklist.empty());
3960
3961 // Remove instructions inside unreachable blocks. This prevents the
3962 // instcombine code from having to deal with some bad special cases, and
3963 // reduces use counts of instructions.
3964 for (BasicBlock &BB : F) {
3965 if (Visited.count(&BB))
3966 continue;
3967
3968 unsigned NumDeadInstInBB;
3969 unsigned NumDeadDbgInstInBB;
3970 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
3972
3973 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
3974 NumDeadInst += NumDeadInstInBB;
3975 }
3976
3977 // Once we've found all of the instructions to add to instcombine's worklist,
3978 // add them in reverse order. This way instcombine will visit from the top
3979 // of the function down. This jibes well with the way that it adds all uses
3980 // of instructions to the worklist after doing a transformation, thus avoiding
3981 // some N^2 behavior in pathological cases.
3982 ICWorklist.reserve(InstrsForInstructionWorklist.size());
3983 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
3984 // DCE instruction if trivially dead. As we iterate in reverse program
3985 // order here, we will clean up whole chains of dead instructions.
3986 if (isInstructionTriviallyDead(Inst, TLI) ||
3987 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
3988 ++NumDeadInst;
3989 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
3990 salvageDebugInfo(*Inst);
3991 Inst->eraseFromParent();
3992 MadeIRChange = true;
3993 continue;
3994 }
3995
3996 ICWorklist.push(Inst);
3997 }
3998
3999 return MadeIRChange;
4000}
4001
4006 ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4007 auto &DL = F.getParent()->getDataLayout();
4008
4009 /// Builder - This is an IRBuilder that automatically inserts new
4010 /// instructions into the worklist when they are created.
4012 F.getContext(), TargetFolder(DL),
4013 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4014 Worklist.add(I);
4015 if (auto *Assume = dyn_cast<AssumeInst>(I))
4016 AC.registerAssumption(Assume);
4017 }));
4018
4019 // Lower dbg.declare intrinsics; otherwise their value may be clobbered
4020 // by instcombiner.
4021 bool MadeIRChange = false;
4023 MadeIRChange = LowerDbgDeclare(F);
4024
4025 // Iterate while there is work to do.
4026 unsigned Iteration = 0;
4027 while (true) {
4028 ++NumWorklistIterations;
4029 ++Iteration;
4030
4031 if (Iteration > InfiniteLoopDetectionThreshold) {
4033 "Instruction Combining seems stuck in an infinite loop after " +
4034 Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4035 }
4036
4037 if (Iteration > MaxIterations) {
4038 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4039 << " on " << F.getName()
4040 << " reached; stopping before reaching a fixpoint\n");
4041 break;
4042 }
4043
4044 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4045 << F.getName() << "\n");
4046
4047 MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4048
4049 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4050 ORE, BFI, PSI, DL, LI);
4052
4053 if (!IC.run())
4054 break;
4055
4056 MadeIRChange = true;
4057 }
4058
4059 if (Iteration == 1)
4060 ++NumOneIteration;
4061 else if (Iteration == 2)
4062 ++NumTwoIterations;
4063 else if (Iteration == 3)
4064 ++NumThreeIterations;
4065 else
4066 ++NumFourOrMoreIterations;
4067
4068 return MadeIRChange;
4069}
4070
4072
4074 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
4075 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
4076 OS, MapClassName2PassName);
4077 OS << '<';
4078 OS << "max-iterations=" << Options.MaxIterations << ";";
4079 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
4080 OS << '>';
4081}
4082
4085 auto &AC = AM.getResult<AssumptionAnalysis>(F);
4086 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4087 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4089 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4090
4091 // TODO: Only use LoopInfo when the option is set. This requires that the
4092 // callers in the pass pipeline explicitly set the option.
4093 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4094 if (!LI && Options.UseLoopInfo)
4095 LI = &AM.getResult<LoopAnalysis>(F);
4096
4097 auto *AA = &AM.getResult<AAManager>(F);
4098 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4099 ProfileSummaryInfo *PSI =
4100 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4101 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4102 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4103
4104 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4105 BFI, PSI, Options.MaxIterations, LI))
4106 // No changes, all analyses are preserved.
4107 return PreservedAnalyses::all();
4108
4109 // Mark all the analyses that instcombine updates as preserved.
4112 return PA;
4113}
4114
4116 AU.setPreservesCFG();
4129}
4130
4132 if (skipFunction(F))
4133 return false;
4134
4135 // Required analyses.
4136 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4137 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4138 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4139 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4140 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4141 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4142
4143 // Optional analyses.
4144 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4145 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4146 ProfileSummaryInfo *PSI =
4147 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4148 BlockFrequencyInfo *BFI =
4149 (PSI && PSI->hasProfileSummary()) ?
4150 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4151 nullptr;
4152
4153 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4154 BFI, PSI, MaxIterations, LI);
4155}
4156
4158
4162}
4163
4165 : FunctionPass(ID), MaxIterations(MaxIterations) {
4167}
4168
4170 "Combine redundant instructions", false, false)
4182
4183// Initialization Routines
4186}
4187
4189 return new InstructionCombiningPass();
4190}
4191
4193 return new InstructionCombiningPass(MaxIterations);
4194}