InstructionCombining.cpp
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/ArrayRef.h"
38#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/Statistic.h"
46#include "llvm/Analysis/CFG.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/CFG.h"
63#include "llvm/IR/Constant.h"
64#include "llvm/IR/Constants.h"
65#include "llvm/IR/DIBuilder.h"
66#include "llvm/IR/DataLayout.h"
67#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/Dominators.h"
71#include "llvm/IR/Function.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
78#include "llvm/IR/Intrinsics.h"
79#include "llvm/IR/Metadata.h"
80#include "llvm/IR/Operator.h"
81#include "llvm/IR/PassManager.h"
83#include "llvm/IR/Type.h"
84#include "llvm/IR/Use.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/IR/ValueHandle.h"
92#include "llvm/Support/Debug.h"
100#include <algorithm>
101#include <cassert>
102#include <cstdint>
103#include <memory>
104#include <optional>
105#include <string>
106#include <utility>
107
108#define DEBUG_TYPE "instcombine"
109#include "llvm/Transforms/Utils/InstructionWorklist.h"
110#include <optional>
111
112using namespace llvm;
113using namespace llvm::PatternMatch;
114
115STATISTIC(NumWorklistIterations,
116 "Number of instruction combining iterations performed");
117
118STATISTIC(NumCombined , "Number of insts combined");
119STATISTIC(NumConstProp, "Number of constant folds");
120STATISTIC(NumDeadInst , "Number of dead inst eliminated");
121STATISTIC(NumSunkInst , "Number of instructions sunk");
122STATISTIC(NumExpand, "Number of expansions");
123STATISTIC(NumFactor , "Number of factorizations");
124STATISTIC(NumReassoc , "Number of reassociations");
125DEBUG_COUNTER(VisitCounter, "instcombine-visit",
126 "Controls which instructions are visited");
127
128// FIXME: these limits eventually should be as low as 2.
129#ifndef NDEBUG
130static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
131#else
132static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
133#endif
134
135static cl::opt<bool>
136EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
137 cl::init(true));
138
140 "instcombine-max-sink-users", cl::init(32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
144 "instcombine-infinite-loop-threshold",
145 cl::desc("Number of instruction combining iterations considered an "
146 "infinite loop"),
147 cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
148
149static cl::opt<unsigned>
150MaxArraySize("instcombine-maxarray-size", cl::init(1024),
151 cl::desc("Maximum array size considered when doing a combine"));
152
153// FIXME: Remove this flag when it is no longer necessary to convert
154// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
155// increases variable availability at the cost of accuracy. Variables that
156// cannot be promoted by mem2reg or SROA will be described as living in memory
157// for their entire lifetime. However, passes like DSE and instcombine can
158// delete stores to the alloca, leading to misleading and inaccurate debug
159// information. This flag can be removed when those passes are fixed.
160static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
161 cl::Hidden, cl::init(true));
162
163std::optional<Instruction *>
164InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
165 // Handle target specific intrinsics
166 if (II.getCalledFunction()->isTargetIntrinsic()) {
167 return TTI.instCombineIntrinsic(*this, II);
168 }
169 return std::nullopt;
170}
171
172std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
173 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
174 bool &KnownBitsComputed) {
175 // Handle target specific intrinsics
176 if (II.getCalledFunction()->isTargetIntrinsic()) {
177 return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
178 KnownBitsComputed);
179 }
180 return std::nullopt;
181}
182
183std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
184 IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
185 APInt &UndefElts3,
186 std::function<void(Instruction *, unsigned, APInt, APInt &)>
187 SimplifyAndSetOp) {
188 // Handle target specific intrinsics
189 if (II.getCalledFunction()->isTargetIntrinsic()) {
190 return TTI.simplifyDemandedVectorEltsIntrinsic(
191 *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
192 SimplifyAndSetOp);
193 }
194 return std::nullopt;
195}
196
197bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
198 return TTI.isValidAddrSpaceCast(FromAS, ToAS);
199}
200
201Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
202 return llvm::emitGEPOffset(&Builder, DL, GEP);
203}
204
205/// Legal integers and common types are considered desirable. This is used to
206/// avoid creating instructions with types that may not be supported well by the
207/// backend.
208/// NOTE: This treats i8, i16 and i32 specially because they are common
209/// types in frontend languages.
210bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
211 switch (BitWidth) {
212 case 8:
213 case 16:
214 case 32:
215 return true;
216 default:
217 return DL.isLegalInteger(BitWidth);
218 }
219}
220
221/// Return true if it is desirable to convert an integer computation from a
222/// given bit width to a new bit width.
223/// We don't want to convert from a legal or desirable type (like i8) to an
224/// illegal type or from a smaller to a larger illegal type. A width of '1'
225/// is always treated as a desirable type because i1 is a fundamental type in
226/// IR, and there are many specialized optimizations for i1 types.
227/// Common/desirable widths are equally treated as legal to convert to, in
228/// order to open up more combining opportunities.
229bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
230 unsigned ToWidth) const {
231 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
232 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
233
234 // Convert to desirable widths even if they are not legal types.
235 // Only shrink types, to prevent infinite loops.
236 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
237 return true;
238
239 // If this is a legal or desirable integer source type, and the result would be
240 // an illegal type, don't do the transformation.
241 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
242 return false;
243
244 // Otherwise, if both are illegal, do not increase the size of the result. We
245 // do allow things like i160 -> i64, but not i64 -> i160.
246 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
247 return false;
248
249 return true;
250}
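// For example, assuming a DataLayout where only i32 and i64 are legal integer
// types: shouldChangeType(64, 16) is true (shrink to a desirable common width),
// shouldChangeType(16, 64) is true (widen to a legal width), and
// shouldChangeType(33, 128) is false (never grow an illegal type).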
251
252/// Return true if it is desirable to convert a computation from 'From' to 'To'.
253/// We don't want to convert from a legal to an illegal type or from a smaller
254/// to a larger illegal type. i1 is always treated as a legal type because it is
255/// a fundamental type in IR, and there are many specialized optimizations for
256/// i1 types.
257bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
258 // TODO: This could be extended to allow vectors. Datalayout changes might be
259 // needed to properly support that.
260 if (!From->isIntegerTy() || !To->isIntegerTy())
261 return false;
262
263 unsigned FromWidth = From->getPrimitiveSizeInBits();
264 unsigned ToWidth = To->getPrimitiveSizeInBits();
265 return shouldChangeType(FromWidth, ToWidth);
266}
267
268// Return true if No Signed Wrap should be maintained for I.
269// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
270// where both B and C should be ConstantInts, results in a constant that does
271// not overflow. This function only handles the Add and Sub opcodes. For
272// all other opcodes, the function conservatively returns false.
273static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
274 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
275 if (!OBO || !OBO->hasNoSignedWrap())
276 return false;
277
278 // We reason only about Add and Sub.
279 Instruction::BinaryOps Opcode = I.getOpcode();
280 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
281 return false;
282
283 const APInt *BVal, *CVal;
284 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
285 return false;
286
287 bool Overflow = false;
288 if (Opcode == Instruction::Add)
289 (void)BVal->sadd_ov(*CVal, Overflow);
290 else
291 (void)BVal->ssub_ov(*CVal, Overflow);
292
293 return !Overflow;
294}
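// For example, when reassociating "add nsw (add nsw %X, 5), 7" the nsw flag
// may be kept because 5 + 7 does not overflow, whereas for
// "add nsw (add nsw %X, 0x7fffffff), 1" (i32) it must be dropped because the
// folded constant wraps.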
295
296static bool hasNoUnsignedWrap(BinaryOperator &I) {
297 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
298 return OBO && OBO->hasNoUnsignedWrap();
299}
300
301static bool hasNoSignedWrap(BinaryOperator &I) {
302 auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
303 return OBO && OBO->hasNoSignedWrap();
304}
305
306/// Conservatively clears subclassOptionalData after a reassociation or
307/// commutation. We preserve fast-math flags when applicable as they can be
308/// preserved.
309static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
310 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
311 if (!FPMO) {
312 I.clearSubclassOptionalData();
313 return;
314 }
315
316 FastMathFlags FMF = I.getFastMathFlags();
317 I.clearSubclassOptionalData();
318 I.setFastMathFlags(FMF);
319}
320
321/// Combine constant operands of associative operations either before or after a
322/// cast to eliminate one of the associative operations:
323/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
324/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
325static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
326 InstCombinerImpl &IC) {
327 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
328 if (!Cast || !Cast->hasOneUse())
329 return false;
330
331 // TODO: Enhance logic for other casts and remove this check.
332 auto CastOpcode = Cast->getOpcode();
333 if (CastOpcode != Instruction::ZExt)
334 return false;
335
336 // TODO: Enhance logic for other BinOps and remove this check.
337 if (!BinOp1->isBitwiseLogicOp())
338 return false;
339
340 auto AssocOpcode = BinOp1->getOpcode();
341 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
342 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
343 return false;
344
345 Constant *C1, *C2;
346 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
347 !match(BinOp2->getOperand(1), m_Constant(C2)))
348 return false;
349
350 // TODO: This assumes a zext cast.
351 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
352 // to the destination type might lose bits.
353
354 // Fold the constants together in the destination type:
355 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
356 Type *DestTy = C1->getType();
357 Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
358 Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
359 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
360 IC.replaceOperand(*BinOp1, 1, FoldedC);
361 return true;
362}
363
364// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
365// inttoptr ( ptrtoint (x) ) --> x
366Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
367 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
368 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
369 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
370 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
371 Type *CastTy = IntToPtr->getDestTy();
372 if (PtrToInt &&
373 CastTy->getPointerAddressSpace() ==
374 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
375 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
376 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
377 return PtrToInt->getOperand(0);
378 }
379 return nullptr;
380}
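// For example, on a target where pointers and i64 have the same size:
//   %i = ptrtoint ptr %p to i64
//   %q = inttoptr i64 %i to ptr
// simplifyIntToPtrRoundTripCast(%q) returns %p.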
381
382/// This performs a few simplifications for operators that are associative or
383/// commutative:
384///
385/// Commutative operators:
386///
387/// 1. Order operands such that they are listed from right (least complex) to
388/// left (most complex). This puts constants before unary operators before
389/// binary operators.
390///
391/// Associative operators:
392///
393/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
394/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
395///
396/// Associative and commutative operators:
397///
398/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
399/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
400/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
401/// if C1 and C2 are constants.
402bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
403 Instruction::BinaryOps Opcode = I.getOpcode();
404 bool Changed = false;
405
406 do {
407 // Order operands such that they are listed from right (least complex) to
408 // left (most complex). This puts constants before unary operators before
409 // binary operators.
410 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
411 getComplexity(I.getOperand(1)))
412 Changed = !I.swapOperands();
413
414 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
415 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
416
417 if (I.isAssociative()) {
418 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
419 if (Op0 && Op0->getOpcode() == Opcode) {
420 Value *A = Op0->getOperand(0);
421 Value *B = Op0->getOperand(1);
422 Value *C = I.getOperand(1);
423
424 // Does "B op C" simplify?
425 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
426 // It simplifies to V. Form "A op V".
427 replaceOperand(I, 0, A);
428 replaceOperand(I, 1, V);
429 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
430 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
431
432 // Conservatively clear all optional flags since they may not be
433 // preserved by the reassociation. Reset nsw/nuw based on the above
434 // analysis.
435 ClearSubclassDataAfterReassociation(I);
436
437 // Note: this is only valid because SimplifyBinOp doesn't look at
438 // the operands to Op0.
439 if (IsNUW)
440 I.setHasNoUnsignedWrap(true);
441
442 if (IsNSW)
443 I.setHasNoSignedWrap(true);
444
445 Changed = true;
446 ++NumReassoc;
447 continue;
448 }
449 }
450
451 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
452 if (Op1 && Op1->getOpcode() == Opcode) {
453 Value *A = I.getOperand(0);
454 Value *B = Op1->getOperand(0);
455 Value *C = Op1->getOperand(1);
456
457 // Does "A op B" simplify?
458 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
459 // It simplifies to V. Form "V op C".
460 replaceOperand(I, 0, V);
461 replaceOperand(I, 1, C);
462 // Conservatively clear the optional flags, since they may not be
463 // preserved by the reassociation.
464 ClearSubclassDataAfterReassociation(I);
465 Changed = true;
466 ++NumReassoc;
467 continue;
468 }
469 }
470 }
471
472 if (I.isAssociative() && I.isCommutative()) {
473 if (simplifyAssocCastAssoc(&I, *this)) {
474 Changed = true;
475 ++NumReassoc;
476 continue;
477 }
478
479 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
480 if (Op0 && Op0->getOpcode() == Opcode) {
481 Value *A = Op0->getOperand(0);
482 Value *B = Op0->getOperand(1);
483 Value *C = I.getOperand(1);
484
485 // Does "C op A" simplify?
486 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
487 // It simplifies to V. Form "V op B".
488 replaceOperand(I, 0, V);
489 replaceOperand(I, 1, B);
490 // Conservatively clear the optional flags, since they may not be
491 // preserved by the reassociation.
492 ClearSubclassDataAfterReassociation(I);
493 Changed = true;
494 ++NumReassoc;
495 continue;
496 }
497 }
498
499 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
500 if (Op1 && Op1->getOpcode() == Opcode) {
501 Value *A = I.getOperand(0);
502 Value *B = Op1->getOperand(0);
503 Value *C = Op1->getOperand(1);
504
505 // Does "C op A" simplify?
506 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
507 // It simplifies to V. Form "B op V".
508 replaceOperand(I, 0, B);
509 replaceOperand(I, 1, V);
510 // Conservatively clear the optional flags, since they may not be
511 // preserved by the reassociation.
512 ClearSubclassDataAfterReassociation(I);
513 Changed = true;
514 ++NumReassoc;
515 continue;
516 }
517 }
518
519 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
520 // if C1 and C2 are constants.
521 Value *A, *B;
522 Constant *C1, *C2, *CRes;
523 if (Op0 && Op1 &&
524 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
525 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
526 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
527 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
528 bool IsNUW = hasNoUnsignedWrap(I) &&
529 hasNoUnsignedWrap(*Op0) &&
530 hasNoUnsignedWrap(*Op1);
531 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
532 BinaryOperator::CreateNUW(Opcode, A, B) :
533 BinaryOperator::Create(Opcode, A, B);
534
535 if (isa<FPMathOperator>(NewBO)) {
536 FastMathFlags Flags = I.getFastMathFlags();
537 Flags &= Op0->getFastMathFlags();
538 Flags &= Op1->getFastMathFlags();
539 NewBO->setFastMathFlags(Flags);
540 }
541 InsertNewInstWith(NewBO, I);
542 NewBO->takeName(Op1);
543 replaceOperand(I, 0, NewBO);
544 replaceOperand(I, 1, CRes);
545 // Conservatively clear the optional flags, since they may not be
546 // preserved by the reassociation.
547 ClearSubclassDataAfterReassociation(I);
548 if (IsNUW)
549 I.setHasNoUnsignedWrap(true);
550
551 Changed = true;
552 continue;
553 }
554 }
555
556 // No further simplifications.
557 return Changed;
558 } while (true);
559}
560
561/// Return whether "X LOp (Y ROp Z)" is always equal to
562/// "(X LOp Y) ROp (X LOp Z)".
563static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
564 Instruction::BinaryOps ROp) {
565 // X & (Y | Z) <--> (X & Y) | (X & Z)
566 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
567 if (LOp == Instruction::And)
568 return ROp == Instruction::Or || ROp == Instruction::Xor;
569
570 // X | (Y & Z) <--> (X | Y) & (X | Z)
571 if (LOp == Instruction::Or)
572 return ROp == Instruction::And;
573
574 // X * (Y + Z) <--> (X * Y) + (X * Z)
575 // X * (Y - Z) <--> (X * Y) - (X * Z)
576 if (LOp == Instruction::Mul)
577 return ROp == Instruction::Add || ROp == Instruction::Sub;
578
579 return false;
580}
581
582/// Return whether "(X LOp Y) ROp Z" is always equal to
583/// "(X ROp Z) LOp (Y ROp Z)".
584static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
585 Instruction::BinaryOps ROp) {
586 if (Instruction::isCommutative(ROp))
587 return leftDistributesOverRight(ROp, LOp);
588
589 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
590 return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
591
592 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
593 // but this requires knowing that the addition does not overflow and other
594 // such subtleties.
595}
596
597/// This function returns the identity value for the given opcode, which can be used to
598/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
599static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
600 if (isa<Constant>(V))
601 return nullptr;
602
603 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
604}
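// For example, the identity value is 0 for Add (X + 0 == X) and 1 for Mul
// (X * 1 == X); nullptr is returned if V is itself a constant or the opcode
// has no identity value.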
605
606/// This function predicates factorization using distributive laws. By default,
607/// it just returns the 'Op' inputs. But for special-cases like
608/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
609/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
610/// allow more factorization opportunities.
611static Instruction::BinaryOps
612getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
613 Value *&LHS, Value *&RHS) {
614 assert(Op && "Expected a binary operator");
615 LHS = Op->getOperand(0);
616 RHS = Op->getOperand(1);
617 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
618 Constant *C;
619 if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
620 // X << C --> X * (1 << C)
621 RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
622 return Instruction::Mul;
623 }
624 // TODO: We can add other conversions e.g. shr => div etc.
625 }
626 return Op->getOpcode();
627}
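// For example, with TopOpcode == Add and Op == "shl %X, 5", this returns Mul
// with LHS = %X and RHS = 32, allowing the caller to factor the shift as
// "mul %X, 32".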
628
629/// This tries to simplify binary operations by factorizing out common terms
630/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
631static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
632 InstCombiner::BuilderTy &Builder,
633 Instruction::BinaryOps InnerOpcode, Value *A,
634 Value *B, Value *C, Value *D) {
635 assert(A && B && C && D && "All values must be provided");
636
637 Value *V = nullptr;
638 Value *RetVal = nullptr;
639 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
640 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
641
642 // Does "X op' Y" always equal "Y op' X"?
643 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
644
645 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
646 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
647 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
648 // commutative case, "(A op' B) op (C op' A)"?
649 if (A == C || (InnerCommutative && A == D)) {
650 if (A != C)
651 std::swap(C, D);
652 // Consider forming "A op' (B op D)".
653 // If "B op D" simplifies then it can be formed with no cost.
654 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
655
656 // If "B op D" doesn't simplify then only go on if one of the existing
657 // operations "A op' B" and "C op' D" will be zapped as no longer used.
658 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
659 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
660 if (V)
661 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
662 }
663 }
664
665 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
666 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
667 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
668 // commutative case, "(A op' B) op (B op' D)"?
669 if (B == D || (InnerCommutative && B == C)) {
670 if (B != D)
671 std::swap(C, D);
672 // Consider forming "(A op C) op' B".
673 // If "A op C" simplifies then it can be formed with no cost.
674 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
675
676 // If "A op C" doesn't simplify then only go on if one of the existing
677 // operations "A op' B" and "C op' D" will be zapped as no longer used.
678 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
679 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
680 if (V)
681 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
682 }
683 }
684
685 if (!RetVal)
686 return nullptr;
687
688 ++NumFactor;
689 RetVal->takeName(&I);
690
691 // Try to add no-overflow flags to the final value.
692 if (isa<OverflowingBinaryOperator>(RetVal)) {
693 bool HasNSW = false;
694 bool HasNUW = false;
695 if (isa<OverflowingBinaryOperator>(&I)) {
696 HasNSW = I.hasNoSignedWrap();
697 HasNUW = I.hasNoUnsignedWrap();
698 }
699 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
700 HasNSW &= LOBO->hasNoSignedWrap();
701 HasNUW &= LOBO->hasNoUnsignedWrap();
702 }
703
704 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
705 HasNSW &= ROBO->hasNoSignedWrap();
706 HasNUW &= ROBO->hasNoUnsignedWrap();
707 }
708
709 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
710 // We can propagate 'nsw' if we know that
711 // %Y = mul nsw i16 %X, C
712 // %Z = add nsw i16 %Y, %X
713 // =>
714 // %Z = mul nsw i16 %X, C+1
715 //
716 // iff C+1 isn't INT_MIN
717 const APInt *CInt;
718 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
719 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
720
721 // nuw can be propagated with any constant or nuw value.
722 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
723 }
724 }
725 return RetVal;
726}
727
728Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
729 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
730 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
731 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
732 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
733 Value *A, *B, *C, *D;
734 Instruction::BinaryOps LHSOpcode, RHSOpcode;
735
736 if (Op0)
737 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
738 if (Op1)
739 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
740
741 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
742 // a common term.
743 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
744 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
745 return V;
746
747 // The instruction has the form "(A op' B) op (C)". Try to factorize common
748 // term.
749 if (Op0)
750 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
751 if (Value *V =
752 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
753 return V;
754
755 // The instruction has the form "(B) op (C op' D)". Try to factorize common
756 // term.
757 if (Op1)
758 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
759 if (Value *V =
760 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
761 return V;
762
763 return nullptr;
764}
765
766/// This tries to simplify binary operations which some other binary operation
767/// distributes over either by factorizing out common terms
768/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
769/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
770/// Returns the simplified value, or null if it didn't simplify.
771Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
772 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
773 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
774 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
775 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
776
777 // Factorization.
778 if (Value *R = tryFactorizationFolds(I))
779 return R;
780
781 // Expansion.
782 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
783 // The instruction has the form "(A op' B) op C". See if expanding it out
784 // to "(A op C) op' (B op C)" results in simplifications.
785 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
786 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
787
788 // Disable the use of undef because it's not safe to distribute undef.
789 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
790 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
791 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
792
793 // Do "A op C" and "B op C" both simplify?
794 if (L && R) {
795 // They do! Return "L op' R".
796 ++NumExpand;
797 C = Builder.CreateBinOp(InnerOpcode, L, R);
798 C->takeName(&I);
799 return C;
800 }
801
802 // Does "A op C" simplify to the identity value for the inner opcode?
803 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
804 // They do! Return "B op C".
805 ++NumExpand;
806 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
807 C->takeName(&I);
808 return C;
809 }
810
811 // Does "B op C" simplify to the identity value for the inner opcode?
812 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
813 // They do! Return "A op C".
814 ++NumExpand;
815 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
816 C->takeName(&I);
817 return C;
818 }
819 }
820
821 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
822 // The instruction has the form "A op (B op' C)". See if expanding it out
823 // to "(A op B) op' (A op C)" results in simplifications.
824 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
825 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
826
827 // Disable the use of undef because it's not safe to distribute undef.
828 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
829 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
830 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
831
832 // Do "A op B" and "A op C" both simplify?
833 if (L && R) {
834 // They do! Return "L op' R".
835 ++NumExpand;
836 A = Builder.CreateBinOp(InnerOpcode, L, R);
837 A->takeName(&I);
838 return A;
839 }
840
841 // Does "A op B" simplify to the identity value for the inner opcode?
842 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
843 // They do! Return "A op C".
844 ++NumExpand;
845 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
846 A->takeName(&I);
847 return A;
848 }
849
850 // Does "A op C" simplify to the identity value for the inner opcode?
851 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
852 // They do! Return "A op B".
853 ++NumExpand;
854 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
855 A->takeName(&I);
856 return A;
857 }
858 }
859
860 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
861}
862
863Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
864 Value *LHS,
865 Value *RHS) {
866 Value *A, *B, *C, *D, *E, *F;
867 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
868 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
869 if (!LHSIsSelect && !RHSIsSelect)
870 return nullptr;
871
872 FastMathFlags FMF;
873 BuilderTy::FastMathFlagGuard Guard(Builder);
874 if (isa<FPMathOperator>(&I)) {
875 FMF = I.getFastMathFlags();
876 Builder.setFastMathFlags(FMF);
877 }
878
879 Instruction::BinaryOps Opcode = I.getOpcode();
880 SimplifyQuery Q = SQ.getWithInstruction(&I);
881
882 Value *Cond, *True = nullptr, *False = nullptr;
883
884 // Special-case for add/negate combination. Replace the zero in the negation
885 // with the trailing add operand:
886 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
887 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
888 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
889 // We need an 'add' and exactly 1 arm of the select to have been simplified.
890 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
891 return nullptr;
892
893 Value *N;
894 if (True && match(FVal, m_Neg(m_Value(N)))) {
895 Value *Sub = Builder.CreateSub(Z, N);
896 return Builder.CreateSelect(Cond, True, Sub, I.getName());
897 }
898 if (False && match(TVal, m_Neg(m_Value(N)))) {
899 Value *Sub = Builder.CreateSub(Z, N);
900 return Builder.CreateSelect(Cond, Sub, False, I.getName());
901 }
902 return nullptr;
903 };
904
905 if (LHSIsSelect && RHSIsSelect && A == D) {
906 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
907 Cond = A;
908 True = simplifyBinOp(Opcode, B, E, FMF, Q);
909 False = simplifyBinOp(Opcode, C, F, FMF, Q);
910
911 if (LHS->hasOneUse() && RHS->hasOneUse()) {
912 if (False && !True)
913 True = Builder.CreateBinOp(Opcode, B, E);
914 else if (True && !False)
915 False = Builder.CreateBinOp(Opcode, C, F);
916 }
917 } else if (LHSIsSelect && LHS->hasOneUse()) {
918 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
919 Cond = A;
920 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
921 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
922 if (Value *NewSel = foldAddNegate(B, C, RHS))
923 return NewSel;
924 } else if (RHSIsSelect && RHS->hasOneUse()) {
925 // X op (D ? E : F) -> D ? (X op E) : (X op F)
926 Cond = D;
927 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
928 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
929 if (Value *NewSel = foldAddNegate(E, F, LHS))
930 return NewSel;
931 }
932
933 if (!True || !False)
934 return nullptr;
935
936 Value *SI = Builder.CreateSelect(Cond, True, False);
937 SI->takeName(&I);
938 return SI;
939}
940
941/// Freely adapt every user of V as-if V was changed to !V.
942/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
943void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
944 for (User *U : make_early_inc_range(I->users())) {
945 if (U == IgnoredUser)
946 continue; // Don't consider this user.
947 switch (cast<Instruction>(U)->getOpcode()) {
948 case Instruction::Select: {
949 auto *SI = cast<SelectInst>(U);
950 SI->swapValues();
951 SI->swapProfMetadata();
952 break;
953 }
954 case Instruction::Br:
955 cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
956 break;
957 case Instruction::Xor:
958 replaceInstUsesWith(cast<Instruction>(*U), I);
959 break;
960 default:
961 llvm_unreachable("Got unexpected user - out of sync with "
962 "canFreelyInvertAllUsersOf() ?");
963 }
964 }
965}
966
967/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
968/// constant zero (which is the 'negate' form).
969Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
970 Value *NegV;
971 if (match(V, m_Neg(m_Value(NegV))))
972 return NegV;
973
974 // Constants can be considered to be negated values if they can be folded.
975 if (ConstantInt *C = dyn_cast<ConstantInt>(V))
976 return ConstantExpr::getNeg(C);
977
978 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
979 if (C->getType()->getElementType()->isIntegerTy())
980 return ConstantExpr::getNeg(C);
981
982 if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
983 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
984 Constant *Elt = CV->getAggregateElement(i);
985 if (!Elt)
986 return nullptr;
987
988 if (isa<UndefValue>(Elt))
989 continue;
990
991 if (!isa<ConstantInt>(Elt))
992 return nullptr;
993 }
994 return ConstantExpr::getNeg(CV);
995 }
996
997 // Negate integer vector splats.
998 if (auto *CV = dyn_cast<Constant>(V))
999 if (CV->getType()->isVectorTy() &&
1000 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1001 return ConstantExpr::getNeg(CV);
1002
1003 return nullptr;
1004}
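// For example, dyn_castNegVal returns %x for "sub i32 0, %x" (the m_Neg
// pattern) and returns the constant -7 for the integer constant 7.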
1005
1006/// A binop with a constant operand and a sign-extended boolean operand may be
1007/// converted into a select of constants by applying the binary operation to
1008/// the constant with the two possible values of the extended boolean (0 or -1).
1009Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1010 // TODO: Handle non-commutative binop (constant is operand 0).
1011 // TODO: Handle zext.
1012 // TODO: Peek through 'not' of cast.
1013 Value *BO0 = BO.getOperand(0);
1014 Value *BO1 = BO.getOperand(1);
1015 Value *X;
1016 Constant *C;
1017 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1018 !X->getType()->isIntOrIntVectorTy(1))
1019 return nullptr;
1020
1021 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1022 Constant *Ones = Constant::getAllOnesValue(BO.getType());
1023 Constant *Zero = Constant::getNullValue(BO.getType());
1024 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1025 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1026 return SelectInst::Create(X, TVal, FVal);
1027}
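// For example, "and (sext i1 %b to i32), 12" becomes
// "select i1 %b, i32 12, i32 0", i.e. the binop applied to -1 and to 0.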
1028
1029static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
1030 SelectInst *SI,
1031 bool IsTrueArm) {
1032 SmallVector<Constant *> ConstOps;
1033 for (Value *Op : I.operands()) {
1034 CmpInst::Predicate Pred;
1035 Constant *C = nullptr;
1036 if (Op == SI) {
1037 C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
1038 : SI->getFalseValue());
1039 } else if (match(SI->getCondition(),
1040 m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
1041 Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
1042 isGuaranteedNotToBeUndefOrPoison(C)) {
1043 // Pass
1044 } else {
1045 C = dyn_cast<Constant>(Op);
1046 }
1047 if (C == nullptr)
1048 return nullptr;
1049
1050 ConstOps.push_back(C);
1051 }
1052
1053 return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1054}
1055
1056static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1057 Value *NewOp, InstCombiner &IC) {
1058 Instruction *Clone = I.clone();
1059 Clone->replaceUsesOfWith(SI, NewOp);
1060 IC.InsertNewInstBefore(Clone, *SI);
1061 return Clone;
1062}
1063
1064Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1065 bool FoldWithMultiUse) {
1066 // Don't modify shared select instructions unless FoldWithMultiUse is set.
1067 if (!SI->hasOneUse() && !FoldWithMultiUse)
1068 return nullptr;
1069
1070 Value *TV = SI->getTrueValue();
1071 Value *FV = SI->getFalseValue();
1072 if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1073 return nullptr;
1074
1075 // Bool selects with constant operands can be folded to logical ops.
1076 if (SI->getType()->isIntOrIntVectorTy(1))
1077 return nullptr;
1078
1079 // If it's a bitcast involving vectors, make sure it has the same number of
1080 // elements on both sides.
1081 if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1082 VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1083 VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1084
1085 // Verify that either both or neither are vectors.
1086 if ((SrcTy == nullptr) != (DestTy == nullptr))
1087 return nullptr;
1088
1089 // If vectors, verify that they have the same number of elements.
1090 if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1091 return nullptr;
1092 }
1093
1094 // Make sure that one of the select arms constant folds successfully.
1095 Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
1096 Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
1097 if (!NewTV && !NewFV)
1098 return nullptr;
1099
1100 // Create an instruction for the arm that did not fold.
1101 if (!NewTV)
1102 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1103 if (!NewFV)
1104 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1105 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1106}
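// For example, "add (select i1 %c, i32 4, i32 8), 1" folds to
// "select i1 %c, i32 5, i32 9" because both select arms constant-fold.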
1107
1108Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1109 unsigned NumPHIValues = PN->getNumIncomingValues();
1110 if (NumPHIValues == 0)
1111 return nullptr;
1112
1113 // We normally only transform phis with a single use. However, if a PHI has
1114 // multiple uses and they are all the same operation, we can fold *all* of the
1115 // uses into the PHI.
1116 if (!PN->hasOneUse()) {
1117 // Walk the use list for the instruction, comparing them to I.
1118 for (User *U : PN->users()) {
1119 Instruction *UI = cast<Instruction>(U);
1120 if (UI != &I && !I.isIdenticalTo(UI))
1121 return nullptr;
1122 }
1123 // Otherwise, we can replace *all* users with the new PHI we form.
1124 }
1125
1126 // Check to see whether the instruction can be folded into each phi operand.
1127 // If there is one operand that does not fold, remember the BB it is in.
1128 // If there is more than one or if *it* is a PHI, bail out.
1129 SmallVector<Value *> NewPhiValues;
1130 BasicBlock *NonSimplifiedBB = nullptr;
1131 Value *NonSimplifiedInVal = nullptr;
1132 for (unsigned i = 0; i != NumPHIValues; ++i) {
1133 Value *InVal = PN->getIncomingValue(i);
1134 BasicBlock *InBB = PN->getIncomingBlock(i);
1135
1136 // NB: It is a precondition of this transform that the operands be
1137 // phi translatable! This is usually trivially satisfied by limiting it
1138 // to constant ops, and for selects we do a more sophisticated check.
1139 SmallVector<Value *> Ops;
1140 for (Value *Op : I.operands()) {
1141 if (Op == PN)
1142 Ops.push_back(InVal);
1143 else
1144 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1145 }
1146
1147 // Don't consider the simplification successful if we get back a constant
1148 // expression. That's just an instruction in hiding.
1149 // Also reject the case where we simplify back to the phi node. We wouldn't
1150 // be able to remove it in that case.
1151 Value *NewVal = simplifyInstructionWithOperands(
1152 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1153 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
1154 NewPhiValues.push_back(NewVal);
1155 continue;
1156 }
1157
1158 if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1159 if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1160
1161 NonSimplifiedBB = InBB;
1162 NonSimplifiedInVal = InVal;
1163 NewPhiValues.push_back(nullptr);
1164
1165 // If the InVal is an invoke at the end of the pred block, then we can't
1166 // insert a computation after it without breaking the edge.
1167 if (isa<InvokeInst>(InVal))
1168 if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1169 return nullptr;
1170
1171 // If the incoming non-constant value is reachable from the phis block,
1172 // we'll push the operation across a loop backedge. This could result in
1173 // an infinite combine loop, and is generally non-profitable (especially
1174 // if the operation was originally outside the loop).
1175 if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1176 LI))
1177 return nullptr;
1178 }
1179
1180 // If there is exactly one non-simplified value, we can insert a copy of the
1181 // operation in that block. However, if this is a critical edge, we would be
1182 // inserting the computation on some other paths (e.g. inside a loop). Only
1183 // do this if the pred block is unconditionally branching into the phi block.
1184 // Also, make sure that the pred block is not dead code.
1185 if (NonSimplifiedBB != nullptr) {
1186 BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1187 if (!BI || !BI->isUnconditional() ||
1188 !DT.isReachableFromEntry(NonSimplifiedBB))
1189 return nullptr;
1190 }
1191
1192 // Okay, we can do the transformation: create the new PHI node.
1193 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1194 InsertNewInstBefore(NewPN, *PN);
1195 NewPN->takeName(PN);
1196 NewPN->setDebugLoc(PN->getDebugLoc());
1197
1198 // If we are going to have to insert a new computation, do so right before the
1199 // predecessor's terminator.
1200 Instruction *Clone = nullptr;
1201 if (NonSimplifiedBB) {
1202 Clone = I.clone();
1203 for (Use &U : Clone->operands()) {
1204 if (U == PN)
1205 U = NonSimplifiedInVal;
1206 else
1207 U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1208 }
1209 InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
1210 }
1211
1212 for (unsigned i = 0; i != NumPHIValues; ++i) {
1213 if (NewPhiValues[i])
1214 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1215 else
1216 NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1217 }
1218
1219 for (User *U : make_early_inc_range(PN->users())) {
1220 Instruction *User = cast<Instruction>(U);
1221 if (User == &I) continue;
1222 replaceInstUsesWith(*User, NewPN);
1223 eraseInstFromFunction(*User);
1224 }
1225
1226 replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
1227 const_cast<PHINode &>(*NewPN),
1228 const_cast<PHINode &>(*PN), DT);
1229 return replaceInstUsesWith(I, NewPN);
1230}
1231
1232Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1233 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1234 // we are guarding against replicating the binop in >1 predecessor.
1235 // This could miss matching a phi with 2 constant incoming values.
1236 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1237 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1238 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1239 Phi0->getNumOperands() != Phi1->getNumOperands())
1240 return nullptr;
1241
1242 // TODO: Remove the restriction for binop being in the same block as the phis.
1243 if (BO.getParent() != Phi0->getParent() ||
1244 BO.getParent() != Phi1->getParent())
1245 return nullptr;
1246
1247 // Fold if there is at least one specific constant value in phi0 or phi1's
1248 // incoming values that comes from the same block and this specific constant
1249 // value can be used to do optimization for specific binary operator.
1250 // For example:
1251 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
1252 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
1253 // %add = add i32 %phi0, %phi1
1254 // ==>
1255 // %add = phi i32 [%j, %bb0], [%i, %bb1]
1256 Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
1257 /*AllowRHSConstant*/ false);
1258 if (C) {
1259 SmallVector<Value *, 4> NewIncomingValues;
1260 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
1261 auto &Phi0Use = std::get<0>(T);
1262 auto &Phi1Use = std::get<1>(T);
1263 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
1264 return false;
1265 Value *Phi0UseV = Phi0Use.get();
1266 Value *Phi1UseV = Phi1Use.get();
1267 if (Phi0UseV == C)
1268 NewIncomingValues.push_back(Phi1UseV);
1269 else if (Phi1UseV == C)
1270 NewIncomingValues.push_back(Phi0UseV);
1271 else
1272 return false;
1273 return true;
1274 };
1275
1276 if (all_of(zip(Phi0->operands(), Phi1->operands()),
1277 CanFoldIncomingValuePair)) {
1278 PHINode *NewPhi =
1279 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
1280 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
1281 "The number of collected incoming values should equal the number "
1282 "of the original PHINode operands!");
1283 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
1284 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
1285 return NewPhi;
1286 }
1287 }
1288
1289 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1290 return nullptr;
1291
1292 // Match a pair of incoming constants for one of the predecessor blocks.
1293 BasicBlock *ConstBB, *OtherBB;
1294 Constant *C0, *C1;
1295 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1296 ConstBB = Phi0->getIncomingBlock(0);
1297 OtherBB = Phi0->getIncomingBlock(1);
1298 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1299 ConstBB = Phi0->getIncomingBlock(1);
1300 OtherBB = Phi0->getIncomingBlock(0);
1301 } else {
1302 return nullptr;
1303 }
1304 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1305 return nullptr;
1306
1307 // The block that we are hoisting to must reach here unconditionally.
1308 // Otherwise, we could be speculatively executing an expensive or
1309 // non-speculative op.
1310 auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1311 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1312 !DT.isReachableFromEntry(OtherBB))
1313 return nullptr;
1314
1315 // TODO: This check could be tightened to only apply to binops (div/rem) that
1316 // are not safe to speculatively execute. But that could allow hoisting
1317 // potentially expensive instructions (fdiv for example).
1318 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1319 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1320 return nullptr;
1321
1322 // Fold constants for the predecessor block with constant incoming values.
1323 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1324 if (!NewC)
1325 return nullptr;
1326
1327 // Make a new binop in the predecessor block with the non-constant incoming
1328 // values.
1329 Builder.SetInsertPoint(PredBlockBranch);
1330 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1331 Phi0->getIncomingValueForBlock(OtherBB),
1332 Phi1->getIncomingValueForBlock(OtherBB));
1333 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1334 NotFoldedNewBO->copyIRFlags(&BO);
1335
1336 // Replace the binop with a phi of the new values. The old phis are dead.
1337 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1338 NewPhi->addIncoming(NewBO, OtherBB);
1339 NewPhi->addIncoming(NewC, ConstBB);
1340 return NewPhi;
1341}
1342
1343Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1344 if (!isa<Constant>(I.getOperand(1)))
1345 return nullptr;
1346
1347 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1348 if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1349 return NewSel;
1350 } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1351 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1352 return NewPhi;
1353 }
1354 return nullptr;
1355}
1356
1357static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1358 // If this GEP has only 0 indices, it is the same pointer as
1359 // Src. If Src is not a trivial GEP too, don't combine
1360 // the indices.
1361 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1362 !Src.hasOneUse())
1363 return false;
1364 return true;
1365}
1366
1367Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1368 if (!isa<VectorType>(Inst.getType()))
1369 return nullptr;
1370
1371 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1372 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1373 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1374 cast<VectorType>(Inst.getType())->getElementCount());
1375 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1376 cast<VectorType>(Inst.getType())->getElementCount());
1377
1378 // If both operands of the binop are vector concatenations, then perform the
1379 // narrow binop on each pair of the source operands followed by concatenation
1380 // of the results.
1381 Value *L0, *L1, *R0, *R1;
1382 ArrayRef<int> Mask;
1383 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1384 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1385 LHS->hasOneUse() && RHS->hasOneUse() &&
1386 cast<ShuffleVectorInst>(LHS)->isConcat() &&
1387 cast<ShuffleVectorInst>(RHS)->isConcat()) {
1388 // This transform does not have the speculative execution constraint as
1389 // below because the shuffle is a concatenation. The new binops are
1390 // operating on exactly the same elements as the existing binop.
1391 // TODO: We could ease the mask requirement to allow different undef lanes,
1392 // but that requires an analysis of the binop-with-undef output value.
1393 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1394 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1395 BO->copyIRFlags(&Inst);
1396 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1397 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1398 BO->copyIRFlags(&Inst);
1399 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1400 }
1401
1402 auto createBinOpReverse = [&](Value *X, Value *Y) {
1403 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
1404 if (auto *BO = dyn_cast<BinaryOperator>(V))
1405 BO->copyIRFlags(&Inst);
1406 Module *M = Inst.getModule();
1407 Function *F = Intrinsic::getDeclaration(
1408 M, Intrinsic::experimental_vector_reverse, V->getType());
1409 return CallInst::Create(F, V);
1410 };
1411
1412 // NOTE: Reverse shuffles don't require the speculative execution protection
1413 // below because they don't affect which lanes take part in the computation.
1414
1415 Value *V1, *V2;
1416 if (match(LHS, m_VecReverse(m_Value(V1)))) {
1417 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
1418 if (match(RHS, m_VecReverse(m_Value(V2))) &&
1419 (LHS->hasOneUse() || RHS->hasOneUse() ||
1420 (LHS == RHS && LHS->hasNUses(2))))
1421 return createBinOpReverse(V1, V2);
1422
1423 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
1424 if (LHS->hasOneUse() && isSplatValue(RHS))
1425 return createBinOpReverse(V1, RHS);
1426 }
1427 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
1428 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
1429 return createBinOpReverse(LHS, V2);
1430
1431 // It may not be safe to reorder shuffles and things like div, urem, etc.
1432 // because we may trap when executing those ops on unknown vector elements.
1433 // See PR20059.
1434 if (!isSafeToSpeculativelyExecute(&Inst))
1435 return nullptr;
1436
1437 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1438 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1439 if (auto *BO = dyn_cast<BinaryOperator>(XY))
1440 BO->copyIRFlags(&Inst);
1441 return new ShuffleVectorInst(XY, M);
1442 };
1443
1444 // If both arguments of the binary operation are shuffles that use the same
1445 // mask and shuffle within a single vector, move the shuffle after the binop.
1446 if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1447 match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1448 V1->getType() == V2->getType() &&
1449 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1450 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1451 return createBinOpShuffle(V1, V2, Mask);
1452 }
1453
1454 // If both arguments of a commutative binop are select-shuffles that use the
1455 // same mask with commuted operands, the shuffles are unnecessary.
1456 if (Inst.isCommutative() &&
1457 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1458 match(RHS,
1459 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1460 auto *LShuf = cast<ShuffleVectorInst>(LHS);
1461 auto *RShuf = cast<ShuffleVectorInst>(RHS);
1462 // TODO: Allow shuffles that contain undefs in the mask?
1463 // That is legal, but it reduces undef knowledge.
1464 // TODO: Allow arbitrary shuffles by shuffling after binop?
1465 // That might be legal, but we have to deal with poison.
1466 if (LShuf->isSelect() &&
1467 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
1468 RShuf->isSelect() &&
1469 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
1470 // Example:
1471 // LHS = shuffle V1, V2, <0, 5, 6, 3>
1472 // RHS = shuffle V2, V1, <0, 5, 6, 3>
1473 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1474 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1475 NewBO->copyIRFlags(&Inst);
1476 return NewBO;
1477 }
1478 }
1479
1480 // If one argument is a shuffle within one vector and the other is a constant,
1481 // try moving the shuffle after the binary operation. This canonicalization
1482 // intends to move shuffles closer to other shuffles and binops closer to
1483 // other binops, so they can be folded. It may also enable demanded elements
1484 // transforms.
1485 Constant *C;
1486 auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1487 if (InstVTy &&
1488 match(&Inst,
1489 m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1490 m_ImmConstant(C))) &&
1491 cast<FixedVectorType>(V1->getType())->getNumElements() <=
1492 InstVTy->getNumElements()) {
1493 assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1494 "Shuffle should not change scalar type");
1495
1496 // Find constant NewC that has property:
1497 // shuffle(NewC, ShMask) = C
1498 // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1499 // reorder is not possible. A 1-to-1 mapping is not required. Example:
1500 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1501 bool ConstOp1 = isa<Constant>(RHS);
1502 ArrayRef<int> ShMask = Mask;
1503 unsigned SrcVecNumElts =
1504 cast<FixedVectorType>(V1->getType())->getNumElements();
1505 UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1506 SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1507 bool MayChange = true;
1508 unsigned NumElts = InstVTy->getNumElements();
1509 for (unsigned I = 0; I < NumElts; ++I) {
1510 Constant *CElt = C->getAggregateElement(I);
1511 if (ShMask[I] >= 0) {
1512 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1513 Constant *NewCElt = NewVecC[ShMask[I]];
1514 // Bail out if:
1515 // 1. The constant vector contains a constant expression.
1516 // 2. The shuffle needs an element of the constant vector that can't
1517 // be mapped to a new constant vector.
1518 // 3. This is a widening shuffle that copies elements of V1 into the
1519 // extended elements (extending with undef is allowed).
1520 if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1521 I >= SrcVecNumElts) {
1522 MayChange = false;
1523 break;
1524 }
1525 NewVecC[ShMask[I]] = CElt;
1526 }
1527 // If this is a widening shuffle, we must be able to extend with undef
1528 // elements. If the original binop does not produce an undef in the high
1529 // lanes, then this transform is not safe.
1530 // Similarly for undef lanes due to the shuffle mask, we can only
1531 // transform binops that preserve undef.
1532 // TODO: We could shuffle those non-undef constant values into the
1533 // result by using a constant vector (rather than an undef vector)
1534 // as operand 1 of the new binop, but that might be too aggressive
1535 // for target-independent shuffle creation.
1536 if (I >= SrcVecNumElts || ShMask[I] < 0) {
1537 Constant *MaybeUndef =
1538 ConstOp1
1539 ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1540 : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1541 if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1542 MayChange = false;
1543 break;
1544 }
1545 }
1546 }
1547 if (MayChange) {
1548 Constant *NewC = ConstantVector::get(NewVecC);
1549 // It may not be safe to execute a binop on a vector with undef elements
1550 // because the entire instruction can be folded to undef or create poison
1551 // that did not exist in the original code.
1552 if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1553 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1554
1555 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1556 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1557 Value *NewLHS = ConstOp1 ? V1 : NewC;
1558 Value *NewRHS = ConstOp1 ? NewC : V1;
1559 return createBinOpShuffle(NewLHS, NewRHS, Mask);
1560 }
1561 }
1562
1563 // Try to reassociate to sink a splat shuffle after a binary operation.
1564 if (Inst.isAssociative() && Inst.isCommutative()) {
1565 // Canonicalize shuffle operand as LHS.
1566 if (isa<ShuffleVectorInst>(RHS))
1567 std::swap(LHS, RHS);
1568
1569 Value *X;
1570 ArrayRef<int> MaskC;
1571 int SplatIndex;
1572 Value *Y, *OtherOp;
1573 if (!match(LHS,
1574 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1575 !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1576 X->getType() != Inst.getType() ||
1577 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1578 return nullptr;
1579
1580 // FIXME: This may not be safe if the analysis allows undef elements. By
1581 // moving 'Y' before the splat shuffle, we are implicitly assuming
1582 // that it is not undef/poison at the splat index.
1583 if (isSplatValue(OtherOp, SplatIndex)) {
1584 std::swap(Y, OtherOp);
1585 } else if (!isSplatValue(Y, SplatIndex)) {
1586 return nullptr;
1587 }
1588
1589 // X and Y are splatted values, so perform the binary operation on those
1590 // values followed by a splat followed by the 2nd binary operation:
1591 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1592 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1593 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1594 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1595 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1596
1597 // Intersect FMF on both new binops. Other (poison-generating) flags are
1598 // dropped to be safe.
1599 if (isa<FPMathOperator>(R)) {
1600 R->copyFastMathFlags(&Inst);
1601 R->andIRFlags(RHS);
1602 }
1603 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1604 NewInstBO->copyIRFlags(R);
1605 return R;
1606 }
1607
1608 return nullptr;
1609}
1610
1611/// Try to narrow the width of a binop if at least 1 operand is an extend of
1612/// a value. This requires a potentially expensive known bits check to make
1613/// sure the narrow op does not overflow.
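/// Illustrative example (added for exposition, not from the original source):
///   %wx = zext i8 %x to i32
///   %wy = zext i8 %y to i32
///   %r  = add i32 %wx, %wy
/// can become, once known bits prove the narrow add cannot wrap:
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32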
1614Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1615 // We need at least one extended operand.
1616 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1617
1618 // If this is a sub, we swap the operands since we always want an extension
1619 // on the RHS. The LHS can be an extension or a constant.
1620 if (BO.getOpcode() == Instruction::Sub)
1621 std::swap(Op0, Op1);
1622
1623 Value *X;
1624 bool IsSext = match(Op0, m_SExt(m_Value(X)));
1625 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1626 return nullptr;
1627
1628 // If both operands are the same extension from the same source type and we
1629 // can eliminate at least one (hasOneUse), this might work.
1630 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1631 Value *Y;
1632 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1633 cast<Operator>(Op1)->getOpcode() == CastOpc &&
1634 (Op0->hasOneUse() || Op1->hasOneUse()))) {
1635 // If that did not match, see if we have a suitable constant operand.
1636 // Truncating and extending must produce the same constant.
1637 Constant *WideC;
1638 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1639 return nullptr;
1640 Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1641 if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1642 return nullptr;
1643 Y = NarrowC;
1644 }
1645
1646 // Swap back now that we found our operands.
1647 if (BO.getOpcode() == Instruction::Sub)
1648 std::swap(X, Y);
1649
1650 // Both operands have narrow versions. Last step: the math must not overflow
1651 // in the narrow width.
1652 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1653 return nullptr;
1654
1655 // bo (ext X), (ext Y) --> ext (bo X, Y)
1656 // bo (ext X), C --> ext (bo X, C')
1657 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1658 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1659 if (IsSext)
1660 NewBinOp->setHasNoSignedWrap();
1661 else
1662 NewBinOp->setHasNoUnsignedWrap();
1663 }
1664 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1665}
1666
1667 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1668 // At least one GEP must be inbounds.
1669 if (!GEP1.isInBounds() && !GEP2.isInBounds())
1670 return false;
1671
1672 return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1673 (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1674}
1675
1676/// Thread a GEP operation with constant indices through the constant true/false
1677/// arms of a select.
1678 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1679 InstCombiner::BuilderTy &Builder) {
1680 if (!GEP.hasAllConstantIndices())
1681 return nullptr;
1682
1683 Instruction *Sel;
1684 Value *Cond;
1685 Constant *TrueC, *FalseC;
1686 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1687 !match(Sel,
1688 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1689 return nullptr;
1690
1691 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1692 // Propagate 'inbounds' and metadata from existing instructions.
1693 // Note: using IRBuilder to create the constants for efficiency.
1694 SmallVector<Value *, 4> IndexC(GEP.indices());
1695 bool IsInBounds = GEP.isInBounds();
1696 Type *Ty = GEP.getSourceElementType();
1697 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1698 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1699 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1700}
1701
1702 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1703 GEPOperator *Src) {
1704 // Combine Indices - If the source pointer to this getelementptr instruction
1705 // is a getelementptr instruction with matching element type, combine the
1706 // indices of the two getelementptr instructions into a single instruction.
1707 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1708 return nullptr;
1709
1710 // For constant GEPs, use a more general offset-based folding approach.
1711 Type *PtrTy = Src->getType()->getScalarType();
1712 if (GEP.hasAllConstantIndices() &&
1713 (Src->hasOneUse() || Src->hasAllConstantIndices())) {
1714 // Split Src into a variable part and a constant suffix.
1715 gep_type_iterator GTI = gep_type_begin(*Src);
1716 Type *BaseType = GTI.getIndexedType();
1717 bool IsFirstType = true;
1718 unsigned NumVarIndices = 0;
1719 for (auto Pair : enumerate(Src->indices())) {
1720 if (!isa<ConstantInt>(Pair.value())) {
1721 BaseType = GTI.getIndexedType();
1722 IsFirstType = false;
1723 NumVarIndices = Pair.index() + 1;
1724 }
1725 ++GTI;
1726 }
1727
1728 // Determine the offset for the constant suffix of Src.
1729 APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
1730 if (NumVarIndices != Src->getNumIndices()) {
1731 // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
1732 if (isa<ScalableVectorType>(BaseType))
1733 return nullptr;
1734
1735 SmallVector<Value *> ConstantIndices;
1736 if (!IsFirstType)
1737 ConstantIndices.push_back(
1738 Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
1739 append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
1740 Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
1741 }
1742
1743 // Add the offset for GEP (which is fully constant).
1744 if (!GEP.accumulateConstantOffset(DL, Offset))
1745 return nullptr;
1746
1747 APInt OffsetOld = Offset;
1748 // Convert the total offset back into indices.
1749 SmallVector<APInt> ConstIndices =
1750 DL.getGEPIndicesForOffset(BaseType, Offset);
1751 if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
1752 // If both GEP are constant-indexed, and cannot be merged in either way,
1753 // convert them to a GEP of i8.
1754 if (Src->hasAllConstantIndices())
1755 return replaceInstUsesWith(
1756 GEP, Builder.CreateGEP(
1757 Builder.getInt8Ty(), Src->getOperand(0),
1758 Builder.getInt(OffsetOld), "",
1759 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
1760 return nullptr;
1761 }
1762
1763 bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
1764 SmallVector<Value *> Indices;
1765 append_range(Indices, drop_end(Src->indices(),
1766 Src->getNumIndices() - NumVarIndices));
1767 for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
1768 Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
1769 // Even if the total offset is inbounds, we may end up representing it
1770 // by first performing a larger negative offset, and then a smaller
1771 // positive one. The large negative offset might go out of bounds. Only
1772 // preserve inbounds if all signs are the same.
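// Illustrative (added for exposition): with i32 elements, a byte offset of
// +4 might be decomposed into a -4 step followed by a +8 step; the -4 step
// could leave the underlying object even though the final address is fine.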
1773 IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
1774 }
1775
1776 return replaceInstUsesWith(
1777 GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
1778 Indices, "", IsInBounds));
1779 }
1780
1781 if (Src->getResultElementType() != GEP.getSourceElementType())
1782 return nullptr;
1783
1784 SmallVector<Value*, 8> Indices;
1785
1786 // Find out whether the last index in the source GEP is a sequential idx.
1787 bool EndsWithSequential = false;
1788 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
1789 I != E; ++I)
1790 EndsWithSequential = I.isSequential();
1791
1792 // Can we combine the two pointer arithmetics offsets?
1793 if (EndsWithSequential) {
1794 // Replace: gep (gep %P, long B), long A, ...
1795 // With: T = long A+B; gep %P, T, ...
1796 Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
1797 Value *GO1 = GEP.getOperand(1);
1798
1799 // If they aren't the same type, then the input hasn't been processed
1800 // by the loop above yet (which canonicalizes sequential index types to
1801 // intptr_t). Just avoid transforming this until the input has been
1802 // normalized.
1803 if (SO1->getType() != GO1->getType())
1804 return nullptr;
1805
1806 Value *Sum =
1807 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
1808 // Only do the combine when we are sure the cost after the
1809 // merge is never more than that before the merge.
1810 if (Sum == nullptr)
1811 return nullptr;
1812
1813 // Update the GEP in place if possible.
1814 if (Src->getNumOperands() == 2) {
1815 GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
1816 replaceOperand(GEP, 0, Src->getOperand(0));
1817 replaceOperand(GEP, 1, Sum);
1818 return &GEP;
1819 }
1820 Indices.append(Src->op_begin()+1, Src->op_end()-1);
1821 Indices.push_back(Sum);
1822 Indices.append(GEP.op_begin()+2, GEP.op_end());
1823 } else if (isa<Constant>(*GEP.idx_begin()) &&
1824 cast<Constant>(*GEP.idx_begin())->isNullValue() &&
1825 Src->getNumOperands() != 1) {
1826 // Otherwise we can do the fold if the first index of the GEP is a zero
1827 Indices.append(Src->op_begin()+1, Src->op_end());
1828 Indices.append(GEP.idx_begin()+1, GEP.idx_end());
1829 }
1830
1831 if (!Indices.empty())
1832 return replaceInstUsesWith(
1833 GEP, Builder.CreateGEP(
1834 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
1835 isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
1836
1837 return nullptr;
1838}
1839
1840 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
1841 Value *PtrOp = GEP.getOperand(0);
1842 SmallVector<Value *, 8> Indices(GEP.indices());
1843 Type *GEPType = GEP.getType();
1844 Type *GEPEltType = GEP.getSourceElementType();
1845 bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
1846 if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
1847 SQ.getWithInstruction(&GEP)))
1848 return replaceInstUsesWith(GEP, V);
1849
1850 // For vector geps, use the generic demanded vector support.
1851 // Skip if GEP return type is scalable. The number of elements is unknown at
1852 // compile-time.
1853 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
1854 auto VWidth = GEPFVTy->getNumElements();
1855 APInt UndefElts(VWidth, 0);
1856 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1857 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
1858 UndefElts)) {
1859 if (V != &GEP)
1860 return replaceInstUsesWith(GEP, V);
1861 return &GEP;
1862 }
1863
1864 // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
1865 // possible (decide on canonical form for pointer broadcast), 3) exploit
1866 // undef elements to decrease demanded bits
1867 }
1868
1869 // Eliminate unneeded casts for indices, and replace indices which displace
1870 // by multiples of a zero size type with zero.
1871 bool MadeChange = false;
1872
1873 // Index width may not be the same width as pointer width.
1874 // Data layout chooses the right type based on supported integer types.
1875 Type *NewScalarIndexTy =
1876 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
1877
1878 gep_type_iterator GTI = gep_type_begin(GEP);
1879 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
1880 ++I, ++GTI) {
1881 // Skip indices into struct types.
1882 if (GTI.isStruct())
1883 continue;
1884
1885 Type *IndexTy = (*I)->getType();
1886 Type *NewIndexType =
1887 IndexTy->isVectorTy()
1888 ? VectorType::get(NewScalarIndexTy,
1889 cast<VectorType>(IndexTy)->getElementCount())
1890 : NewScalarIndexTy;
1891
1892 // If the element type has zero size then any index over it is equivalent
1893 // to an index of zero, so replace it with zero if it is not zero already.
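// Illustrative (added for exposition): in "gep [0 x i32], ptr %p, i64 %i"
// the indexed type [0 x i32] has zero allocation size, so every value of %i
// addresses the same location and %i can be replaced with 0.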
1894 Type *EltTy = GTI.getIndexedType();
1895 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
1896 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
1897 *I = Constant::getNullValue(NewIndexType);
1898 MadeChange = true;
1899 }
1900
1901 if (IndexTy != NewIndexType) {
1902 // If we are using a wider index than needed for this platform, shrink
1903 // it to what we need. If narrower, sign-extend it to what we need.
1904 // This explicit cast can make subsequent optimizations more obvious.
1905 *I = Builder.CreateIntCast(*I, NewIndexType, true);
1906 MadeChange = true;
1907 }
1908 }
1909 if (MadeChange)
1910 return &GEP;
1911
1912 // Check to see if the inputs to the PHI node are getelementptr instructions.
1913 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
1914 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
1915 if (!Op1)
1916 return nullptr;
1917
1918 // Don't fold a GEP into itself through a PHI node. This can only happen
1919 // through the back-edge of a loop. Folding a GEP into itself means that
1920 // the value of the previous iteration needs to be stored in the meantime,
1921 // thus requiring an additional register variable to be live, but not
1922 // actually achieving anything (the GEP still needs to be executed once per
1923 // loop iteration).
1924 if (Op1 == &GEP)
1925 return nullptr;
1926
1927 int DI = -1;
1928
1929 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
1930 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
1931 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
1932 Op1->getSourceElementType() != Op2->getSourceElementType())
1933 return nullptr;
1934
1935 // As for Op1 above, don't try to fold a GEP into itself.
1936 if (Op2 == &GEP)
1937 return nullptr;
1938
1939 // Keep track of the type as we walk the GEP.
1940 Type *CurTy = nullptr;
1941
1942 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
1943 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
1944 return nullptr;
1945
1946 if (Op1->getOperand(J) != Op2->getOperand(J)) {
1947 if (DI == -1) {
1948 // We have not seen any differences in the GEPs feeding the
1949 // PHI yet, so we record this one if it is allowed to be a
1950 // variable.
1951
1952 // The first two arguments can vary for any GEP, the rest have to be
1953 // static for struct slots
1954 if (J > 1) {
1955 assert(CurTy && "No current type?");
1956 if (CurTy->isStructTy())
1957 return nullptr;
1958 }
1959
1960 DI = J;
1961 } else {
1962 // The GEP is different by more than one input. While this could be
1963 // extended to support GEPs that vary by more than one variable it
1964 // doesn't make sense since it greatly increases the complexity and
1965 // would result in an R+R+R addressing mode which no backend
1966 // directly supports and would need to be broken into several
1967 // simpler instructions anyway.
1968 return nullptr;
1969 }
1970 }
1971
1972 // Sink down a layer of the type for the next iteration.
1973 if (J > 0) {
1974 if (J == 1) {
1975 CurTy = Op1->getSourceElementType();
1976 } else {
1977 CurTy =
1978 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
1979 }
1980 }
1981 }
1982 }
1983
1984 // If not all GEPs are identical we'll have to create a new PHI node.
1985 // Check that the old PHI node has only one use so that it will get
1986 // removed.
1987 if (DI != -1 && !PN->hasOneUse())
1988 return nullptr;
1989
1990 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
1991 if (DI == -1) {
1992 // All the GEPs feeding the PHI are identical. Clone one down into our
1993 // BB so that it can be merged with the current GEP.
1994 } else {
1995 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
1996 // into the current block so it can be merged, and create a new PHI to
1997 // set that index.
1998 PHINode *NewPN;
1999 {
2000 IRBuilderBase::InsertPointGuard Guard(Builder);
2001 Builder.SetInsertPoint(PN);
2002 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2003 PN->getNumOperands());
2004 }
2005
2006 for (auto &I : PN->operands())
2007 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2008 PN->getIncomingBlock(I));
2009
2010 NewGEP->setOperand(DI, NewPN);
2011 }
2012
2013 NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2014 return replaceOperand(GEP, 0, NewGEP);
2015 }
2016
2017 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2018 if (Instruction *I = visitGEPOfGEP(GEP, Src))
2019 return I;
2020
2021 // Skip if GEP source element type is scalable. The type alloc size is unknown
2022 // at compile-time.
2023 if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2024 unsigned AS = GEP.getPointerAddressSpace();
2025 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2026 DL.getIndexSizeInBits(AS)) {
2027 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2028
2029 bool Matched = false;
2030 uint64_t C;
2031 Value *V = nullptr;
2032 if (TyAllocSize == 1) {
2033 V = GEP.getOperand(1);
2034 Matched = true;
2035 } else if (match(GEP.getOperand(1),
2036 m_AShr(m_Value(V), m_ConstantInt(C)))) {
2037 if (TyAllocSize == 1ULL << C)
2038 Matched = true;
2039 } else if (match(GEP.getOperand(1),
2040 m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2041 if (TyAllocSize == C)
2042 Matched = true;
2043 }
2044
2045 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2046 // only if both point to the same underlying object (otherwise provenance
2047 // is not necessarily retained).
2048 Value *Y;
2049 Value *X = GEP.getOperand(0);
2050 if (Matched &&
2051 match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2052 getUnderlyingObject(X) == getUnderlyingObject(Y))
2053 return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2054 }
2055 }
2056
2057 // We do not handle pointer-vector geps here.
2058 if (GEPType->isVectorTy())
2059 return nullptr;
2060
2061 if (!GEP.isInBounds()) {
2062 unsigned IdxWidth =
2063 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2064 APInt BasePtrOffset(IdxWidth, 0);
2065 Value *UnderlyingPtrOp =
2066 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2067 BasePtrOffset);
2068 if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2069 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2070 BasePtrOffset.isNonNegative()) {
2071 APInt AllocSize(
2072 IdxWidth,
2073 DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
2074 if (BasePtrOffset.ule(AllocSize)) {
2075 return GetElementPtrInst::CreateInBounds(
2076 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2077 }
2078 }
2079 }
2080 }
2081
2082 if (Instruction *R = foldSelectGEP(GEP, Builder))
2083 return R;
2084
2085 return nullptr;
2086}
2087
2088 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2089 Instruction *AI) {
2090 if (isa<ConstantPointerNull>(V))
2091 return true;
2092 if (auto *LI = dyn_cast<LoadInst>(V))
2093 return isa<GlobalVariable>(LI->getPointerOperand());
2094 // Two distinct allocations will never be equal.
2095 return isAllocLikeFn(V, &TLI) && V != AI;
2096}
2097
2098/// Given a call CB which uses an address UsedV, return true if we can prove the
2099/// call's only possible effect is storing to UsedV.
2100static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2101 const TargetLibraryInfo &TLI) {
2102 if (!CB.use_empty())
2103 // TODO: add recursion if returned attribute is present
2104 return false;
2105
2106 if (CB.isTerminator())
2107 // TODO: remove implementation restriction
2108 return false;
2109
2110 if (!CB.willReturn() || !CB.doesNotThrow())
2111 return false;
2112
2113 // If the only possible side effect of the call is writing to the alloca,
2114 // and the result isn't used, we can safely remove any reads implied by the
2115 // call including those which might read the alloca itself.
2116 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2117 return Dest && Dest->Ptr == UsedV;
2118}
2119
2120 static bool isAllocSiteRemovable(Instruction *AI,
2121 SmallVectorImpl<WeakTrackingVH> &Users,
2122 const TargetLibraryInfo &TLI) {
2123 SmallVector<Instruction*, 4> Worklist;
2124 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2125 Worklist.push_back(AI);
2126
2127 do {
2128 Instruction *PI = Worklist.pop_back_val();
2129 for (User *U : PI->users()) {
2130 Instruction *I = cast<Instruction>(U);
2131 switch (I->getOpcode()) {
2132 default:
2133 // Give up the moment we see something we can't handle.
2134 return false;
2135
2136 case Instruction::AddrSpaceCast:
2137 case Instruction::BitCast:
2138 case Instruction::GetElementPtr:
2139 Users.emplace_back(I);
2140 Worklist.push_back(I);
2141 continue;
2142
2143 case Instruction::ICmp: {
2144 ICmpInst *ICI = cast<ICmpInst>(I);
2145 // We can fold eq/ne comparisons with null to false/true, respectively.
2146 // We also fold comparisons in some conditions provided the alloc has
2147 // not escaped (see isNeverEqualToUnescapedAlloc).
2148 if (!ICI->isEquality())
2149 return false;
2150 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2151 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2152 return false;
2153 Users.emplace_back(I);
2154 continue;
2155 }
2156
2157 case Instruction::Call:
2158 // Ignore no-op and store intrinsics.
2159 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2160 switch (II->getIntrinsicID()) {
2161 default:
2162 return false;
2163
2164 case Intrinsic::memmove:
2165 case Intrinsic::memcpy:
2166 case Intrinsic::memset: {
2167 MemIntrinsic *MI = cast<MemIntrinsic>(II);
2168 if (MI->isVolatile() || MI->getRawDest() != PI)
2169 return false;
2170 [[fallthrough]];
2171 }
2172 case Intrinsic::assume:
2173 case Intrinsic::invariant_start:
2174 case Intrinsic::invariant_end:
2175 case Intrinsic::lifetime_start:
2176 case Intrinsic::lifetime_end:
2177 case Intrinsic::objectsize:
2178 Users.emplace_back(I);
2179 continue;
2180 case Intrinsic::launder_invariant_group:
2181 case Intrinsic::strip_invariant_group:
2182 Users.emplace_back(I);
2183 Worklist.push_back(I);
2184 continue;
2185 }
2186 }
2187
2188 if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2189 Users.emplace_back(I);
2190 continue;
2191 }
2192
2193 if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2194 getAllocationFamily(I, &TLI) == Family) {
2195 assert(Family);
2196 Users.emplace_back(I);
2197 continue;
2198 }
2199
2200 if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
2201 getAllocationFamily(I, &TLI) == Family) {
2202 assert(Family);
2203 Users.emplace_back(I);
2204 Worklist.push_back(I);
2205 continue;
2206 }
2207
2208 return false;
2209
2210 case Instruction::Store: {
2211 StoreInst *SI = cast<StoreInst>(I);
2212 if (SI->isVolatile() || SI->getPointerOperand() != PI)
2213 return false;
2214 Users.emplace_back(I);
2215 continue;
2216 }
2217 }
2218 llvm_unreachable("missing a return?");
2219 }
2220 } while (!Worklist.empty());
2221 return true;
2222}
2223
2224 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2225 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2226
2227 // If we have a malloc call which is only used in any number of comparisons to
2228 // null and free calls, delete the calls and replace the comparisons with true
2229 // or false as appropriate.
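// Illustrative example (added for exposition, not from the original source):
//   %p = call ptr @malloc(i64 4)
//   %c = icmp eq ptr %p, null
//   call void @free(ptr %p)
// Here %c can be replaced with 'false' and both calls can be erased.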
2230
2231 // This is based on the principle that we can substitute our own allocation
2232 // function (which will never return null) rather than knowledge of the
2233 // specific function being called. In some sense this can change the permitted
2234 // outputs of a program (when we convert a malloc to an alloca, the fact that
2235 // the allocation is now on the stack is potentially visible, for example),
2236 // but we believe this is done in a permissible manner.
2237 SmallVector<WeakTrackingVH, 64> Users;
2238
2239 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2240 // before each store.
2241 SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2242 std::unique_ptr<DIBuilder> DIB;
2243 if (isa<AllocaInst>(MI)) {
2244 findDbgUsers(DVIs, &MI);
2245 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2246 }
2247
2248 if (isAllocSiteRemovable(&MI, Users, TLI)) {
2249 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2250 // Lowering all @llvm.objectsize calls first because they may
2251 // use a bitcast/GEP of the alloca we are removing.
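// Illustrative (added for exposition): a user such as
//   %sz = call i64 @llvm.objectsize.i64.p0(ptr %a, i1 false, i1 true, i1 false)
// is folded to a constant now, before the remaining users of the allocation
// are replaced and erased below.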
2252 if (!Users[i])
2253 continue;
2254
2255 Instruction *I = cast<Instruction>(&*Users[i]);
2256
2257 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2258 if (II->getIntrinsicID() == Intrinsic::objectsize) {
2259 Value *Result =
2260 lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2261 replaceInstUsesWith(*I, Result);
2262 eraseInstFromFunction(*I);
2263 Users[i] = nullptr; // Skip examining in the next loop.
2264 }
2265 }
2266 }
2267 for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2268 if (!Users[i])
2269 continue;
2270
2271 Instruction *I = cast<Instruction>(&*Users[i]);
2272
2273 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2274 replaceInstUsesWith(*C,
2275 ConstantInt::get(Type::getInt1Ty(C->getContext()),
2276 C->isFalseWhenEqual()));
2277 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2278 for (auto *DVI : DVIs)
2279 if (DVI->isAddressOfVariable())
2280 ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2281 } else {
2282 // Casts, GEP, or anything else: we're about to delete this instruction,
2283 // so it can not have any valid uses.
2284 replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2285 }
2286 eraseInstFromFunction(*I);
2287 }
2288
2289 if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2290 // Replace invoke with a NOP intrinsic to maintain the original CFG
2291 Module *M = II->getModule();
2292 Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2293 InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2294 std::nullopt, "", II->getParent());
2295 }
2296
2297 // Remove debug intrinsics which describe the value contained within the
2298 // alloca. In addition to removing dbg.{declare,addr} which simply point to
2299 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2300 //
2301 // ```
2302 // define void @foo(i32 %0) {
2303 // %a = alloca i32 ; Deleted.
2304 // store i32 %0, i32* %a
2305 // dbg.value(i32 %0, "arg0") ; Not deleted.
2306 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2307 // call void @trivially_inlinable_no_op(i32* %a)
2308 // ret void
2309 // }
2310 // ```
2311 //
2312 // This may not be required if we stop describing the contents of allocas
2313 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2314 // the LowerDbgDeclare utility.
2315 //
2316 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2317 // "arg0" dbg.value may be stale after the call. However, failing to remove
2318 // the DW_OP_deref dbg.value causes large gaps in location coverage.
2319 for (auto *DVI : DVIs)
2320 if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2321 DVI->eraseFromParent();
2322
2323 return eraseInstFromFunction(MI);
2324 }
2325 return nullptr;
2326}
2327
2328/// Move the call to free before a NULL test.
2329///
2330 /// Check if this free is accessed after its argument has been tested
2331/// against NULL (property 0).
2332/// If yes, it is legal to move this call in its predecessor block.
2333///
2334/// The move is performed only if the block containing the call to free
2335/// will be removed, i.e.:
2336/// 1. it has only one predecessor P, and P has two successors
2337/// 2. it contains the call, noops, and an unconditional branch
2338/// 3. its successor is the same as its predecessor's successor
2339///
2340 /// Profitability is not a concern here; this function should
2341/// be called only if the caller knows this transformation would be
2342/// profitable (e.g., for code size).
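///
/// Illustrative shape (added for exposition, not from the original source):
///   pred:   br i1 (icmp eq ptr %p, null), label %succ, label %freebb
///   freebb: call void @free(ptr %p)
///           br label %succ
/// After the transform the call to free is hoisted into 'pred' before the
/// branch, so later passes can delete the now-empty 'freebb'.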
2343 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2344 const DataLayout &DL) {
2345 Value *Op = FI.getArgOperand(0);
2346 BasicBlock *FreeInstrBB = FI.getParent();
2347 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2348
2349 // Validate part of constraint #1: Only one predecessor
2350 // FIXME: We can extend the number of predecessors, but in that case, we
2351 // would duplicate the call to free in each predecessor and it may
2352 // not be profitable even for code size.
2353 if (!PredBB)
2354 return nullptr;
2355
2356 // Validate constraint #2: Does this block contain only the call to
2357 // free, noops, and an unconditional branch?
2358 BasicBlock *SuccBB;
2359 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2360 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2361 return nullptr;
2362
2363 // If there are only 2 instructions in the block, at this point,
2364 // they are the call to free and the unconditional branch.
2365 // If there are more than 2 instructions, check that they are noops
2366 // i.e., they won't hurt the performance of the generated code.
2367 if (FreeInstrBB->size() != 2) {
2368 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2369 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2370 continue;
2371 auto *Cast = dyn_cast<CastInst>(&Inst);
2372 if (!Cast || !Cast->isNoopCast(DL))
2373 return nullptr;
2374 }
2375 }
2376 // Validate the rest of constraint #1 by matching on the pred branch.
2377 Instruction *TI = PredBB->getTerminator();
2378 BasicBlock *TrueBB, *FalseBB;
2379 ICmpInst::Predicate Pred;
2380 if (!match(TI, m_Br(m_ICmp(Pred,
2381 m_CombineOr(m_Specific(Op),
2382 m_Specific(Op->stripPointerCasts())),
2383 m_Zero()),
2384 TrueBB, FalseBB)))
2385 return nullptr;
2386 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2387 return nullptr;
2388
2389 // Validate constraint #3: Ensure the null case just falls through.
2390 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2391 return nullptr;
2392 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2393 "Broken CFG: missing edge from predecessor to successor");
2394
2395 // At this point, we know that everything in FreeInstrBB can be moved
2396 // before TI.
2397 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2398 if (&Instr == FreeInstrBBTerminator)
2399 break;
2400 Instr.moveBefore(TI);
2401 }
2402 assert(FreeInstrBB->size() == 1 &&
2403 "Only the branch instruction should remain");
2404
2405 // Now that we've moved the call to free before the NULL check, we have to
2406 // remove any attributes on its parameter that imply it's non-null, because
2407 // those attributes might have only been valid because of the NULL check, and
2408 // we can get miscompiles if we keep them. This is conservative if non-null is
2409 // also implied by something other than the NULL check, but it's guaranteed to
2410 // be correct, and the conservativeness won't matter in practice, since the
2411 // attributes are irrelevant for the call to free itself and the pointer
2412 // shouldn't be used after the call.
2413 AttributeList Attrs = FI.getAttributes();
2414 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
2415 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
2416 if (Dereferenceable.isValid()) {
2417 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
2418 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
2419 Attribute::Dereferenceable);
2420 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
2421 }
2422 FI.setAttributes(Attrs);
2423
2424 return &FI;
2425}
2426
2427 Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
2428 // free undef -> unreachable.
2429 if (isa<UndefValue>(Op)) {
2430 // Leave a marker since we can't modify the CFG here.
2431 CreateNonTerminatorUnreachable(&FI);
2432 return eraseInstFromFunction(FI);
2433 }
2434
2435 // If we have 'free null' delete the instruction. This can happen in stl code
2436 // when lots of inlining happens.
2437 if (isa<ConstantPointerNull>(Op))
2438 return eraseInstFromFunction(FI);
2439
2440 // If we had free(realloc(...)) with no intervening uses, then eliminate the
2441 // realloc() entirely.
2442 CallInst *CI = dyn_cast<CallInst>(Op);
2443 if (CI && CI->hasOneUse())
2444 if (Value *ReallocatedOp = getReallocatedOperand(CI))
2445 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
2446
2447 // If we optimize for code size, try to move the call to free before the null
2448 // test so that simplify cfg can remove the empty block and dead code
2449 // elimination can remove the branch.
2450 // if (foo) free(foo);
2451 // into
2452 // free(foo);
2453 //
2454 // Note that we can only do this for 'free' and not for any flavor of
2455 // 'operator delete'; there is no 'operator delete' symbol for which we are
2456 // permitted to invent a call, even if we're passing in a null pointer.
2457 if (MinimizeSize) {
2458 LibFunc Func;
2459 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
2460 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
2461 return I;
2462 }
2463
2464 return nullptr;
2465}
2466
2467 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
2468 // Nothing for now.
2469 return nullptr;
2470}
2471
2472// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
2473 Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
2474 // Try to remove the previous instruction if it must lead to unreachable.
2475 // This includes instructions like stores and "llvm.assume" that may not get
2476 // removed by simple dead code elimination.
2477 while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
2478 // While we theoretically can erase EH, that would result in a block that
2479 // used to start with an EH pad no longer starting with one, which is invalid.
2480 // To make it valid, we'd need to fixup predecessors to no longer refer to
2481 // this block, but that changes CFG, which is not allowed in InstCombine.
2482 if (Prev->isEHPad())
2483 return nullptr; // Can not drop any more instructions. We're done here.
2484
2486 return nullptr; // Can not drop any more instructions. We're done here.
2487 // Otherwise, this instruction can be freely erased,
2488 // even if it is not side-effect free.
2489
2490 // A value may still have uses before we process it here (for example, in
2491 // another unreachable block), so convert those to poison.
2492 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
2493 eraseInstFromFunction(*Prev);
2494 }
2495 assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
2496 // FIXME: recurse into unconditional predecessors?
2497 return nullptr;
2498}
2499
2500 Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
2501 assert(BI.isUnconditional() && "Only for unconditional branches.");
2502
2503 // If this store is the second-to-last instruction in the basic block
2504 // (excluding debug info and bitcasts of pointers) and if the block ends with
2505 // an unconditional branch, try to move the store to the successor block.
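// Illustrative (added for exposition): in
//   bb:   store i32 %v, ptr %p
//         br label %exit
// the store may be sunk into %exit when it can be merged there with the
// stores arriving from %exit's other predecessors.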
2506
2507 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
2508 auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
2509 return BBI->isDebugOrPseudoInst() ||
2510 (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
2511 };
2512
2513 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
2514 do {
2515 if (BBI != FirstInstr)
2516 --BBI;
2517 } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
2518
2519 return dyn_cast<StoreInst>(BBI);
2520 };
2521
2522 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
2523 if (mergeStoreIntoSuccessor(*SI))
2524 return &BI;
2525
2526 return nullptr;
2527}
2528
2529/// If a block is dead due to a known branch condition, remove instructions
2530/// in it.
2531 static bool handlePotentiallyDeadBlock(BasicBlock *BB, InstCombiner &IC) {
2532 // We only know one edge to this block is dead, but there may be others.
2533 // TODO: We could track dead edges globally.
2534 if (!BB->getSinglePredecessor())
2535 return false;
2536
2537 bool Changed = false;
2538 for (Instruction &Inst : make_early_inc_range(make_range(
2539 std::next(BB->getTerminator()->getReverseIterator()), BB->rend()))) {
2540 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
2541 IC.replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
2542 Changed = true;
2543 }
2544 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
2545 continue;
2546 IC.eraseInstFromFunction(Inst);
2547 Changed = true;
2548 }
2549
2550 // TODO: Successor blocks may also be dead.
2551 return Changed;
2552}
2553
2554 static bool handlePotentiallyDeadSuccessors(BasicBlock *BB,
2555 BasicBlock *LiveSucc,
2556 InstCombiner &IC) {
2557 bool Changed = false;
2558 for (BasicBlock *Succ : successors(BB))
2559 if (Succ != LiveSucc)
2560 Changed |= handlePotentiallyDeadBlock(Succ, IC);
2561 return Changed;
2562}
2563
2564 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
2565 if (BI.isUnconditional())
2566 return visitUnconditionalBranchInst(BI);
2567
2568 // Change br (not X), label True, label False to: br X, label False, True
2569 Value *Cond = BI.getCondition();
2570 Value *X;
2571 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
2572 // Swap Destinations and condition...
2573 BI.swapSuccessors();
2574 return replaceOperand(BI, 0, X);
2575 }
2576
2577 // Canonicalize logical-and-with-invert as logical-or-with-invert.
2578 // This is done by inverting the condition and swapping successors:
2579 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
2580 Value *Y;
2581 if (isa<SelectInst>(Cond) &&
2582 match(Cond,
2583 m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) {
2584 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
2585 Value *Or = Builder.CreateLogicalOr(NotX, Y);
2586 BI.swapSuccessors();
2587 return replaceOperand(BI, 0, Or);
2588 }
2589
2590 // If the condition is irrelevant, remove the use so that other
2591 // transforms on the condition become more effective.
2592 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
2593 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
2594
2595 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
2596 CmpInst::Predicate Pred;
2597 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
2598 !isCanonicalPredicate(Pred)) {
2599 // Swap destinations and condition.
2600 auto *Cmp = cast<CmpInst>(Cond);
2601 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
2602 BI.swapSuccessors();
2603 Worklist.push(Cmp);
2604 return &BI;
2605 }
2606
2607 if (isa<UndefValue>(Cond) && handlePotentiallyDeadSuccessors(
2608 BI.getParent(), /*LiveSucc*/ nullptr, *this))
2609 return &BI;
2610 if (auto *CI = dyn_cast<ConstantInt>(Cond))
2611 if (handlePotentiallyDeadSuccessors(
2612 BI.getParent(), BI.getSuccessor(!CI->getZExtValue()), *this))
2613 return &BI;
2614
2615 return nullptr;
2616}
2617
2618 Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
2619 Value *Cond = SI.getCondition();
2620 Value *Op0;
2621 ConstantInt *AddRHS;
2622 if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
2623 // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
2624 for (auto Case : SI.cases()) {
2625 Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
2626 assert(isa<ConstantInt>(NewCase) &&
2627 "Result of expression should be constant");
2628 Case.setValue(cast<ConstantInt>(NewCase));
2629 }
2630 return replaceOperand(SI, 0, Op0);
2631 }
2632
2633 if (isa<UndefValue>(Cond) && handlePotentiallyDeadSuccessors(
2634 SI.getParent(), /*LiveSucc*/ nullptr, *this))
2635 return &SI;
2636 if (auto *CI = dyn_cast<ConstantInt>(Cond))
2637 if (handlePotentiallyDeadSuccessors(
2638 SI.getParent(), SI.findCaseValue(CI)->getCaseSuccessor(), *this))
2639 return &SI;
2640
2641 KnownBits Known = computeKnownBits(Cond, 0, &SI);
2642 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
2643 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
2644
2645 // Compute the number of leading bits we can ignore.
2646 // TODO: A better way to determine this would use ComputeNumSignBits().
2647 for (const auto &C : SI.cases()) {
2648 LeadingKnownZeros =
2649 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
2650 LeadingKnownOnes =
2651 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
2652 }
2653
2654 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
2655
2656 // Shrink the condition operand if the new type is smaller than the old type.
2657 // But do not shrink to a non-standard type, because backend can't generate
2658 // good code for that yet.
2659 // TODO: We can make it aggressive again after fixing PR39569.
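// Illustrative (added for exposition): if known bits show the i32 condition
// always fits in 8 bits, then
//   switch i32 %x, label %d [ i32 1, label %a
//                             i32 100, label %b ]
// can use a truncated condition:
//   %t = trunc i32 %x to i8
//   switch i8 %t, label %d [ i8 1, label %a
//                            i8 100, label %b ]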
2660 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
2661 shouldChangeType(Known.getBitWidth(), NewWidth)) {
2662 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
2663 Builder.SetInsertPoint(&SI);
2664 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
2665
2666 for (auto Case : SI.cases()) {
2667 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
2668 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
2669 }
2670 return replaceOperand(SI, 0, NewCond);
2671 }
2672
2673 return nullptr;
2674}
2675
2677InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
2678 auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
2679 if (!WO)
2680 return nullptr;
2681
2682 Intrinsic::ID OvID = WO->getIntrinsicID();
2683 const APInt *C = nullptr;
2684 if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
2685 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
2686 OvID == Intrinsic::umul_with_overflow)) {
2687 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
2688 if (C->isAllOnes())
2689 return BinaryOperator::CreateNeg(WO->getLHS());
2690 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
2691 if (C->isPowerOf2()) {
2692 return BinaryOperator::CreateShl(
2693 WO->getLHS(),
2694 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
2695 }
2696 }
2697 }
2698
2699 // We're extracting from an overflow intrinsic. See if we're the only user.
2700 // That allows us to simplify multiple result intrinsics to simpler things
2701 // that just get one value.
2702 if (!WO->hasOneUse())
2703 return nullptr;
2704
2705 // Check if we're grabbing only the result of a 'with overflow' intrinsic
2706 // and replace it with a traditional binary instruction.
2707 if (*EV.idx_begin() == 0) {
2708 Instruction::BinaryOps BinOp = WO->getBinaryOp();
2709 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
2710 // Replace the old instruction's uses with poison.
2711 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
2712 eraseInstFromFunction(*WO);
2713 return BinaryOperator::Create(BinOp, LHS, RHS);
2714 }
2715
2716 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
2717
2718 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
2719 if (OvID == Intrinsic::usub_with_overflow)
2720 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
2721
2722 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
2723 // +1 is not possible because we assume signed values.
2724 if (OvID == Intrinsic::smul_with_overflow &&
2725 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
2726 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
2727
2728 // If only the overflow result is used, and the right hand side is a
2729 // constant (or constant splat), we can remove the intrinsic by directly
2730 // checking for overflow.
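// Illustrative example (added for exposition, not from the original source):
//   %agg = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 200)
//   %ov  = extractvalue { i8, i1 } %agg, 1
// becomes
//   %ov = icmp ugt i8 %x, 55
// because an unsigned i8 add of 200 overflows exactly when %x > 55.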
2731 if (C) {
2732 // Compute the no-wrap range for LHS given RHS=C, then construct an
2733 // equivalent icmp, potentially using an offset.
2734 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
2735 WO->getBinaryOp(), *C, WO->getNoWrapKind());
2736
2737 CmpInst::Predicate Pred;
2738 APInt NewRHSC, Offset;
2739 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
2740 auto *OpTy = WO->getRHS()->getType();
2741 auto *NewLHS = WO->getLHS();
2742 if (Offset != 0)
2743 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
2744 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
2745 ConstantInt::get(OpTy, NewRHSC));
2746 }
2747
2748 return nullptr;
2749}
2750
2751 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
2752 Value *Agg = EV.getAggregateOperand();
2753
2754 if (!EV.hasIndices())
2755 return replaceInstUsesWith(EV, Agg);
2756
2757 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
2758 SQ.getWithInstruction(&EV)))
2759 return replaceInstUsesWith(EV, V);
2760
2761 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
2762 // We're extracting from an insertvalue instruction, compare the indices
2763 const unsigned *exti, *exte, *insi, *inse;
2764 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
2765 exte = EV.idx_end(), inse = IV->idx_end();
2766 exti != exte && insi != inse;
2767 ++exti, ++insi) {
2768 if (*insi != *exti)
2769 // The insert and extract both reference distinctly different elements.
2770 // This means the extract is not influenced by the insert, and we can
2771 // replace the aggregate operand of the extract with the aggregate
2772 // operand of the insert. i.e., replace
2773 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2774 // %E = extractvalue { i32, { i32 } } %I, 0
2775 // with
2776 // %E = extractvalue { i32, { i32 } } %A, 0
2777 return ExtractValueInst::Create(IV->getAggregateOperand(),
2778 EV.getIndices());
2779 }
2780 if (exti == exte && insi == inse)
2781 // Both iterators are at the end: Index lists are identical. Replace
2782 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2783 // %C = extractvalue { i32, { i32 } } %B, 1, 0
2784 // with "i32 42"
2785 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
2786 if (exti == exte) {
2787 // The extract list is a prefix of the insert list. i.e. replace
2788 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2789 // %E = extractvalue { i32, { i32 } } %I, 1
2790 // with
2791 // %X = extractvalue { i32, { i32 } } %A, 1
2792 // %E = insertvalue { i32 } %X, i32 42, 0
2793 // by switching the order of the insert and extract (though the
2794 // insertvalue should be left in, since it may have other uses).
2795 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
2796 EV.getIndices());
2797 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
2798 ArrayRef(insi, inse));
2799 }
2800 if (insi == inse)
2801 // The insert list is a prefix of the extract list
2802 // We can simply remove the common indices from the extract and make it
2803 // operate on the inserted value instead of the insertvalue result.
2804 // i.e., replace
2805 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2806 // %E = extractvalue { i32, { i32 } } %I, 1, 0
2807 // with
2808 // %E extractvalue { i32 } { i32 42 }, 0
2809 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
2810 ArrayRef(exti, exte));
2811 }
2812
2813 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
2814 return R;
2815
2816 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
2817 // Bail out if the aggregate contains scalable vector type
2818 if (auto *STy = dyn_cast<StructType>(Agg->getType());
2819 STy && STy->containsScalableVectorType())
2820 return nullptr;
2821
2822 // If the (non-volatile) load only has one use, we can rewrite this to a
2823 // load from a GEP. This reduces the size of the load. If a load is used
2824 // only by extractvalue instructions then this must either have been
2825 // optimized before, or it is a struct with padding, in which case we
2826 // don't want to do the transformation as it loses padding knowledge.
2827 if (L->isSimple() && L->hasOneUse()) {
2828 // extractvalue has integer indices, getelementptr has Value*s. Convert.
2829 SmallVector<Value*, 4> Indices;
2830 // Prefix an i32 0 since we need the first element.
2831 Indices.push_back(Builder.getInt32(0));
2832 for (unsigned Idx : EV.indices())
2833 Indices.push_back(Builder.getInt32(Idx));
2834
2835 // We need to insert these at the location of the old load, not at that of
2836 // the extractvalue.
2837 Builder.SetInsertPoint(L);
2838 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
2839 L->getPointerOperand(), Indices);
2840 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
2841 // Whatever aliasing information we had for the original load must also
2842 // hold for the smaller load, so propagate the annotations.
2843 NL->setAAMetadata(L->getAAMetadata());
2844 // Returning the load directly will cause the main loop to insert it in
2845 // the wrong spot, so use replaceInstUsesWith().
2846 return replaceInstUsesWith(EV, NL);
2847 }
2848 }
2849
2850 if (auto *PN = dyn_cast<PHINode>(Agg))
2851 if (Instruction *Res = foldOpIntoPhi(EV, PN))
2852 return Res;
2853
2854 // We could simplify extracts from other values. Note that nested extracts may
2855 // already be simplified implicitly by the above: extract (extract (insert) )
2856 // will be translated into extract ( insert ( extract ) ) first and then just
2857 // the value inserted, if appropriate. Similarly for extracts from single-use
2858 // loads: extract (extract (load)) will be translated to extract (load (gep))
2859 // and if again single-use then via load (gep (gep)) to load (gep).
2860 // However, double extracts from e.g. function arguments or return values
2861 // aren't handled yet.
2862 return nullptr;
2863}
2864
2865/// Return 'true' if the given typeinfo will match anything.
2866static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
2867 switch (Personality) {
2868 case EHPersonality::GNU_C:
2869 case EHPersonality::GNU_C_SjLj:
2870 case EHPersonality::Rust:
2871 // The GCC C EH and Rust personality only exists to support cleanups, so
2872 // it's not clear what the semantics of catch clauses are.
2873 return false;
2874 case EHPersonality::Unknown:
2875 return false;
2876 case EHPersonality::GNU_Ada:
2877 // While __gnat_all_others_value will match any Ada exception, it doesn't
2878 // match foreign exceptions (or didn't, before gcc-4.7).
2879 return false;
2880 case EHPersonality::GNU_CXX:
2881 case EHPersonality::GNU_CXX_SjLj:
2882 case EHPersonality::GNU_ObjC:
2883 case EHPersonality::MSVC_X86SEH:
2884 case EHPersonality::MSVC_TableSEH:
2885 case EHPersonality::MSVC_CXX:
2886 case EHPersonality::CoreCLR:
2887 case EHPersonality::Wasm_CXX:
2888 case EHPersonality::XL_CXX:
2889 return TypeInfo->isNullValue();
2890 }
2891 llvm_unreachable("invalid enum");
2892}
2893
2894static bool shorter_filter(const Value *LHS, const Value *RHS) {
2895 return
2896 cast<ArrayType>(LHS->getType())->getNumElements()
2897 <
2898 cast<ArrayType>(RHS->getType())->getNumElements();
2899}
2900
2901 Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
2902 // The logic here should be correct for any real-world personality function.
2903 // However if that turns out not to be true, the offending logic can always
2904 // be conditioned on the personality function, like the catch-all logic is.
2905 EHPersonality Personality =
2906 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
2907
2908 // Simplify the list of clauses, eg by removing repeated catch clauses
2909 // (these are often created by inlining).
2910 bool MakeNewInstruction = false; // If true, recreate using the following:
2911 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
2912 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
2913
2914 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
2915 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
2916 bool isLastClause = i + 1 == e;
2917 if (LI.isCatch(i)) {
2918 // A catch clause.
2919 Constant *CatchClause = LI.getClause(i);
2920 Constant *TypeInfo = CatchClause->stripPointerCasts();
2921
2922 // If we already saw this clause, there is no point in having a second
2923 // copy of it.
2924 if (AlreadyCaught.insert(TypeInfo).second) {
2925 // This catch clause was not already seen.
2926 NewClauses.push_back(CatchClause);
2927 } else {
2928 // Repeated catch clause - drop the redundant copy.
2929 MakeNewInstruction = true;
2930 }
2931
2932 // If this is a catch-all then there is no point in keeping any following
2933 // clauses or marking the landingpad as having a cleanup.
2934 if (isCatchAll(Personality, TypeInfo)) {
2935 if (!isLastClause)
2936 MakeNewInstruction = true;
2937 CleanupFlag = false;
2938 break;
2939 }
2940 } else {
2941 // A filter clause. If any of the filter elements were already caught
2942 // then they can be dropped from the filter. It is tempting to try to
2943 // exploit the filter further by saying that any typeinfo that does not
2944 // occur in the filter can't be caught later (and thus can be dropped).
2945 // However this would be wrong, since typeinfos can match without being
2946 // equal (for example if one represents a C++ class, and the other some
2947 // class derived from it).
2948 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
2949 Constant *FilterClause = LI.getClause(i);
2950 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
2951 unsigned NumTypeInfos = FilterType->getNumElements();
2952
2953 // An empty filter catches everything, so there is no point in keeping any
2954 // following clauses or marking the landingpad as having a cleanup. By
2955 // dealing with this case here the following code is made a bit simpler.
2956 if (!NumTypeInfos) {
2957 NewClauses.push_back(FilterClause);
2958 if (!isLastClause)
2959 MakeNewInstruction = true;
2960 CleanupFlag = false;
2961 break;
2962 }
2963
2964 bool MakeNewFilter = false; // If true, make a new filter.
2965 SmallVector<Constant *, 16> NewFilterElts; // New elements.
2966 if (isa<ConstantAggregateZero>(FilterClause)) {
2967 // Not an empty filter - it contains at least one null typeinfo.
2968 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
2969 Constant *TypeInfo =
2971 // If this typeinfo is a catch-all then the filter can never match.
2972 if (isCatchAll(Personality, TypeInfo)) {
2973 // Throw the filter away.
2974 MakeNewInstruction = true;
2975 continue;
2976 }
2977
2978 // There is no point in having multiple copies of this typeinfo, so
2979 // discard all but the first copy if there is more than one.
2980 NewFilterElts.push_back(TypeInfo);
2981 if (NumTypeInfos > 1)
2982 MakeNewFilter = true;
2983 } else {
2984 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
2985 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
2986 NewFilterElts.reserve(NumTypeInfos);
2987
2988 // Remove any filter elements that were already caught or that already
2989 // occurred in the filter. While there, see if any of the elements are
2990 // catch-alls. If so, the filter can be discarded.
2991 bool SawCatchAll = false;
2992 for (unsigned j = 0; j != NumTypeInfos; ++j) {
2993 Constant *Elt = Filter->getOperand(j);
2994 Constant *TypeInfo = Elt->stripPointerCasts();
2995 if (isCatchAll(Personality, TypeInfo)) {
2996 // This element is a catch-all. Bail out, noting this fact.
2997 SawCatchAll = true;
2998 break;
2999 }
3000
3001 // Even if we've seen a type in a catch clause, we don't want to
3002 // remove it from the filter. An unexpected type handler may be
3003 // set up for a call site which throws an exception of the same
3004 // type caught. In order for the exception thrown by the unexpected
3005 // handler to propagate correctly, the filter must be correctly
3006 // described for the call site.
3007 //
3008 // Example:
3009 //
3010 // void unexpected() { throw 1;}
3011 // void foo() throw (int) {
3012 // std::set_unexpected(unexpected);
3013 // try {
3014 // throw 2.0;
3015 // } catch (int i) {}
3016 // }
3017
3018 // There is no point in having multiple copies of the same typeinfo in
3019 // a filter, so only add it if we didn't already.
3020 if (SeenInFilter.insert(TypeInfo).second)
3021 NewFilterElts.push_back(cast<Constant>(Elt));
3022 }
3023 // A filter containing a catch-all cannot match anything by definition.
3024 if (SawCatchAll) {
3025 // Throw the filter away.
3026 MakeNewInstruction = true;
3027 continue;
3028 }
3029
3030 // If we dropped something from the filter, make a new one.
3031 if (NewFilterElts.size() < NumTypeInfos)
3032 MakeNewFilter = true;
3033 }
3034 if (MakeNewFilter) {
3035 FilterType = ArrayType::get(FilterType->getElementType(),
3036 NewFilterElts.size());
3037 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3038 MakeNewInstruction = true;
3039 }
3040
3041 NewClauses.push_back(FilterClause);
3042
3043 // If the new filter is empty then it will catch everything so there is
3044 // no point in keeping any following clauses or marking the landingpad
3045 // as having a cleanup. The case of the original filter being empty was
3046 // already handled above.
3047 if (MakeNewFilter && !NewFilterElts.size()) {
3048 assert(MakeNewInstruction && "New filter but not a new instruction!");
3049 CleanupFlag = false;
3050 break;
3051 }
3052 }
3053 }
3054
3055 // If several filters occur in a row then reorder them so that the shortest
3056 // filters come first (those with the smallest number of elements). This is
3057 // advantageous because shorter filters are more likely to match, speeding up
3058 // unwinding, but mostly because it increases the effectiveness of the other
3059 // filter optimizations below.
3060 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3061 unsigned j;
3062 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3063 for (j = i; j != e; ++j)
3064 if (!isa<ArrayType>(NewClauses[j]->getType()))
3065 break;
3066
3067 // Check whether the filters are already sorted by length. We need to know
3068 // if sorting them is actually going to do anything so that we only make a
3069 // new landingpad instruction if it does.
3070 for (unsigned k = i; k + 1 < j; ++k)
3071 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3072 // Not sorted, so sort the filters now. Doing an unstable sort would be
3073 // correct too but reordering filters pointlessly might confuse users.
3074 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3075 shorter_filter);
3076 MakeNewInstruction = true;
3077 break;
3078 }
3079
3080 // Look for the next batch of filters.
3081 i = j + 1;
3082 }
3083
3084 // If typeinfos matched if and only if equal, then the elements of a filter L
3085 // that occurs later than a filter F could be replaced by the intersection of
3086 // the elements of F and L. In reality two typeinfos can match without being
3087 // equal (for example if one represents a C++ class, and the other some class
3088 // derived from it) so it would be wrong to perform this transform in general.
3089 // However the transform is correct and useful if F is a subset of L. In that
3090 // case L can be replaced by F, and thus removed altogether since repeating a
3091 // filter is pointless. So here we look at all pairs of filters F and L where
3092 // L follows F in the list of clauses, and remove L if every element of F is
3093 // an element of L. This can occur when inlining C++ functions with exception
3094 // specifications.
3095 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3096 // Examine each filter in turn.
3097 Value *Filter = NewClauses[i];
3098 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3099 if (!FTy)
3100 // Not a filter - skip it.
3101 continue;
3102 unsigned FElts = FTy->getNumElements();
3103 // Examine each filter following this one. Doing this backwards means that
3104 // we don't have to worry about filters disappearing under us when removed.
3105 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3106 Value *LFilter = NewClauses[j];
3107 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3108 if (!LTy)
3109 // Not a filter - skip it.
3110 continue;
3111 // If Filter is a subset of LFilter, i.e. every element of Filter is also
3112 // an element of LFilter, then discard LFilter.
3113 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3114 // If Filter is empty then it is a subset of LFilter.
3115 if (!FElts) {
3116 // Discard LFilter.
3117 NewClauses.erase(J);
3118 MakeNewInstruction = true;
3119 // Move on to the next filter.
3120 continue;
3121 }
3122 unsigned LElts = LTy->getNumElements();
3123 // If Filter is longer than LFilter then it cannot be a subset of it.
3124 if (FElts > LElts)
3125 // Move on to the next filter.
3126 continue;
3127 // At this point we know that LFilter has at least one element.
3128 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3129 // Filter is a subset of LFilter iff Filter contains only zeros (as we
3130 // already know that Filter is not longer than LFilter).
3131 if (isa<ConstantAggregateZero>(Filter)) {
3132 assert(FElts <= LElts && "Should have handled this case earlier!");
3133 // Discard LFilter.
3134 NewClauses.erase(J);
3135 MakeNewInstruction = true;
3136 }
3137 // Move on to the next filter.
3138 continue;
3139 }
3140 ConstantArray *LArray = cast<ConstantArray>(LFilter);
3141 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3142 // Since Filter is non-empty and contains only zeros, it is a subset of
3143 // LFilter iff LFilter contains a zero.
3144 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3145 for (unsigned l = 0; l != LElts; ++l)
3146 if (LArray->getOperand(l)->isNullValue()) {
3147 // LFilter contains a zero - discard it.
3148 NewClauses.erase(J);
3149 MakeNewInstruction = true;
3150 break;
3151 }
3152 // Move on to the next filter.
3153 continue;
3154 }
3155 // At this point we know that both filters are ConstantArrays. Loop over
3156 // operands to see whether every element of Filter is also an element of
3157 // LFilter. Since filters tend to be short this is probably faster than
3158 // using a method that scales nicely.
3159 ConstantArray *FArray = cast<ConstantArray>(Filter);
3160 bool AllFound = true;
3161 for (unsigned f = 0; f != FElts; ++f) {
3162 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3163 AllFound = false;
3164 for (unsigned l = 0; l != LElts; ++l) {
3165 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3166 if (LTypeInfo == FTypeInfo) {
3167 AllFound = true;
3168 break;
3169 }
3170 }
3171 if (!AllFound)
3172 break;
3173 }
3174 if (AllFound) {
3175 // Discard LFilter.
3176 NewClauses.erase(J);
3177 MakeNewInstruction = true;
3178 }
3179 // Move on to the next filter.
3180 }
3181 }
3182
3183 // If we changed any of the clauses, replace the old landingpad instruction
3184 // with a new one.
3185 if (MakeNewInstruction) {
3186 LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3187 NewClauses.size());
3188 for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3189 NLI->addClause(NewClauses[i]);
3190 // A landing pad with no clauses must have the cleanup flag set. It is
3191 // theoretically possible, though highly unlikely, that we eliminated all
3192 // clauses. If so, force the cleanup flag to true.
3193 if (NewClauses.empty())
3194 CleanupFlag = true;
3195 NLI->setCleanup(CleanupFlag);
3196 return NLI;
3197 }
3198
3199 // Even if none of the clauses changed, we may nonetheless have understood
3200 // that the cleanup flag is pointless. Clear it if so.
3201 if (LI.isCleanup() != CleanupFlag) {
3202 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3203 LI.setCleanup(CleanupFlag);
3204 return &LI;
3205 }
3206
3207 return nullptr;
3208}
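// As a hand-written illustration (not taken from an actual test) of the
// filter-subset rule implemented above: when a filter F precedes a longer
// filter L that contains every typeinfo of F, e.g.
//   %lp = landingpad { ptr, i32 }
//           filter [1 x ptr] [ptr @_ZTIi]
//           filter [2 x ptr] [ptr @_ZTIi, ptr @_ZTId]
// the second (longer) filter can never add anything and is dropped, leaving
//           filter [1 x ptr] [ptr @_ZTIi]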
3209
3210Value *
3211InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3212 // Try to push freeze through instructions that propagate but don't produce
3213  // poison as far as possible. If an operand of the freeze satisfies three
3214  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
3215  // but one of its operands are guaranteed non-poison, then push the freeze
3216  // through to the one operand that is not guaranteed non-poison. The actual
3217  // transform is as follows.
3218  //   Op1 = ...                        ; Op1 can be poison
3219  //   Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and its other
3220  //                                    ; operands are all guaranteed non-poison
3221 // ... = Freeze(Op0)
3222 // =>
3223 // Op1 = ...
3224 // Op1.fr = Freeze(Op1)
3225 // ... = Inst(Op1.fr, NonPoisonOps...)
3226 auto *OrigOp = OrigFI.getOperand(0);
3227 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3228
3229 // While we could change the other users of OrigOp to use freeze(OrigOp), that
3230 // potentially reduces their optimization potential, so let's only do this iff
3231 // the OrigOp is only used by the freeze.
3232 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3233 return nullptr;
3234
3235 // We can't push the freeze through an instruction which can itself create
3236 // poison. If the only source of new poison is flags, we can simply
3237 // strip them (since we know the only use is the freeze and nothing can
3238 // benefit from them.)
3239 if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
3240 /*ConsiderFlagsAndMetadata*/ false))
3241 return nullptr;
3242
3243 // If operand is guaranteed not to be poison, there is no need to add freeze
3244 // to the operand. So we first find the operand that is not guaranteed to be
3245 // poison.
3246 Use *MaybePoisonOperand = nullptr;
3247 for (Use &U : OrigOpInst->operands()) {
3248 if (isa<MetadataAsValue>(U.get()) ||
3249        isGuaranteedNotToBeUndefOrPoison(U.get()))
3250 continue;
3251 if (!MaybePoisonOperand)
3252 MaybePoisonOperand = &U;
3253 else
3254 return nullptr;
3255 }
3256
3257 OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
3258
3259 // If all operands are guaranteed to be non-poison, we can drop freeze.
3260 if (!MaybePoisonOperand)
3261 return OrigOp;
3262
3263 Builder.SetInsertPoint(OrigOpInst);
3264 auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3265 MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3266
3267 replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3268 return OrigOp;
3269}
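// A minimal sketch of the transform above (assuming %x has no other users):
//   %x = add nsw i32 %p, 1      ; %p may be poison; nsw may also create poison
//   %y = freeze i32 %x
// becomes, after stripping the poison-generating flag and pushing the freeze:
//   %p.fr = freeze i32 %p
//   %x = add i32 %p.fr, 1       ; %y is replaced by %x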
3270
3271Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
3272 PHINode *PN) {
3273 // Detect whether this is a recurrence with a start value and some number of
3274 // backedge values. We'll check whether we can push the freeze through the
3275 // backedge values (possibly dropping poison flags along the way) until we
3276 // reach the phi again. In that case, we can move the freeze to the start
3277 // value.
3278 Use *StartU = nullptr;
3279  SmallVector<Value *> Worklist;
3280 for (Use &U : PN->incoming_values()) {
3281 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3282 // Add backedge value to worklist.
3283 Worklist.push_back(U.get());
3284 continue;
3285 }
3286
3287 // Don't bother handling multiple start values.
3288 if (StartU)
3289 return nullptr;
3290 StartU = &U;
3291 }
3292
3293 if (!StartU || Worklist.empty())
3294 return nullptr; // Not a recurrence.
3295
3296 Value *StartV = StartU->get();
3297 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3298 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3299  // We can't insert freeze if the start value is the result of the
3300 // terminator (e.g. an invoke).
3301 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3302 return nullptr;
3303
3304  SmallPtrSet<Value *, 32> Visited;
3305  SmallVector<Instruction *> DropFlags;
3306 while (!Worklist.empty()) {
3307 Value *V = Worklist.pop_back_val();
3308 if (!Visited.insert(V).second)
3309 continue;
3310
3311 if (Visited.size() > 32)
3312 return nullptr; // Limit the total number of values we inspect.
3313
3314 // Assume that PN is non-poison, because it will be after the transform.
3315 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3316 continue;
3317
3318 Instruction *I = dyn_cast<Instruction>(V);
3319 if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3320 /*ConsiderFlagsAndMetadata*/ false))
3321 return nullptr;
3322
3323 DropFlags.push_back(I);
3324 append_range(Worklist, I->operands());
3325 }
3326
3327 for (Instruction *I : DropFlags)
3328 I->dropPoisonGeneratingFlagsAndMetadata();
3329
3330 if (StartNeedsFreeze) {
3332 Value *FrozenStartV = Builder.CreateFreeze(StartV,
3333 StartV->getName() + ".fr");
3334 replaceUse(*StartU, FrozenStartV);
3335 }
3336 return replaceInstUsesWith(FI, PN);
3337}
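// A sketch of the recurrence case handled above (names are illustrative):
//   loop:
//     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nuw i32 %iv, 1
//   ...
//   %f = freeze i32 %iv
// can be rewritten, dropping nuw along the backedge chain, so that only the
// start value needs to be frozen:
//   entry:
//     %start.fr = freeze i32 %start
//   loop:
//     %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i32 %iv, 1
//   ...                          ; %f is replaced by %iv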
3338
3339bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3340 Value *Op = FI.getOperand(0);
3341
3342 if (isa<Constant>(Op) || Op->hasOneUse())
3343 return false;
3344
3345 // Move the freeze directly after the definition of its operand, so that
3346 // it dominates the maximum number of uses. Note that it may not dominate
3347 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3348 // the normal/default destination. This is why the domination check in the
3349 // replacement below is still necessary.
3350 Instruction *MoveBefore;
3351 if (isa<Argument>(Op)) {
3352 MoveBefore =
3354 } else {
3355 MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3356 if (!MoveBefore)
3357 return false;
3358 }
3359
3360 bool Changed = false;
3361 if (&FI != MoveBefore) {
3362 FI.moveBefore(MoveBefore);
3363 Changed = true;
3364 }
3365
3366 Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3367 bool Dominates = DT.dominates(&FI, U);
3368 Changed |= Dominates;
3369 return Dominates;
3370 });
3371
3372 return Changed;
3373}
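// Illustrative example of the use-rewriting above: given
//   %v = load i32, ptr %p
//   %a = add i32 %v, 1
//   %f = freeze i32 %v
// the freeze is moved directly after the load, and the dominated use in %a is
// rewritten to go through it:
//   %v = load i32, ptr %p
//   %f = freeze i32 %v
//   %a = add i32 %f, 1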
3374
3375// Check if any direct or bitcast user of this value is a shuffle instruction.
3376static bool isUsedWithinShuffleVector(Value *V) {
3377 for (auto *U : V->users()) {
3378 if (isa<ShuffleVectorInst>(U))
3379 return true;
3380 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
3381 return true;
3382 }
3383 return false;
3384}
3385
3386Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
3387 Value *Op0 = I.getOperand(0);
3388
3389  if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3390 return replaceInstUsesWith(I, V);
3391
3392 // freeze (phi const, x) --> phi const, (freeze x)
3393 if (auto *PN = dyn_cast<PHINode>(Op0)) {
3394 if (Instruction *NV = foldOpIntoPhi(I, PN))
3395 return NV;
3396 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3397 return NV;
3398 }
3399
3400  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3401 return replaceInstUsesWith(I, NI);
3402
3403 // If I is freeze(undef), check its uses and fold it to a fixed constant.
3404 // - or: pick -1
3405 // - select's condition: if the true value is constant, choose it by making
3406 // the condition true.
3407 // - default: pick 0
3408 //
3409 // Note that this transform is intentionally done here rather than
3410 // via an analysis in InstSimplify or at individual user sites. That is
3411 // because we must produce the same value for all uses of the freeze -
3412 // it's the reason "freeze" exists!
3413 //
3414 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3415 // duplicating logic for binops at least.
3416 auto getUndefReplacement = [&I](Type *Ty) {
3417 Constant *BestValue = nullptr;
3418 Constant *NullValue = Constant::getNullValue(Ty);
3419 for (const auto *U : I.users()) {
3420 Constant *C = NullValue;
3421 if (match(U, m_Or(m_Value(), m_Value())))
3423 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
3424 C = ConstantInt::getTrue(Ty);
3425
3426 if (!BestValue)
3427 BestValue = C;
3428 else if (BestValue != C)
3429 BestValue = NullValue;
3430 }
3431 assert(BestValue && "Must have at least one use");
3432 return BestValue;
3433 };
3434
3435 if (match(Op0, m_Undef())) {
3436 // Don't fold freeze(undef/poison) if it's used as a vector operand in
3437 // a shuffle. This may improve codegen for shuffles that allow
3438 // unspecified inputs.
3439    if (isUsedWithinShuffleVector(&I))
3440 return nullptr;
3441 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
3442 }
3443
3444 Constant *C;
3445 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
3446 Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
3448 }
3449
3450 // Replace uses of Op with freeze(Op).
3451 if (freezeOtherUses(I))
3452 return &I;
3453
3454 return nullptr;
3455}
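// Illustrative example of the freeze(undef) folding above: if the only user is
// an 'or', e.g.
//   %f = freeze i32 undef
//   %o = or i32 %f, %x
// then %f is folded to -1 (the absorbing value for 'or'); if different users
// prefer different constants, the null value 0 is used instead.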
3456
3457/// Check for case where the call writes to an otherwise dead alloca. This
3458/// shows up for unused out-params in idiomatic C/C++ code. Note that this
3459/// helper *only* analyzes the write; doesn't check any other legality aspect.
3460static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
3461 auto *CB = dyn_cast<CallBase>(I);
3462 if (!CB)
3463 // TODO: handle e.g. store to alloca here - only worth doing if we extend
3464 // to allow reload along used path as described below. Otherwise, this
3465 // is simply a store to a dead allocation which will be removed.
3466 return false;
3467 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
3468 if (!Dest)
3469 return false;
3470 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
3471 if (!AI)
3472 // TODO: allow malloc?
3473 return false;
3474 // TODO: allow memory access dominated by move point? Note that since AI
3475 // could have a reference to itself captured by the call, we would need to
3476 // account for cycles in doing so.
3477 SmallVector<const User *> AllocaUsers;
3479 auto pushUsers = [&](const Instruction &I) {
3480 for (const User *U : I.users()) {
3481 if (Visited.insert(U).second)
3482 AllocaUsers.push_back(U);
3483 }
3484 };
3485 pushUsers(*AI);
3486 while (!AllocaUsers.empty()) {
3487 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
3488 if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
3489 isa<AddrSpaceCastInst>(UserI)) {
3490 pushUsers(*UserI);
3491 continue;
3492 }
3493 if (UserI == CB)
3494 continue;
3495 // TODO: support lifetime.start/end here
3496 return false;
3497 }
3498 return true;
3499}
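// The idiomatic C pattern this helper recognizes looks roughly like:
//   int unused;            // never read afterwards
//   get_value(&unused);    // the call only writes through the out-parameter
// The destination is an otherwise dead alloca, so the write by itself does not
// make the call unsinkable (all other legality checks are done by the caller).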
3500
3501/// Try to move the specified instruction from its current block into the
3502/// beginning of DestBlock, which can only happen if it's safe to move the
3503/// instruction past all of the instructions between it and the end of its
3504/// block.
3505bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
3506 BasicBlock *DestBlock) {
3507 BasicBlock *SrcBlock = I->getParent();
3508
3509  // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
3510 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
3511 I->isTerminator())
3512 return false;
3513
3514 // Do not sink static or dynamic alloca instructions. Static allocas must
3515 // remain in the entry block, and dynamic allocas must not be sunk in between
3516 // a stacksave / stackrestore pair, which would incorrectly shorten its
3517 // lifetime.
3518 if (isa<AllocaInst>(I))
3519 return false;
3520
3521 // Do not sink into catchswitch blocks.
3522 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3523 return false;
3524
3525 // Do not sink convergent call instructions.
3526 if (auto *CI = dyn_cast<CallInst>(I)) {
3527 if (CI->isConvergent())
3528 return false;
3529 }
3530
3531  // Unless we can prove that the memory write isn't visible except on the
3532 // path we're sinking to, we must bail.
3533 if (I->mayWriteToMemory()) {
3534 if (!SoleWriteToDeadLocal(I, TLI))
3535 return false;
3536 }
3537
3538 // We can only sink load instructions if there is nothing between the load and
3539 // the end of block that could change the value.
3540 if (I->mayReadFromMemory()) {
3541 // We don't want to do any sophisticated alias analysis, so we only check
3542 // the instructions after I in I's parent block if we try to sink to its
3543 // successor block.
3544 if (DestBlock->getUniquePredecessor() != I->getParent())
3545 return false;
3546 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
3547 E = I->getParent()->end();
3548 Scan != E; ++Scan)
3549 if (Scan->mayWriteToMemory())
3550 return false;
3551 }
3552
3553 I->dropDroppableUses([&](const Use *U) {
3554 auto *I = dyn_cast<Instruction>(U->getUser());
3555 if (I && I->getParent() != DestBlock) {
3556 Worklist.add(I);
3557 return true;
3558 }
3559 return false;
3560 });
3561 /// FIXME: We could remove droppable uses that are not dominated by
3562 /// the new position.
3563
3564 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
3565 I->moveBefore(&*InsertPos);
3566 ++NumSunkInst;
3567
3568 // Also sink all related debug uses from the source basic block. Otherwise we
3569  // get debug uses before the def. Attempt to salvage debug uses first, to
3570  // maximise the range over which variables have a location. If we cannot salvage, then
3571 // mark the location undef: we know it was supposed to receive a new location
3572 // here, but that computation has been sunk.
3574 findDbgUsers(DbgUsers, I);
3575 // Process the sinking DbgUsers in reverse order, as we only want to clone the
3576 // last appearing debug intrinsic for each given variable.
3578 for (DbgVariableIntrinsic *DVI : DbgUsers)
3579 if (DVI->getParent() == SrcBlock)
3580 DbgUsersToSink.push_back(DVI);
3581 llvm::sort(DbgUsersToSink,
3582 [](auto *A, auto *B) { return B->comesBefore(A); });
3583
3585 SmallSet<DebugVariable, 4> SunkVariables;
3586 for (auto *User : DbgUsersToSink) {
3587 // A dbg.declare instruction should not be cloned, since there can only be
3588 // one per variable fragment. It should be left in the original place
3589 // because the sunk instruction is not an alloca (otherwise we could not be
3590 // here).
3591 if (isa<DbgDeclareInst>(User))
3592 continue;
3593
3594 DebugVariable DbgUserVariable =
3595 DebugVariable(User->getVariable(), User->getExpression(),
3596 User->getDebugLoc()->getInlinedAt());
3597
3598 if (!SunkVariables.insert(DbgUserVariable).second)
3599 continue;
3600
3601 // Leave dbg.assign intrinsics in their original positions and there should
3602 // be no need to insert a clone.
3603 if (isa<DbgAssignIntrinsic>(User))
3604 continue;
3605
3606 DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
3607 if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
3608 DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
3609 LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
3610 }
3611
3612 // Perform salvaging without the clones, then sink the clones.
3613 if (!DIIClones.empty()) {
3614 salvageDebugInfoForDbgValues(*I, DbgUsers);
3615 // The clones are in reverse order of original appearance, reverse again to
3616 // maintain the original order.
3617 for (auto &DIIClone : llvm::reverse(DIIClones)) {
3618 DIIClone->insertBefore(&*InsertPos);
3619 LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
3620 }
3621 }
3622
3623 return true;
3624}
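// A minimal sketch of a sink this enables: an instruction whose only use is in
// a successor with a unique predecessor, e.g.
//   bb:
//     %a = add i32 %x, %y
//     br i1 %c, label %use, label %exit
//   use:                         ; unique predecessor is %bb
//     call void @g(i32 %a)
// can be moved to the start of %use, so it is not executed when %c is false.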
3625
3626bool InstCombinerImpl::run() {
3627 while (!Worklist.isEmpty()) {
3628 // Walk deferred instructions in reverse order, and push them to the
3629 // worklist, which means they'll end up popped from the worklist in-order.
3630 while (Instruction *I = Worklist.popDeferred()) {
3631 // Check to see if we can DCE the instruction. We do this already here to
3632 // reduce the number of uses and thus allow other folds to trigger.
3633 // Note that eraseInstFromFunction() may push additional instructions on
3634 // the deferred worklist, so this will DCE whole instruction chains.
3637 ++NumDeadInst;
3638 continue;
3639 }
3640
3641 Worklist.push(I);
3642 }
3643
3644    Instruction *I = Worklist.removeOne();
3645 if (I == nullptr) continue; // skip null values.
3646
3647 // Check to see if we can DCE the instruction.
3650 ++NumDeadInst;
3651 continue;
3652 }
3653
3654 if (!DebugCounter::shouldExecute(VisitCounter))
3655 continue;
3656
3657 // See if we can trivially sink this instruction to its user if we can
3658 // prove that the successor is not executed more frequently than our block.
3659 // Return the UserBlock if successful.
3660 auto getOptionalSinkBlockForInst =
3661 [this](Instruction *I) -> std::optional<BasicBlock *> {
3662 if (!EnableCodeSinking)
3663 return std::nullopt;
3664
3665 BasicBlock *BB = I->getParent();
3666 BasicBlock *UserParent = nullptr;
3667 unsigned NumUsers = 0;
3668
3669 for (auto *U : I->users()) {
3670 if (U->isDroppable())
3671 continue;
3672 if (NumUsers > MaxSinkNumUsers)
3673 return std::nullopt;
3674
3675 Instruction *UserInst = cast<Instruction>(U);
3676 // Special handling for Phi nodes - get the block the use occurs in.
3677 if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
3678 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
3679 if (PN->getIncomingValue(i) == I) {
3680 // Bail out if we have uses in different blocks. We don't do any
3681            // sophisticated analysis (i.e., finding the NearestCommonDominator of
3682 // these use blocks).
3683 if (UserParent && UserParent != PN->getIncomingBlock(i))
3684 return std::nullopt;
3685 UserParent = PN->getIncomingBlock(i);
3686 }
3687 }
3688 assert(UserParent && "expected to find user block!");
3689 } else {
3690 if (UserParent && UserParent != UserInst->getParent())
3691 return std::nullopt;
3692 UserParent = UserInst->getParent();
3693 }
3694
3695      // Make sure these checks are done only once; naturally, we do them the
3696      // first time we find the user parent, which saves compile time.
3697 if (NumUsers == 0) {
3698 // Try sinking to another block. If that block is unreachable, then do
3699 // not bother. SimplifyCFG should handle it.
3700 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
3701 return std::nullopt;
3702
3703 auto *Term = UserParent->getTerminator();
3704 // See if the user is one of our successors that has only one
3705 // predecessor, so that we don't have to split the critical edge.
3706 // Another option where we can sink is a block that ends with a
3707 // terminator that does not pass control to other block (such as
3708 // return or unreachable or resume). In this case:
3709 // - I dominates the User (by SSA form);
3710 // - the User will be executed at most once.
3711 // So sinking I down to User is always profitable or neutral.
3712 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
3713 return std::nullopt;
3714
3715 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
3716 }
3717
3718 NumUsers++;
3719 }
3720
3721 // No user or only has droppable users.
3722 if (!UserParent)
3723 return std::nullopt;
3724
3725 return UserParent;
3726 };
3727
3728 auto OptBB = getOptionalSinkBlockForInst(I);
3729 if (OptBB) {
3730 auto *UserParent = *OptBB;
3731 // Okay, the CFG is simple enough, try to sink this instruction.
3732 if (tryToSinkInstruction(I, UserParent)) {
3733 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
3734 MadeIRChange = true;
3735 // We'll add uses of the sunk instruction below, but since
3736        // sinking can expose opportunities for its *operands*, add
3737        // them to the worklist.
3738 for (Use &U : I->operands())
3739 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
3740 Worklist.push(OpI);
3741 }
3742 }
3743
3744 // Now that we have an instruction, try combining it to simplify it.
3745    Builder.SetInsertPoint(I);
3746    Builder.CollectMetadataToCopy(
3747 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
3748
3749#ifndef NDEBUG
3750 std::string OrigI;
3751#endif
3752 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
3753 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
3754
3755 if (Instruction *Result = visit(*I)) {
3756 ++NumCombined;
3757 // Should we replace the old instruction with a new one?
3758 if (Result != I) {
3759 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
3760 << " New = " << *Result << '\n');
3761
3762 Result->copyMetadata(*I,
3763 {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
3764 // Everything uses the new instruction now.
3765 I->replaceAllUsesWith(Result);
3766
3767 // Move the name to the new instruction first.
3768 Result->takeName(I);
3769
3770 // Insert the new instruction into the basic block...
3771 BasicBlock *InstParent = I->getParent();
3772 BasicBlock::iterator InsertPos = I->getIterator();
3773
3774        // Are we replacing a PHI with something that isn't a PHI, or vice versa?
3775 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
3776 // We need to fix up the insertion point.
3777 if (isa<PHINode>(I)) // PHI -> Non-PHI
3778 InsertPos = InstParent->getFirstInsertionPt();
3779 else // Non-PHI -> PHI
3780 InsertPos = InstParent->getFirstNonPHI()->getIterator();
3781 }
3782
3783 Result->insertInto(InstParent, InsertPos);
3784
3785 // Push the new instruction and any users onto the worklist.
3787 Worklist.push(Result);
3788
3790 } else {
3791 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
3792 << " New = " << *I << '\n');
3793
3794 // If the instruction was modified, it's possible that it is now dead.
3795        // If so, remove it.
3798 } else {
3800 Worklist.push(I);
3801 }
3802 }
3803 MadeIRChange = true;
3804 }
3805 }
3806
3807 Worklist.zap();
3808 return MadeIRChange;
3809}
3810
3811// Track the scopes used by !alias.scope and !noalias. In a function, a
3812// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
3813// by both sets. If not, the declaration of the scope can be safely omitted.
3814// The MDNode of the scope can be omitted as well for the instructions that are
3815// part of this function. We do not do that at this point, as this might become
3816// too time consuming to do.
3817class AliasScopeTracker {
3818 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
3819 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
3820
3821public:
3822  void analyse(Instruction *I) {
3823 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
3824 if (!I->hasMetadataOtherThanDebugLoc())
3825 return;
3826
3827 auto Track = [](Metadata *ScopeList, auto &Container) {
3828 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
3829 if (!MDScopeList || !Container.insert(MDScopeList).second)
3830 return;
3831 for (const auto &MDOperand : MDScopeList->operands())
3832 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
3833 Container.insert(MDScope);
3834 };
3835
3836 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
3837 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
3838 }
3839
3840  bool isNoAliasScopeDeclDead(Instruction *Inst) {
3841 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
3842 if (!Decl)
3843 return false;
3844
3845 assert(Decl->use_empty() &&
3846 "llvm.experimental.noalias.scope.decl in use ?");
3847 const MDNode *MDSL = Decl->getScopeList();
3848 assert(MDSL->getNumOperands() == 1 &&
3849 "llvm.experimental.noalias.scope should refer to a single scope");
3850 auto &MDOperand = MDSL->getOperand(0);
3851 if (auto *MD = dyn_cast<MDNode>(MDOperand))
3852 return !UsedAliasScopesAndLists.contains(MD) ||
3853 !UsedNoAliasScopesAndLists.contains(MD);
3854
3855    // Not an MDNode? Throw it away.
3856 return true;
3857 }
3858};
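// Illustrative IR for the tracker above: the declaration of scope !2 can be
// removed unless the function contains both an instruction whose !alias.scope
// references !2 and an instruction whose !noalias references !2:
//   call void @llvm.experimental.noalias.scope.decl(metadata !1)
//   ; where !1 = !{!2}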
3859
3860/// Populate the IC worklist from a function, by walking it in depth-first
3861/// order and adding all reachable code to the worklist.
3862///
3863/// This has a couple of tricks to make the code faster and more powerful. In
3864/// particular, we constant fold and DCE instructions as we go, to avoid adding
3865/// them to the worklist (this significantly speeds up instcombine on code where
3866/// many instructions are dead or constant). Additionally, if we find a branch
3867/// whose condition is a known constant, we only visit the reachable successors.
3868static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
3869 const TargetLibraryInfo *TLI,
3870 InstructionWorklist &ICWorklist) {
3871 bool MadeIRChange = false;
3874 Worklist.push_back(&F.front());
3875
3876 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
3877 DenseMap<Constant *, Constant *> FoldedConstants;
3878 AliasScopeTracker SeenAliasScopes;
3879
3880 do {
3881 BasicBlock *BB = Worklist.pop_back_val();
3882
3883 // We have now visited this block! If we've already been here, ignore it.
3884 if (!Visited.insert(BB).second)
3885 continue;
3886
3887 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
3888 // ConstantProp instruction if trivially constant.
3889 if (!Inst.use_empty() &&
3890 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
3891 if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
3892 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
3893 << '\n');
3894 Inst.replaceAllUsesWith(C);
3895 ++NumConstProp;
3896 if (isInstructionTriviallyDead(&Inst, TLI))
3897 Inst.eraseFromParent();
3898 MadeIRChange = true;
3899 continue;
3900 }
3901
3902 // See if we can constant fold its operands.
3903 for (Use &U : Inst.operands()) {
3904 if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
3905 continue;
3906
3907 auto *C = cast<Constant>(U);
3908 Constant *&FoldRes = FoldedConstants[C];
3909 if (!FoldRes)
3910 FoldRes = ConstantFoldConstant(C, DL, TLI);
3911
3912 if (FoldRes != C) {
3913 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
3914 << "\n Old = " << *C
3915 << "\n New = " << *FoldRes << '\n');
3916 U = FoldRes;
3917 MadeIRChange = true;
3918 }
3919 }
3920
3921 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
3922 // these call instructions consumes non-trivial amount of time and
3923 // provides no value for the optimization.
3924 if (!Inst.isDebugOrPseudoInst()) {
3925 InstrsForInstructionWorklist.push_back(&Inst);
3926 SeenAliasScopes.analyse(&Inst);
3927 }
3928 }
3929
3930 // Recursively visit successors. If this is a branch or switch on a
3931 // constant, only visit the reachable successor.
3932 Instruction *TI = BB->getTerminator();
3933 if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
3934 if (isa<UndefValue>(BI->getCondition()))
3935 // Branch on undef is UB.
3936 continue;
3937 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
3938 bool CondVal = Cond->getZExtValue();
3939 BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
3940 Worklist.push_back(ReachableBB);
3941 continue;
3942 }
3943 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3944 if (isa<UndefValue>(SI->getCondition()))
3945 // Switch on undef is UB.
3946 continue;
3947 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
3948 Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
3949 continue;
3950 }
3951 }
3952
3953 append_range(Worklist, successors(TI));
3954 } while (!Worklist.empty());
3955
3956 // Remove instructions inside unreachable blocks. This prevents the
3957 // instcombine code from having to deal with some bad special cases, and
3958 // reduces use counts of instructions.
3959 for (BasicBlock &BB : F) {
3960 if (Visited.count(&BB))
3961 continue;
3962
3963 unsigned NumDeadInstInBB;
3964 unsigned NumDeadDbgInstInBB;
3965 std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
3967
3968 MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
3969 NumDeadInst += NumDeadInstInBB;
3970 }
3971
3972 // Once we've found all of the instructions to add to instcombine's worklist,
3973 // add them in reverse order. This way instcombine will visit from the top
3974 // of the function down. This jives well with the way that it adds all uses
3975 // of instructions to the worklist after doing a transformation, thus avoiding
3976 // some N^2 behavior in pathological cases.
3977 ICWorklist.reserve(InstrsForInstructionWorklist.size());
3978 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
3979 // DCE instruction if trivially dead. As we iterate in reverse program
3980 // order here, we will clean up whole chains of dead instructions.
3981 if (isInstructionTriviallyDead(Inst, TLI) ||
3982 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
3983 ++NumDeadInst;
3984 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
3985 salvageDebugInfo(*Inst);
3986 Inst->eraseFromParent();
3987 MadeIRChange = true;
3988 continue;
3989 }
3990
3991 ICWorklist.push(Inst);
3992 }
3993
3994 return MadeIRChange;
3995}
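// As an illustration of the constant-branch pruning above, for
//   br i1 true, label %taken, label %untaken
// only %taken is pushed onto the block worklist; if nothing else reaches
// %untaken, its instructions are removed by the unreachable-block cleanup.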
3996
4001 ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4002 auto &DL = F.getParent()->getDataLayout();
4003
4004 /// Builder - This is an IRBuilder that automatically inserts new
4005 /// instructions into the worklist when they are created.
4007 F.getContext(), TargetFolder(DL),
4008 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4009 Worklist.add(I);
4010 if (auto *Assume = dyn_cast<AssumeInst>(I))
4011 AC.registerAssumption(Assume);
4012 }));
4013
4014 // Lower dbg.declare intrinsics otherwise their value may be clobbered
4015 // by instcombiner.
4016 bool MadeIRChange = false;
4017  if (ShouldLowerDbgDeclare)
4018 MadeIRChange = LowerDbgDeclare(F);
4019
4020 // Iterate while there is work to do.
4021 unsigned Iteration = 0;
4022 while (true) {
4023 ++NumWorklistIterations;
4024 ++Iteration;
4025
4026 if (Iteration > InfiniteLoopDetectionThreshold) {
4028 "Instruction Combining seems stuck in an infinite loop after " +
4029 Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4030 }
4031
4032 if (Iteration > MaxIterations) {
4033 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4034 << " on " << F.getName()
4035 << " reached; stopping before reaching a fixpoint\n");
4036 break;
4037 }
4038
4039 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4040 << F.getName() << "\n");
4041
4042 MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4043
4044 InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4045 ORE, BFI, PSI, DL, LI);
4047
4048 if (!IC.run())
4049 break;
4050
4051 MadeIRChange = true;
4052 }
4053
4054 return MadeIRChange;
4055}
4056
4058
4060 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
4061 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
4062 OS, MapClassName2PassName);
4063 OS << '<';
4064 OS << "max-iterations=" << Options.MaxIterations << ";";
4065 OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
4066 OS << '>';
4067}
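// With the printing above, a textual pipeline entry would look roughly like
//   instcombine<max-iterations=1000;no-use-loop-info>
// (the exact values depend on how the pass options were configured).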
4068
4069PreservedAnalyses InstCombinePass::run(Function &F,
4070                                       FunctionAnalysisManager &AM) {
4071 auto &AC = AM.getResult<AssumptionAnalysis>(F);
4072 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4073 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4075 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4076
4077 // TODO: Only use LoopInfo when the option is set. This requires that the
4078 // callers in the pass pipeline explicitly set the option.
4079 auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4080 if (!LI && Options.UseLoopInfo)
4081 LI = &AM.getResult<LoopAnalysis>(F);
4082
4083 auto *AA = &AM.getResult<AAManager>(F);
4084 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4085 ProfileSummaryInfo *PSI =
4086 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4087 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4088 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4089
4090 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4091 BFI, PSI, Options.MaxIterations, LI))
4092 // No changes, all analyses are preserved.
4093 return PreservedAnalyses::all();
4094
4095 // Mark all the analyses that instcombine updates as preserved.
4096  PreservedAnalyses PA;
4097  PA.preserveSet<CFGAnalyses>();
4098 return PA;
4099}
4100
4102 AU.setPreservesCFG();
4115}
4116
4117bool InstructionCombiningPass::runOnFunction(Function &F) {
4118 if (skipFunction(F))
4119 return false;
4120
4121 // Required analyses.
4122 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4123 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4124 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4125 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4126 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4127 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4128
4129 // Optional analyses.
4130 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4131 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4132 ProfileSummaryInfo *PSI =
4133 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4134 BlockFrequencyInfo *BFI =
4135 (PSI && PSI->hasProfileSummary()) ?
4136 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4137 nullptr;
4138
4139 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4140 BFI, PSI, MaxIterations, LI);
4141}
4142
4144
4148}
4149
4151 : FunctionPass(ID), MaxIterations(MaxIterations) {
4153}
4154
4156 "Combine redundant instructions", false, false)
4168
4169// Initialization Routines
4172}
4173
4174FunctionPass *llvm::createInstructionCombiningPass() {
4175 return new InstructionCombiningPass();
4176}
4177
4178FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4179 return new InstructionCombiningPass(MaxIterations);
4180}