1 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // InstructionCombining - Combine instructions to form fewer, simple
10 // instructions. This pass does not modify the CFG. This pass is where
11 // algebraic simplification happens.
12 //
13 // This pass combines things like:
14 // %Y = add i32 %X, 1
15 // %Z = add i32 %Y, 1
16 // into:
17 // %Z = add i32 %X, 2
18 //
19 // This is a simple worklist driven algorithm.
20 //
21 // This pass guarantees that the following canonicalizations are performed on
22 // the program:
23 // 1. If a binary operator has a constant operand, it is moved to the RHS
24 // 2. Bitwise operators with constant operands are always grouped so that
25 // shifts are performed first, then or's, then and's, then xor's.
26 // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27 // 4. All cmp instructions on boolean values are replaced with logical ops
28 // 5. add X, X is represented as (X*2) => (X << 1)
29 // 6. Multiplies with a power-of-two constant argument are transformed into
30 // shifts.
31 // ... etc.
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "InstCombineInternal.h"
36 #include "llvm-c/Initialization.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/DenseMap.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
49 #include "llvm/Analysis/CFG.h"
55 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/IR/BasicBlock.h"
66 #include "llvm/IR/CFG.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugInfo.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Function.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InstrTypes.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/IR/Operator.h"
85 #include "llvm/IR/PassManager.h"
86 #include "llvm/IR/PatternMatch.h"
87 #include "llvm/IR/Type.h"
88 #include "llvm/IR/Use.h"
89 #include "llvm/IR/User.h"
90 #include "llvm/IR/Value.h"
91 #include "llvm/IR/ValueHandle.h"
92 #include "llvm/InitializePasses.h"
93 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
103 #include <algorithm>
104 #include <cassert>
105 #include <cstdint>
106 #include <memory>
107 #include <string>
108 #include <utility>
109 
110 #define DEBUG_TYPE "instcombine"
112 
113 using namespace llvm;
114 using namespace llvm::PatternMatch;
115 
116 STATISTIC(NumWorklistIterations,
117  "Number of instruction combining iterations performed");
118 
119 STATISTIC(NumCombined , "Number of insts combined");
120 STATISTIC(NumConstProp, "Number of constant folds");
121 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
122 STATISTIC(NumSunkInst , "Number of instructions sunk");
123 STATISTIC(NumExpand, "Number of expansions");
124 STATISTIC(NumFactor , "Number of factorizations");
125 STATISTIC(NumReassoc , "Number of reassociations");
126 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
127  "Controls which instructions are visited");
128 
129 // FIXME: these limits eventually should be as low as 2.
130 static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
131 #ifndef NDEBUG
132 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
133 #else
134 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
135 #endif
136 
137 static cl::opt<bool>
138 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
139  cl::init(true));
140 
142  "instcombine-max-sink-users", cl::init(32),
143  cl::desc("Maximum number of undroppable users for instruction sinking"));
144 
146  "instcombine-max-iterations",
147  cl::desc("Limit the maximum number of instruction combining iterations"),
149 
151  "instcombine-infinite-loop-threshold",
152  cl::desc("Number of instruction combining iterations considered an "
153  "infinite loop"),
155 
156 static cl::opt<unsigned>
157 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
158  cl::desc("Maximum array size considered when doing a combine"));
159 
160 // FIXME: Remove this flag when it is no longer necessary to convert
161 // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
162 // increases variable availability at the cost of accuracy. Variables that
163 // cannot be promoted by mem2reg or SROA will be described as living in memory
164 // for their entire lifetime. However, passes like DSE and instcombine can
165 // delete stores to the alloca, leading to misleading and inaccurate debug
166 // information. This flag can be removed when those passes are fixed.
167 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
168  cl::Hidden, cl::init(true));
169 
170 Optional<Instruction *>
171 InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
172  // Handle target specific intrinsics
173  if (II.getCalledFunction()->isTargetIntrinsic()) {
174  return TTI.instCombineIntrinsic(*this, II);
175  }
176  return None;
177 }
178 
179 Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
180  IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
181  bool &KnownBitsComputed) {
182  // Handle target specific intrinsics
183  if (II.getCalledFunction()->isTargetIntrinsic()) {
184  return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
185  KnownBitsComputed);
186  }
187  return None;
188 }
189 
190 Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
191  IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
192  APInt &UndefElts3,
193  std::function<void(Instruction *, unsigned, APInt, APInt &)>
194  SimplifyAndSetOp) {
195  // Handle target specific intrinsics
196  if (II.getCalledFunction()->isTargetIntrinsic()) {
197  return TTI.simplifyDemandedVectorEltsIntrinsic(
198  *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
199  SimplifyAndSetOp);
200  }
201  return None;
202 }
203 
204 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
205  return llvm::EmitGEPOffset(&Builder, DL, GEP);
206 }
207 
208 /// Legal integers and common types are considered desirable. This is used to
209 /// avoid creating instructions with types that may not be supported well by
210 /// the backend.
211 /// NOTE: This treats i8, i16 and i32 specially because they are common
212 /// types in frontend languages.
213 bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
214  switch (BitWidth) {
215  case 8:
216  case 16:
217  case 32:
218  return true;
219  default:
220  return DL.isLegalInteger(BitWidth);
221  }
222 }
223 
224 /// Return true if it is desirable to convert an integer computation from a
225 /// given bit width to a new bit width.
226 /// We don't want to convert from a legal to an illegal type or from a smaller
227 /// to a larger illegal type. A width of '1' is always treated as a desirable
228 /// type because i1 is a fundamental type in IR, and there are many specialized
229 /// optimizations for i1 types. Common/desirable widths are equally treated as
230 /// legal to convert to, in order to open up more combining opportunities.
231 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
232  unsigned ToWidth) const {
233  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
234  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
235 
236  // Convert to desirable widths even if they are not legal types.
237  // Only shrink types, to prevent infinite loops.
238  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
239  return true;
240 
241  // If this is a legal integer from type, and the result would be an illegal
242  // type, don't do the transformation.
243  if (FromLegal && !ToLegal)
244  return false;
245 
246  // Otherwise, if both are illegal, do not increase the size of the result. We
247  // do allow things like i160 -> i64, but not i64 -> i160.
248  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
249  return false;
250 
251  return true;
252 }
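// For intuition, a few illustrative cases (assuming a typical 64-bit
// DataLayout where i8/i16/i32/i64 are legal): i64 -> i32 and i33 -> i32 are
// accepted (shrinking to a desirable width), i32 -> i64 is accepted (legal to
// legal), but i64 -> i128 (legal to illegal) and i96 -> i128 (growing an
// illegal type) are rejected.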
253 
254 /// Return true if it is desirable to convert a computation from 'From' to 'To'.
255 /// We don't want to convert from a legal to an illegal type or from a smaller
256 /// to a larger illegal type. i1 is always treated as a legal type because it is
257 /// a fundamental type in IR, and there are many specialized optimizations for
258 /// i1 types.
259 bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
260  // TODO: This could be extended to allow vectors. Datalayout changes might be
261  // needed to properly support that.
262  if (!From->isIntegerTy() || !To->isIntegerTy())
263  return false;
264 
265  unsigned FromWidth = From->getPrimitiveSizeInBits();
266  unsigned ToWidth = To->getPrimitiveSizeInBits();
267  return shouldChangeType(FromWidth, ToWidth);
268 }
269 
270 // Return true, if No Signed Wrap should be maintained for I.
271 // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
272 // where both B and C should be ConstantInts, results in a constant that does
273 // not overflow. This function only handles the Add and Sub opcodes. For
274 // all other opcodes, the function conservatively returns false.
275 static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
276  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
277  if (!OBO || !OBO->hasNoSignedWrap())
278  return false;
279 
280  // We reason about Add and Sub Only.
281  Instruction::BinaryOps Opcode = I.getOpcode();
282  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
283  return false;
284 
285  const APInt *BVal, *CVal;
286  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
287  return false;
288 
289  bool Overflow = false;
290  if (Opcode == Instruction::Add)
291  (void)BVal->sadd_ov(*CVal, Overflow);
292  else
293  (void)BVal->ssub_ov(*CVal, Overflow);
294 
295  return !Overflow;
296 }
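// Sketch of the idea: when reassociating "add nsw i8 %x, 100" with another
// constant 27, the folded constant 127 still fits in i8, so nsw may be kept;
// with 28 the fold would wrap past 127 and nsw must be dropped.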
297 
298 static bool hasNoUnsignedWrap(BinaryOperator &I) {
299  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
300  return OBO && OBO->hasNoUnsignedWrap();
301 }
302 
303 static bool hasNoSignedWrap(BinaryOperator &I) {
304  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
305  return OBO && OBO->hasNoSignedWrap();
306 }
307 
308 /// Conservatively clears subclassOptionalData after a reassociation or
309 /// commutation. Fast-math flags are the exception: they are preserved when
310 /// applicable.
311 static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
312  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
313  if (!FPMO) {
314  I.clearSubclassOptionalData();
315  return;
316  }
317 
318  FastMathFlags FMF = I.getFastMathFlags();
319  I.clearSubclassOptionalData();
320  I.setFastMathFlags(FMF);
321 }
322 
323 /// Combine constant operands of associative operations either before or after a
324 /// cast to eliminate one of the associative operations:
325 /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
326 /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
327 static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
328  InstCombinerImpl &IC) {
329  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
330  if (!Cast || !Cast->hasOneUse())
331  return false;
332 
333  // TODO: Enhance logic for other casts and remove this check.
334  auto CastOpcode = Cast->getOpcode();
335  if (CastOpcode != Instruction::ZExt)
336  return false;
337 
338  // TODO: Enhance logic for other BinOps and remove this check.
339  if (!BinOp1->isBitwiseLogicOp())
340  return false;
341 
342  auto AssocOpcode = BinOp1->getOpcode();
343  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
344  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
345  return false;
346 
347  Constant *C1, *C2;
348  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
349  !match(BinOp2->getOperand(1), m_Constant(C2)))
350  return false;
351 
352  // TODO: This assumes a zext cast.
353  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
354  // to the destination type might lose bits.
355 
356  // Fold the constants together in the destination type:
357  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
358  Type *DestTy = C1->getType();
359  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
360  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
361  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
362  IC.replaceOperand(*BinOp1, 1, FoldedC);
363  return true;
364 }
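// Illustrative shape of this fold (hypothetical IR):
//   %t = or i8 %x, 3
//   %z = zext i8 %t to i32
//   %r = or i32 %z, 8
// becomes
//   %z = zext i8 %x to i32
//   %r = or i32 %z, 11          ; 8 | zext(3)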
365 
366 // Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
367 // inttoptr ( ptrtoint (x) ) --> x
368 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
369  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
370  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
371  DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
372  auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
373  Type *CastTy = IntToPtr->getDestTy();
374  if (PtrToInt &&
375  CastTy->getPointerAddressSpace() ==
376  PtrToInt->getSrcTy()->getPointerAddressSpace() &&
377  DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
378  DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
379  return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
380  "", PtrToInt);
381  }
382  }
383  return nullptr;
384 }
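// Illustrative round trip (assuming the integer type is as wide as the
// pointer):
//   %i = ptrtoint i32* %p to i64
//   %q = inttoptr i64 %i to i8*
// is replaced by
//   %q = bitcast i32* %p to i8*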
385 
386 /// This performs a few simplifications for operators that are associative or
387 /// commutative:
388 ///
389 /// Commutative operators:
390 ///
391 /// 1. Order operands such that they are listed from right (least complex) to
392 /// left (most complex). This puts constants before unary operators before
393 /// binary operators.
394 ///
395 /// Associative operators:
396 ///
397 /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
398 /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
399 ///
400 /// Associative and commutative operators:
401 ///
402 /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
403 /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
404 /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
405 /// if C1 and C2 are constants.
406 bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
407  Instruction::BinaryOps Opcode = I.getOpcode();
408  bool Changed = false;
409 
410  do {
411  // Order operands such that they are listed from right (least complex) to
412  // left (most complex). This puts constants before unary operators before
413  // binary operators.
414  if (I.isCommutative() && getComplexity(I.getOperand(0)) <
415  getComplexity(I.getOperand(1)))
416  Changed = !I.swapOperands();
417 
418  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
419  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
420 
421  if (I.isAssociative()) {
422  // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
423  if (Op0 && Op0->getOpcode() == Opcode) {
424  Value *A = Op0->getOperand(0);
425  Value *B = Op0->getOperand(1);
426  Value *C = I.getOperand(1);
427 
428  // Does "B op C" simplify?
429  if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
430  // It simplifies to V. Form "A op V".
431  replaceOperand(I, 0, A);
432  replaceOperand(I, 1, V);
433  bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
434  bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
435 
436  // Conservatively clear all optional flags since they may not be
437  // preserved by the reassociation. Reset nsw/nuw based on the above
438  // analysis.
439  ClearSubclassDataAfterReassociation(I);
440 
441  // Note: this is only valid because SimplifyBinOp doesn't look at
442  // the operands to Op0.
443  if (IsNUW)
444  I.setHasNoUnsignedWrap(true);
445 
446  if (IsNSW)
447  I.setHasNoSignedWrap(true);
448 
449  Changed = true;
450  ++NumReassoc;
451  continue;
452  }
453  }
454 
455  // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
456  if (Op1 && Op1->getOpcode() == Opcode) {
457  Value *A = I.getOperand(0);
458  Value *B = Op1->getOperand(0);
459  Value *C = Op1->getOperand(1);
460 
461  // Does "A op B" simplify?
462  if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
463  // It simplifies to V. Form "V op C".
464  replaceOperand(I, 0, V);
465  replaceOperand(I, 1, C);
466  // Conservatively clear the optional flags, since they may not be
467  // preserved by the reassociation.
468  ClearSubclassDataAfterReassociation(I);
469  Changed = true;
470  ++NumReassoc;
471  continue;
472  }
473  }
474  }
475 
476  if (I.isAssociative() && I.isCommutative()) {
477  if (simplifyAssocCastAssoc(&I, *this)) {
478  Changed = true;
479  ++NumReassoc;
480  continue;
481  }
482 
483  // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
484  if (Op0 && Op0->getOpcode() == Opcode) {
485  Value *A = Op0->getOperand(0);
486  Value *B = Op0->getOperand(1);
487  Value *C = I.getOperand(1);
488 
489  // Does "C op A" simplify?
490  if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
491  // It simplifies to V. Form "V op B".
492  replaceOperand(I, 0, V);
493  replaceOperand(I, 1, B);
494  // Conservatively clear the optional flags, since they may not be
495  // preserved by the reassociation.
496  ClearSubclassDataAfterReassociation(I);
497  Changed = true;
498  ++NumReassoc;
499  continue;
500  }
501  }
502 
503  // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
504  if (Op1 && Op1->getOpcode() == Opcode) {
505  Value *A = I.getOperand(0);
506  Value *B = Op1->getOperand(0);
507  Value *C = Op1->getOperand(1);
508 
509  // Does "C op A" simplify?
510  if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
511  // It simplifies to V. Form "B op V".
512  replaceOperand(I, 0, B);
513  replaceOperand(I, 1, V);
514  // Conservatively clear the optional flags, since they may not be
515  // preserved by the reassociation.
516  ClearSubclassDataAfterReassociation(I);
517  Changed = true;
518  ++NumReassoc;
519  continue;
520  }
521  }
522 
523  // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
524  // if C1 and C2 are constants.
525  Value *A, *B;
526  Constant *C1, *C2;
527  if (Op0 && Op1 &&
528  Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
529  match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
530  match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
531  bool IsNUW = hasNoUnsignedWrap(I) &&
532  hasNoUnsignedWrap(*Op0) &&
533  hasNoUnsignedWrap(*Op1);
534  BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
535  BinaryOperator::CreateNUW(Opcode, A, B) :
536  BinaryOperator::Create(Opcode, A, B);
537 
538  if (isa<FPMathOperator>(NewBO)) {
539  FastMathFlags Flags = I.getFastMathFlags();
540  Flags &= Op0->getFastMathFlags();
541  Flags &= Op1->getFastMathFlags();
542  NewBO->setFastMathFlags(Flags);
543  }
544  InsertNewInstWith(NewBO, I);
545  NewBO->takeName(Op1);
546  replaceOperand(I, 0, NewBO);
547  replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
548  // Conservatively clear the optional flags, since they may not be
549  // preserved by the reassociation.
550  ClearSubclassDataAfterReassociation(I);
551  if (IsNUW)
552  I.setHasNoUnsignedWrap(true);
553 
554  Changed = true;
555  continue;
556  }
557  }
558 
559  // No further simplifications.
560  return Changed;
561  } while (true);
562 }
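// Example of transform 2 above: for "(%x & %y) & %y", the inner "B op C"
// ("%y & %y") simplifies to %y, so the instruction is rewritten in place as
// "%x & %y".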
563 
564 /// Return whether "X LOp (Y ROp Z)" is always equal to
565 /// "(X LOp Y) ROp (X LOp Z)".
566 static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
567  Instruction::BinaryOps ROp) {
568  // X & (Y | Z) <--> (X & Y) | (X & Z)
569  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
570  if (LOp == Instruction::And)
571  return ROp == Instruction::Or || ROp == Instruction::Xor;
572 
573  // X | (Y & Z) <--> (X | Y) & (X | Z)
574  if (LOp == Instruction::Or)
575  return ROp == Instruction::And;
576 
577  // X * (Y + Z) <--> (X * Y) + (X * Z)
578  // X * (Y - Z) <--> (X * Y) - (X * Z)
579  if (LOp == Instruction::Mul)
580  return ROp == Instruction::Add || ROp == Instruction::Sub;
581 
582  return false;
583 }
584 
585 /// Return whether "(X LOp Y) ROp Z" is always equal to
586 /// "(X ROp Z) LOp (Y ROp Z)".
587 static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
588  Instruction::BinaryOps ROp) {
589  if (Instruction::isCommutative(ROp))
590  return leftDistributesOverRight(ROp, LOp);
591 
592  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
593  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
594 
595  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
596  // but this requires knowing that the addition does not overflow and other
597  // such subtleties.
598 }
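// Illustrative: (X & Y) >> Z can be rewritten as (X >> Z) & (Y >> Z), so
// (And, LShr) is accepted here, whereas (X + Y) >> Z is not, since carries
// from the addition can cross the bits that the shift discards.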
599 
600 /// This function returns identity value for given opcode, which can be used to
601 /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
602 static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
603  if (isa<Constant>(V))
604  return nullptr;
605 
606  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
607 }
608 
609 /// This function predicates factorization using distributive laws. By default,
610 /// it just returns the 'Op' inputs. But for special-cases like
611 /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
612 /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
613 /// allow more factorization opportunities.
614 static Instruction::BinaryOps
615 getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
616  Value *&LHS, Value *&RHS) {
617  assert(Op && "Expected a binary operator");
618  LHS = Op->getOperand(0);
619  RHS = Op->getOperand(1);
620  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
621  Constant *C;
622  if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
623  // X << C --> X * (1 << C)
624  RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
625  return Instruction::Mul;
626  }
627  // TODO: We can add other conversions e.g. shr => div etc.
628  }
629  return Op->getOpcode();
630 }
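// Illustrative: when the top-level instruction is an add and Op is
// "shl %x, 5", this reports the operation as "mul %x, 32" (LHS = %x,
// RHS = 32), so the factorization below can treat it like any other multiply.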
631 
632 /// This tries to simplify binary operations by factorizing out common terms
633 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
634 Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
635  Instruction::BinaryOps InnerOpcode,
636  Value *A, Value *B, Value *C,
637  Value *D) {
638  assert(A && B && C && D && "All values must be provided");
639 
640  Value *V = nullptr;
641  Value *SimplifiedInst = nullptr;
642  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
643  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
644 
645  // Does "X op' Y" always equal "Y op' X"?
646  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
647 
648  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
649  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
650  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
651  // commutative case, "(A op' B) op (C op' A)"?
652  if (A == C || (InnerCommutative && A == D)) {
653  if (A != C)
654  std::swap(C, D);
655  // Consider forming "A op' (B op D)".
656  // If "B op D" simplifies then it can be formed with no cost.
657  V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
658  // If "B op D" doesn't simplify then only go on if both of the existing
659  // operations "A op' B" and "C op' D" will be zapped as no longer used.
660  if (!V && LHS->hasOneUse() && RHS->hasOneUse())
661  V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
662  if (V) {
663  SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
664  }
665  }
666 
667  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
668  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
669  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
670  // commutative case, "(A op' B) op (B op' D)"?
671  if (B == D || (InnerCommutative && B == C)) {
672  if (B != D)
673  std::swap(C, D);
674  // Consider forming "(A op C) op' B".
675  // If "A op C" simplifies then it can be formed with no cost.
676  V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
677 
678  // If "A op C" doesn't simplify then only go on if both of the existing
679  // operations "A op' B" and "C op' D" will be zapped as no longer used.
680  if (!V && LHS->hasOneUse() && RHS->hasOneUse())
681  V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
682  if (V) {
683  SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
684  }
685  }
686 
687  if (SimplifiedInst) {
688  ++NumFactor;
689  SimplifiedInst->takeName(&I);
690 
691  // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
692  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
693  if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
694  bool HasNSW = false;
695  bool HasNUW = false;
696  if (isa<OverflowingBinaryOperator>(&I)) {
697  HasNSW = I.hasNoSignedWrap();
698  HasNUW = I.hasNoUnsignedWrap();
699  }
700 
701  if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
702  HasNSW &= LOBO->hasNoSignedWrap();
703  HasNUW &= LOBO->hasNoUnsignedWrap();
704  }
705 
706  if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
707  HasNSW &= ROBO->hasNoSignedWrap();
708  HasNUW &= ROBO->hasNoUnsignedWrap();
709  }
710 
711  if (TopLevelOpcode == Instruction::Add &&
712  InnerOpcode == Instruction::Mul) {
713  // We can propagate 'nsw' if we know that
714  // %Y = mul nsw i16 %X, C
715  // %Z = add nsw i16 %Y, %X
716  // =>
717  // %Z = mul nsw i16 %X, C+1
718  //
719  // iff C+1 isn't INT_MIN
720  const APInt *CInt;
721  if (match(V, m_APInt(CInt))) {
722  if (!CInt->isMinSignedValue())
723  BO->setHasNoSignedWrap(HasNSW);
724  }
725 
726  // nuw can be propagated with any constant or nuw value.
727  BO->setHasNoUnsignedWrap(HasNUW);
728  }
729  }
730  }
731  }
732  return SimplifiedInst;
733 }
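// Illustrative: "(%a * %b) + (%a * %c)" factors to "%a * (%b + %c)"; wrap
// flags are only re-applied when the checks above show they survive the
// rewrite.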
734 
735 /// This tries to simplify binary operations which some other binary operation
736 /// distributes over either by factorizing out common terms
737 /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
738 /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
739 /// Returns the simplified value, or null if it didn't simplify.
740 Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
741  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
742  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
743  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
744  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
745 
746  {
747  // Factorization.
748  Value *A, *B, *C, *D;
749  Instruction::BinaryOps LHSOpcode, RHSOpcode;
750  if (Op0)
751  LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
752  if (Op1)
753  RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
754 
755  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
756  // a common term.
757  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
758  if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
759  return V;
760 
761  // The instruction has the form "(A op' B) op (C)". Try to factorize common
762  // term.
763  if (Op0)
764  if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
765  if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
766  return V;
767 
768  // The instruction has the form "(B) op (C op' D)". Try to factorize common
769  // term.
770  if (Op1)
771  if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
772  if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
773  return V;
774  }
775 
776  // Expansion.
777  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
778  // The instruction has the form "(A op' B) op C". See if expanding it out
779  // to "(A op C) op' (B op C)" results in simplifications.
780  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
781  Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
782 
783  // Disable the use of undef because it's not safe to distribute undef.
784  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
785  Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
786  Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
787 
788  // Do "A op C" and "B op C" both simplify?
789  if (L && R) {
790  // They do! Return "L op' R".
791  ++NumExpand;
792  C = Builder.CreateBinOp(InnerOpcode, L, R);
793  C->takeName(&I);
794  return C;
795  }
796 
797  // Does "A op C" simplify to the identity value for the inner opcode?
798  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
799  // They do! Return "B op C".
800  ++NumExpand;
801  C = Builder.CreateBinOp(TopLevelOpcode, B, C);
802  C->takeName(&I);
803  return C;
804  }
805 
806  // Does "B op C" simplify to the identity value for the inner opcode?
807  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
808  // They do! Return "A op C".
809  ++NumExpand;
810  C = Builder.CreateBinOp(TopLevelOpcode, A, C);
811  C->takeName(&I);
812  return C;
813  }
814  }
815 
816  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
817  // The instruction has the form "A op (B op' C)". See if expanding it out
818  // to "(A op B) op' (A op C)" results in simplifications.
819  Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
820  Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
821 
822  // Disable the use of undef because it's not safe to distribute undef.
823  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
824  Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
825  Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
826 
827  // Do "A op B" and "A op C" both simplify?
828  if (L && R) {
829  // They do! Return "L op' R".
830  ++NumExpand;
831  A = Builder.CreateBinOp(InnerOpcode, L, R);
832  A->takeName(&I);
833  return A;
834  }
835 
836  // Does "A op B" simplify to the identity value for the inner opcode?
837  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
838  // They do! Return "A op C".
839  ++NumExpand;
840  A = Builder.CreateBinOp(TopLevelOpcode, A, C);
841  A->takeName(&I);
842  return A;
843  }
844 
845  // Does "A op C" simplify to the identity value for the inner opcode?
846  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
847  // They do! Return "A op B".
848  ++NumExpand;
849  A = Builder.CreateBinOp(TopLevelOpcode, A, B);
850  A->takeName(&I);
851  return A;
852  }
853  }
854 
855  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
856 }
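// Illustrative expansion (hypothetical IR): for "and i8 (or i8 %x, 1), 2",
// distributing gives "(%x & 2) | (1 & 2)"; the second term folds to 0 (the
// identity of 'or'), so the whole expression simplifies to "and i8 %x, 2".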
857 
858 Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
859  Value *LHS,
860  Value *RHS) {
861  Value *A, *B, *C, *D, *E, *F;
862  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
863  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
864  if (!LHSIsSelect && !RHSIsSelect)
865  return nullptr;
866 
867  FastMathFlags FMF;
868  BuilderTy::FastMathFlagGuard Guard(Builder);
869  if (isa<FPMathOperator>(&I)) {
870  FMF = I.getFastMathFlags();
871  Builder.setFastMathFlags(FMF);
872  }
873 
874  Instruction::BinaryOps Opcode = I.getOpcode();
875  SimplifyQuery Q = SQ.getWithInstruction(&I);
876 
877  Value *Cond, *True = nullptr, *False = nullptr;
878  if (LHSIsSelect && RHSIsSelect && A == D) {
879  // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
880  Cond = A;
881  True = SimplifyBinOp(Opcode, B, E, FMF, Q);
882  False = SimplifyBinOp(Opcode, C, F, FMF, Q);
883 
884  if (LHS->hasOneUse() && RHS->hasOneUse()) {
885  if (False && !True)
886  True = Builder.CreateBinOp(Opcode, B, E);
887  else if (True && !False)
888  False = Builder.CreateBinOp(Opcode, C, F);
889  }
890  } else if (LHSIsSelect && LHS->hasOneUse()) {
891  // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
892  Cond = A;
893  True = SimplifyBinOp(Opcode, B, RHS, FMF, Q);
894  False = SimplifyBinOp(Opcode, C, RHS, FMF, Q);
895  } else if (RHSIsSelect && RHS->hasOneUse()) {
896  // X op (D ? E : F) -> D ? (X op E) : (X op F)
897  Cond = D;
898  True = SimplifyBinOp(Opcode, LHS, E, FMF, Q);
899  False = SimplifyBinOp(Opcode, LHS, F, FMF, Q);
900  }
901 
902  if (!True || !False)
903  return nullptr;
904 
905  Value *SI = Builder.CreateSelect(Cond, True, False);
906  SI->takeName(&I);
907  return SI;
908 }
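// Illustrative: for "add (select i1 %c, i32 1, i32 2), 3" with a one-use
// select, both arms simplify ("1 + 3" and "2 + 3"), so the result is
// "select i1 %c, i32 4, i32 5".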
909 
910 /// Freely adapt every user of V as-if V was changed to !V.
911 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
912 void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
913  for (User *U : I->users()) {
914  switch (cast<Instruction>(U)->getOpcode()) {
915  case Instruction::Select: {
916  auto *SI = cast<SelectInst>(U);
917  SI->swapValues();
918  SI->swapProfMetadata();
919  break;
920  }
921  case Instruction::Br:
922  cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
923  break;
924  case Instruction::Xor:
925  replaceInstUsesWith(cast<Instruction>(*U), I);
926  break;
927  default:
928  llvm_unreachable("Got unexpected user - out of sync with "
929  "canFreelyInvertAllUsersOf() ?");
930  }
931  }
932 }
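// Illustrative: if %v only feeds "select i1 %v, %a, %b" and "br i1 %v, ...",
// then inverting %v is free: the select swaps its operands and the branch
// swaps its successors, with profile metadata swapped alongside.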
933 
934 /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
935 /// constant zero (which is the 'negate' form).
936 Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
937  Value *NegV;
938  if (match(V, m_Neg(m_Value(NegV))))
939  return NegV;
940 
941  // Constants can be considered to be negated values if they can be folded.
942  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
943  return ConstantExpr::getNeg(C);
944 
945  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
946  if (C->getType()->getElementType()->isIntegerTy())
947  return ConstantExpr::getNeg(C);
948 
949  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
950  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
951  Constant *Elt = CV->getAggregateElement(i);
952  if (!Elt)
953  return nullptr;
954 
955  if (isa<UndefValue>(Elt))
956  continue;
957 
958  if (!isa<ConstantInt>(Elt))
959  return nullptr;
960  }
961  return ConstantExpr::getNeg(CV);
962  }
963 
964  // Negate integer vector splats.
965  if (auto *CV = dyn_cast<Constant>(V))
966  if (CV->getType()->isVectorTy() &&
967  CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
968  return ConstantExpr::getNeg(CV);
969 
970  return nullptr;
971 }
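// Illustrative: "sub i32 0, %x" yields %x, the constant i32 7 yields i32 -7,
// and an integer splat like <2 x i32> <i32 5, i32 5> is negated element-wise.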
972 
973 /// A binop with a constant operand and a sign-extended boolean operand may be
974 /// converted into a select of constants by applying the binary operation to
975 /// the constant with the two possible values of the extended boolean (0 or -1).
976 Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
977  // TODO: Handle non-commutative binop (constant is operand 0).
978  // TODO: Handle zext.
979  // TODO: Peek through 'not' of cast.
980  Value *BO0 = BO.getOperand(0);
981  Value *BO1 = BO.getOperand(1);
982  Value *X;
983  Constant *C;
984  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
985  !X->getType()->isIntOrIntVectorTy(1))
986  return nullptr;
987 
988  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
989  Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
990  Constant *Zero = ConstantInt::getNullValue(BO.getType());
991  Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
992  Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
993  return SelectInst::Create(X, TVal, FVal);
994 }
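// Illustrative: "and i32 (sext i1 %b to i32), 7" becomes
// "select i1 %b, i32 7, i32 0", since the two arms are "and -1, 7" and
// "and 0, 7".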
995 
996 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
997  InstCombiner::BuilderTy &Builder) {
998  if (auto *Cast = dyn_cast<CastInst>(&I))
999  return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
1000 
1001  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1002  assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
1003  "Expected constant-foldable intrinsic");
1004  Intrinsic::ID IID = II->getIntrinsicID();
1005  if (II->arg_size() == 1)
1006  return Builder.CreateUnaryIntrinsic(IID, SO);
1007 
1008  // This works for real binary ops like min/max (where we always expect the
1009  // constant operand to be canonicalized as op1) and unary ops with a bonus
1010  // constant argument like ctlz/cttz.
1011  // TODO: Handle non-commutative binary intrinsics as below for binops.
1012  assert(II->arg_size() == 2 && "Expected binary intrinsic");
1013  assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
1014  return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
1015  }
1016 
1017  assert(I.isBinaryOp() && "Unexpected opcode for select folding");
1018 
1019  // Figure out if the constant is the left or the right argument.
1020  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
1021  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
1022 
1023  if (auto *SOC = dyn_cast<Constant>(SO)) {
1024  if (ConstIsRHS)
1025  return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
1026  return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
1027  }
1028 
1029  Value *Op0 = SO, *Op1 = ConstOperand;
1030  if (!ConstIsRHS)
1031  std::swap(Op0, Op1);
1032 
1033  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
1034  Op1, SO->getName() + ".op");
1035  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
1036  NewBOI->copyIRFlags(&I);
1037  return NewBO;
1038 }
1039 
1040 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1041  bool FoldWithMultiUse) {
1042  // Don't modify shared select instructions unless FoldWithMultiUse is set.
1043  if (!SI->hasOneUse() && !FoldWithMultiUse)
1044  return nullptr;
1045 
1046  Value *TV = SI->getTrueValue();
1047  Value *FV = SI->getFalseValue();
1048  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1049  return nullptr;
1050 
1051  // Bool selects with constant operands can be folded to logical ops.
1052  if (SI->getType()->isIntOrIntVectorTy(1))
1053  return nullptr;
1054 
1055  // If it's a bitcast involving vectors, make sure it has the same number of
1056  // elements on both sides.
1057  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1058  VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1059  VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1060 
1061  // Verify that either both or neither are vectors.
1062  if ((SrcTy == nullptr) != (DestTy == nullptr))
1063  return nullptr;
1064 
1065  // If vectors, verify that they have the same number of elements.
1066  if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1067  return nullptr;
1068  }
1069 
1070  // Test if a CmpInst instruction is used exclusively by a select as
1071  // part of a minimum or maximum operation. If so, refrain from doing
1072  // any other folding. This helps out other analyses which understand
1073  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
1074  // and CodeGen. And in this case, at least one of the comparison
1075  // operands has at least one user besides the compare (the select),
1076  // which would often largely negate the benefit of folding anyway.
1077  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
1078  if (CI->hasOneUse()) {
1079  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1080 
1081  // FIXME: This is a hack to avoid infinite looping with min/max patterns.
1082  // We have to ensure that vector constants that only differ with
1083  // undef elements are treated as equivalent.
1084  auto areLooselyEqual = [](Value *A, Value *B) {
1085  if (A == B)
1086  return true;
1087 
1088  // Test for vector constants.
1089  Constant *ConstA, *ConstB;
1090  if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
1091  return false;
1092 
1093  // TODO: Deal with FP constants?
1094  if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
1095  return false;
1096 
1097  // Compare for equality including undefs as equal.
1098  auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
1099  const APInt *C;
1100  return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
1101  };
1102 
1103  if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
1104  (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
1105  return nullptr;
1106  }
1107  }
1108 
1109  Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
1110  Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
1111  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1112 }
1113 
1114 static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
1115  InstCombiner::BuilderTy &Builder) {
1116  bool ConstIsRHS = isa<Constant>(I->getOperand(1));
1117  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
1118 
1119  if (auto *InC = dyn_cast<Constant>(InV)) {
1120  if (ConstIsRHS)
1121  return ConstantExpr::get(I->getOpcode(), InC, C);
1122  return ConstantExpr::get(I->getOpcode(), C, InC);
1123  }
1124 
1125  Value *Op0 = InV, *Op1 = C;
1126  if (!ConstIsRHS)
1127  std::swap(Op0, Op1);
1128 
1129  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
1130  auto *FPInst = dyn_cast<Instruction>(RI);
1131  if (FPInst && isa<FPMathOperator>(FPInst))
1132  FPInst->copyFastMathFlags(I);
1133  return RI;
1134 }
1135 
1136 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1137  unsigned NumPHIValues = PN->getNumIncomingValues();
1138  if (NumPHIValues == 0)
1139  return nullptr;
1140 
1141  // We normally only transform phis with a single use. However, if a PHI has
1142  // multiple uses and they are all the same operation, we can fold *all* of the
1143  // uses into the PHI.
1144  if (!PN->hasOneUse()) {
1145  // Walk the use list for the instruction, comparing them to I.
1146  for (User *U : PN->users()) {
1147  Instruction *UI = cast<Instruction>(U);
1148  if (UI != &I && !I.isIdenticalTo(UI))
1149  return nullptr;
1150  }
1151  // Otherwise, we can replace *all* users with the new PHI we form.
1152  }
1153 
1154  // Check to see if all of the operands of the PHI are simple constants
1155  // (constantint/constantfp/undef). If there is one non-constant value,
1156  // remember the BB it is in. If there is more than one or if *it* is a PHI,
1157  // bail out. We don't do arbitrary constant expressions here because moving
1158  // their computation can be expensive without a cost model.
1159  BasicBlock *NonConstBB = nullptr;
1160  for (unsigned i = 0; i != NumPHIValues; ++i) {
1161  Value *InVal = PN->getIncomingValue(i);
1162  // For non-freeze, require constant operand
1163  // For freeze, require non-undef, non-poison operand
1164  if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
1165  continue;
1166  if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
1167  continue;
1168 
1169  if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1170  if (NonConstBB) return nullptr; // More than one non-const value.
1171 
1172  NonConstBB = PN->getIncomingBlock(i);
1173 
1174  // If the InVal is an invoke at the end of the pred block, then we can't
1175  // insert a computation after it without breaking the edge.
1176  if (isa<InvokeInst>(InVal))
1177  if (cast<Instruction>(InVal)->getParent() == NonConstBB)
1178  return nullptr;
1179 
1180  // If the incoming non-constant value is in I's block, we will remove one
1181  // instruction, but insert another equivalent one, leading to infinite
1182  // instcombine.
1183  if (isPotentiallyReachable(I.getParent(), NonConstBB, nullptr, &DT, LI))
1184  return nullptr;
1185  }
1186 
1187  // If there is exactly one non-constant value, we can insert a copy of the
1188  // operation in that block. However, if this is a critical edge, we would be
1189  // inserting the computation on some other paths (e.g. inside a loop). Only
1190  // do this if the pred block is unconditionally branching into the phi block.
1191  // Also, make sure that the pred block is not dead code.
1192  if (NonConstBB != nullptr) {
1193  BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
1194  if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
1195  return nullptr;
1196  }
1197 
1198  // Okay, we can do the transformation: create the new PHI node.
1199  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1200  InsertNewInstBefore(NewPN, *PN);
1201  NewPN->takeName(PN);
1202 
1203  // If we are going to have to insert a new computation, do so right before the
1204  // predecessor's terminator.
1205  if (NonConstBB)
1206  Builder.SetInsertPoint(NonConstBB->getTerminator());
1207 
1208  // Next, add all of the operands to the PHI.
1209  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
1210  // We only currently try to fold the condition of a select when it is a phi,
1211  // not the true/false values.
1212  Value *TrueV = SI->getTrueValue();
1213  Value *FalseV = SI->getFalseValue();
1214  BasicBlock *PhiTransBB = PN->getParent();
1215  for (unsigned i = 0; i != NumPHIValues; ++i) {
1216  BasicBlock *ThisBB = PN->getIncomingBlock(i);
1217  Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
1218  Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
1219  Value *InV = nullptr;
1220  // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
1221  // even if currently isNullValue gives false.
1222  Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
1223  // For vector constants, we cannot use isNullValue to fold into
1224  // FalseVInPred versus TrueVInPred. When we have individual nonzero
1225  // elements in the vector, we will incorrectly fold InC to
1226  // `TrueVInPred`.
1227  if (InC && isa<ConstantInt>(InC))
1228  InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
1229  else {
1230  // Generate the select in the same block as PN's current incoming block.
1231  // Note: ThisBB need not be the NonConstBB because vector constants
1232  // which are constants by definition are handled here.
1233  // FIXME: This can lead to an increase in IR generation because we might
1234  // generate selects for vector constant phi operand, that could not be
1235  // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
1236  // non-vector phis, this transformation was always profitable because
1237  // the select would be generated exactly once in the NonConstBB.
1238  Builder.SetInsertPoint(ThisBB->getTerminator());
1239  InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
1240  FalseVInPred, "phi.sel");
1241  }
1242  NewPN->addIncoming(InV, ThisBB);
1243  }
1244  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
1245  Constant *C = cast<Constant>(I.getOperand(1));
1246  for (unsigned i = 0; i != NumPHIValues; ++i) {
1247  Value *InV = nullptr;
1248  if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1249  InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
1250  else
1251  InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
1252  C, "phi.cmp");
1253  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1254  }
1255  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
1256  for (unsigned i = 0; i != NumPHIValues; ++i) {
1257  Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
1258  Builder);
1259  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1260  }
1261  } else if (isa<FreezeInst>(&I)) {
1262  for (unsigned i = 0; i != NumPHIValues; ++i) {
1263  Value *InV;
1264  if (NonConstBB == PN->getIncomingBlock(i))
1265  InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
1266  else
1267  InV = PN->getIncomingValue(i);
1268  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1269  }
1270  } else {
1271  CastInst *CI = cast<CastInst>(&I);
1272  Type *RetTy = CI->getType();
1273  for (unsigned i = 0; i != NumPHIValues; ++i) {
1274  Value *InV;
1275  if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1276  InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
1277  else
1278  InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
1279  I.getType(), "phi.cast");
1280  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1281  }
1282  }
1283 
1284  for (User *U : make_early_inc_range(PN->users())) {
1285  Instruction *User = cast<Instruction>(U);
1286  if (User == &I) continue;
1287  replaceInstUsesWith(*User, NewPN);
1288  eraseInstFromFunction(*User);
1289  }
1290  return replaceInstUsesWith(I, NewPN);
1291 }
1292 
1293 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1294  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1295  // we are guarding against replicating the binop in >1 predecessor.
1296  // This could miss matching a phi with 2 constant incoming values.
1297  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1298  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1299  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1300  Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1301  return nullptr;
1302 
1303  // TODO: Remove the restriction for binop being in the same block as the phis.
1304  if (BO.getParent() != Phi0->getParent() ||
1305  BO.getParent() != Phi1->getParent())
1306  return nullptr;
1307 
1308  // Match a pair of incoming constants for one of the predecessor blocks.
1309  BasicBlock *ConstBB, *OtherBB;
1310  Constant *C0, *C1;
1311  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1312  ConstBB = Phi0->getIncomingBlock(0);
1313  OtherBB = Phi0->getIncomingBlock(1);
1314  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1315  ConstBB = Phi0->getIncomingBlock(1);
1316  OtherBB = Phi0->getIncomingBlock(0);
1317  } else {
1318  return nullptr;
1319  }
1320  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1321  return nullptr;
1322 
1323  // The block that we are hoisting to must reach here unconditionally.
1324  // Otherwise, we could be speculatively executing an expensive or
1325  // non-speculative op.
1326  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1327  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1328  !DT.isReachableFromEntry(OtherBB))
1329  return nullptr;
1330 
1331  // TODO: This check could be tightened to only apply to binops (div/rem) that
1332  // are not safe to speculatively execute. But that could allow hoisting
1333  // potentially expensive instructions (fdiv for example).
1334  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1335  if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1336  return nullptr;
1337 
1338  // Make a new binop in the predecessor block with the non-constant incoming
1339  // values.
1340  Builder.SetInsertPoint(PredBlockBranch);
1341  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1342  Phi0->getIncomingValueForBlock(OtherBB),
1343  Phi1->getIncomingValueForBlock(OtherBB));
1344  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1345  NotFoldedNewBO->copyIRFlags(&BO);
1346 
1347  // Fold constants for the predecessor block with constant incoming values.
1348  Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
1349 
1350  // Replace the binop with a phi of the new values. The old phis are dead.
1351  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1352  NewPhi->addIncoming(NewBO, OtherBB);
1353  NewPhi->addIncoming(NewC, ConstBB);
1354  return NewPhi;
1355 }
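// Illustrative (hypothetical blocks): with
//   %p0 = phi i32 [ 3, %const.bb ], [ %x, %other.bb ]
//   %p1 = phi i32 [ 5, %const.bb ], [ %y, %other.bb ]
// the binop "add %p0, %p1" becomes an add of %x and %y emitted in %other.bb,
// joined by a new phi of that result and the folded constant 8 for %const.bb.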
1356 
1357 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1358  if (!isa<Constant>(I.getOperand(1)))
1359  return nullptr;
1360 
1361  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1362  if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1363  return NewSel;
1364  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1365  if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1366  return NewPhi;
1367  }
1368  return nullptr;
1369 }
1370 
1371 /// Given a pointer type and a constant offset, determine whether or not there
1372 /// is a sequence of GEP indices into the pointed type that will land us at the
1373 /// specified offset. If so, fill them into NewIndices and return the resultant
1374 /// element type, otherwise return null.
1375 static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
1376  SmallVectorImpl<Value *> &NewIndices,
1377  const DataLayout &DL) {
1378  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
1379  Type *Ty = PtrTy->getNonOpaquePointerElementType();
1380  if (!Ty->isSized())
1381  return nullptr;
1382 
1383  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
1384  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
1385  if (!Offset.isZero())
1386  return nullptr;
1387 
1388  for (const APInt &Index : Indices)
1389  NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
1390  return Ty;
1391 }
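// Illustrative (with a typical DataLayout): for a pointer to
// "{ i32, [4 x i16] }" and an offset of 6 bytes, the computed indices would
// be 0, 1, 1 -- the second i16 of the array field.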
1392 
1393 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1394  // If this GEP has only 0 indices, it is the same pointer as
1395  // Src. If Src is not a trivial GEP too, don't combine
1396  // the indices.
1397  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1398  !Src.hasOneUse())
1399  return false;
1400  return true;
1401 }
1402 
1403 /// Return a value X such that Val = X * Scale, or null if none.
1404 /// If the multiplication is known not to overflow, then NoSignedWrap is set.
1405 Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
1406  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
1407  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
1408  Scale.getBitWidth() && "Scale not compatible with value!");
1409 
1410  // If Val is zero or Scale is one then Val = Val * Scale.
1411  if (match(Val, m_Zero()) || Scale == 1) {
1412  NoSignedWrap = true;
1413  return Val;
1414  }
1415 
1416  // If Scale is zero then it does not divide Val.
1417  if (Scale.isMinValue())
1418  return nullptr;
1419 
1420  // Look through chains of multiplications, searching for a constant that is
1421  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
1422  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
1423  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
1424  // down from Val:
1425  //
1426  // Val = M1 * X || Analysis starts here and works down
1427  // M1 = M2 * Y || Doesn't descend into terms with more
1428  // M2 = Z * 4 \/ than one use
1429  //
1430  // Then to modify a term at the bottom:
1431  //
1432  // Val = M1 * X
1433  // M1 = Z * Y || Replaced M2 with Z
1434  //
1435  // Then to work back up correcting nsw flags.
1436 
1437  // Op - the term we are currently analyzing. Starts at Val then drills down.
1438  // Replaced with its descaled value before exiting from the drill down loop.
1439  Value *Op = Val;
1440 
1441  // Parent - initially null, but after drilling down notes where Op came from.
1442  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
1443  // 0'th operand of Val.
1444  std::pair<Instruction *, unsigned> Parent;
1445 
1446  // Set if the transform requires a descaling at deeper levels that doesn't
1447  // overflow.
1448  bool RequireNoSignedWrap = false;
1449 
1450  // Log base 2 of the scale. Negative if not a power of 2.
1451  int32_t logScale = Scale.exactLogBase2();
1452 
1453  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
1454  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
1455  // If Op is a constant divisible by Scale then descale to the quotient.
1456  APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
1457  APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
1458  if (!Remainder.isMinValue())
1459  // Not divisible by Scale.
1460  return nullptr;
1461  // Replace with the quotient in the parent.
1462  Op = ConstantInt::get(CI->getType(), Quotient);
1463  NoSignedWrap = true;
1464  break;
1465  }
1466 
1467  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
1468  if (BO->getOpcode() == Instruction::Mul) {
1469  // Multiplication.
1470  NoSignedWrap = BO->hasNoSignedWrap();
1471  if (RequireNoSignedWrap && !NoSignedWrap)
1472  return nullptr;
1473 
1474  // There are three cases for multiplication: multiplication by exactly
1475  // the scale, multiplication by a constant different to the scale, and
1476  // multiplication by something else.
1477  Value *LHS = BO->getOperand(0);
1478  Value *RHS = BO->getOperand(1);
1479 
1480  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1481  // Multiplication by a constant.
1482  if (CI->getValue() == Scale) {
1483  // Multiplication by exactly the scale, replace the multiplication
1484  // by its left-hand side in the parent.
1485  Op = LHS;
1486  break;
1487  }
1488 
1489  // Otherwise drill down into the constant.
1490  if (!Op->hasOneUse())
1491  return nullptr;
1492 
1493  Parent = std::make_pair(BO, 1);
1494  continue;
1495  }
1496 
1497  // Multiplication by something else. Drill down into the left-hand side
1498  // since that's where the reassociate pass puts the good stuff.
1499  if (!Op->hasOneUse())
1500  return nullptr;
1501 
1502  Parent = std::make_pair(BO, 0);
1503  continue;
1504  }
1505 
1506  if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
1507  isa<ConstantInt>(BO->getOperand(1))) {
1508  // Multiplication by a power of 2.
1509  NoSignedWrap = BO->hasNoSignedWrap();
1510  if (RequireNoSignedWrap && !NoSignedWrap)
1511  return nullptr;
1512 
1513  Value *LHS = BO->getOperand(0);
1514  int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
1515  getLimitedValue(Scale.getBitWidth());
1516  // Op = LHS << Amt.
1517 
1518  if (Amt == logScale) {
1519  // Multiplication by exactly the scale, replace the multiplication
1520  // by its left-hand side in the parent.
1521  Op = LHS;
1522  break;
1523  }
1524  if (Amt < logScale || !Op->hasOneUse())
1525  return nullptr;
1526 
1527  // Multiplication by more than the scale. Reduce the multiplying amount
1528  // by the scale in the parent.
1529  Parent = std::make_pair(BO, 1);
1530  Op = ConstantInt::get(BO->getType(), Amt - logScale);
1531  break;
1532  }
1533  }
1534 
1535  if (!Op->hasOneUse())
1536  return nullptr;
1537 
1538  if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1539  if (Cast->getOpcode() == Instruction::SExt) {
1540  // Op is sign-extended from a smaller type, descale in the smaller type.
1541  unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1542  APInt SmallScale = Scale.trunc(SmallSize);
1543  // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
1544  // descale Op as (sext Y) * Scale. In order to have
1545  // sext (Y * SmallScale) = (sext Y) * Scale
1546  // some conditions need to hold however: SmallScale must sign-extend to
1547  // Scale and the multiplication Y * SmallScale should not overflow.
1548  if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1549  // SmallScale does not sign-extend to Scale.
1550  return nullptr;
1551  assert(SmallScale.exactLogBase2() == logScale);
1552  // Require that Y * SmallScale must not overflow.
1553  RequireNoSignedWrap = true;
1554 
1555  // Drill down through the cast.
1556  Parent = std::make_pair(Cast, 0);
1557  Scale = SmallScale;
1558  continue;
1559  }
1560 
1561  if (Cast->getOpcode() == Instruction::Trunc) {
1562  // Op is truncated from a larger type, descale in the larger type.
1563  // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
1564  // trunc (Y * sext Scale) = (trunc Y) * Scale
1565  // always holds. However (trunc Y) * Scale may overflow even if
1566  // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1567  // from this point up in the expression (see later).
1568  if (RequireNoSignedWrap)
1569  return nullptr;
1570 
1571  // Drill down through the cast.
1572  unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1573  Parent = std::make_pair(Cast, 0);
1574  Scale = Scale.sext(LargeSize);
1575  if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1576  logScale = -1;
1577  assert(Scale.exactLogBase2() == logScale);
1578  continue;
1579  }
1580  }
1581 
1582  // Unsupported expression, bail out.
1583  return nullptr;
1584  }
1585 
1586  // If Op is zero then Val = Op * Scale.
1587  if (match(Op, m_Zero())) {
1588  NoSignedWrap = true;
1589  return Op;
1590  }
1591 
1592  // We know that we can successfully descale, so from here on we can safely
1593  // modify the IR. Op holds the descaled version of the deepest term in the
1594  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
1595  // not to overflow.
1596 
1597  if (!Parent.first)
1598  // The expression only had one term.
1599  return Op;
1600 
1601  // Rewrite the parent using the descaled version of its operand.
1602  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1603  assert(Op != Parent.first->getOperand(Parent.second) &&
1604  "Descaling was a no-op?");
1605  replaceOperand(*Parent.first, Parent.second, Op);
1606  Worklist.push(Parent.first);
1607 
1608  // Now work back up the expression correcting nsw flags. The logic is based
1609  // on the following observation: if X * Y is known not to overflow as a signed
1610  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1611  // then X * Z will not overflow as a signed multiplication either. As we work
1612  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1613  // current level has strictly smaller absolute value than the original.
1614  Instruction *Ancestor = Parent.first;
1615  do {
1616  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1617  // If the multiplication wasn't nsw then we can't say anything about the
1618  // value of the descaled multiplication, and we have to clear nsw flags
1619  // from this point on up.
1620  bool OpNoSignedWrap = BO->hasNoSignedWrap();
1621  NoSignedWrap &= OpNoSignedWrap;
1622  if (NoSignedWrap != OpNoSignedWrap) {
1623  BO->setHasNoSignedWrap(NoSignedWrap);
1624  Worklist.push(Ancestor);
1625  }
1626  } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1627  // The fact that the descaled input to the trunc has smaller absolute
1628  // value than the original input doesn't tell us anything useful about
1629  // the absolute values of the truncations.
1630  NoSignedWrap = false;
1631  }
1632  assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1633  "Failed to keep proper track of nsw flags while drilling down?");
1634 
1635  if (Ancestor == Val)
1636  // Got to the top, all done!
1637  return Val;
1638 
1639  // Move up one level in the expression.
1640  assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1641  Ancestor = Ancestor->user_back();
1642  } while (true);
1643 }
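// Illustrative sketch of what Descale computes (hypothetical IR, names are
// arbitrary): descaling %v = mul nsw i32 %x, 12 by Scale = 4 rewrites the
// multiply to mul nsw i32 %x, 3 and reports NoSignedWrap, because the original
// multiply was nsw and the descaled multiplier has a smaller absolute value,
// so Val == Result * Scale holds without signed overflow.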
1644 
1645 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1646  if (!isa<VectorType>(Inst.getType()))
1647  return nullptr;
1648 
1649  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1650  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1651  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1652  cast<VectorType>(Inst.getType())->getElementCount());
1653  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1654  cast<VectorType>(Inst.getType())->getElementCount());
1655 
1656  // If both operands of the binop are vector concatenations, then perform the
1657  // narrow binop on each pair of the source operands followed by concatenation
1658  // of the results.
1659  Value *L0, *L1, *R0, *R1;
1660  ArrayRef<int> Mask;
1661  if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1662      match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1663  LHS->hasOneUse() && RHS->hasOneUse() &&
1664  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1665  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1666  // This transform does not have the speculative execution constraint as
1667  // below because the shuffle is a concatenation. The new binops are
1668  // operating on exactly the same elements as the existing binop.
1669  // TODO: We could ease the mask requirement to allow different undef lanes,
1670  // but that requires an analysis of the binop-with-undef output value.
1671  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1672  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1673  BO->copyIRFlags(&Inst);
1674  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1675  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1676  BO->copyIRFlags(&Inst);
1677  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1678  }
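 // Illustrative example of the concat fold above (hypothetical IR):
 //   %lhs = shufflevector <2 x i32> %l0, <2 x i32> %l1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %rhs = shufflevector <2 x i32> %r0, <2 x i32> %r1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %res = add <4 x i32> %lhs, %rhs
 // becomes
 //   %nb0 = add <2 x i32> %l0, %r0
 //   %nb1 = add <2 x i32> %l1, %r1
 //   %res = shufflevector <2 x i32> %nb0, <2 x i32> %nb1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>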
1679 
1680  // It may not be safe to reorder shuffles and things like div, urem, etc.
1681  // because we may trap when executing those ops on unknown vector elements.
1682  // See PR20059.
1683  if (!isSafeToSpeculativelyExecute(&Inst))
1684  return nullptr;
1685 
1686  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1687  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1688  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1689  BO->copyIRFlags(&Inst);
1690  return new ShuffleVectorInst(XY, M);
1691  };
1692 
1693  // If both arguments of the binary operation are shuffles that use the same
1694  // mask and shuffle within a single vector, move the shuffle after the binop.
1695  Value *V1, *V2;
1696  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1697      match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1698  V1->getType() == V2->getType() &&
1699  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1700  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1701  return createBinOpShuffle(V1, V2, Mask);
1702  }
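 // Illustrative example of the fold above (hypothetical IR), both operands
 // shuffled within a single vector by the same mask:
 //   %s1  = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 //   %s2  = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 //   %res = mul <4 x i32> %s1, %s2
 // becomes
 //   %m   = mul <4 x i32> %v1, %v2
 //   %res = shufflevector <4 x i32> %m, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>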
1703 
1704  // If both arguments of a commutative binop are select-shuffles that use the
1705  // same mask with commuted operands, the shuffles are unnecessary.
1706  if (Inst.isCommutative() &&
1707  match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1708  match(RHS,
1709        m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1710  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1711  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1712  // TODO: Allow shuffles that contain undefs in the mask?
1713  // That is legal, but it reduces undef knowledge.
1714  // TODO: Allow arbitrary shuffles by shuffling after binop?
1715  // That might be legal, but we have to deal with poison.
1716  if (LShuf->isSelect() &&
1717  !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
1718  RShuf->isSelect() &&
1719  !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
1720  // Example:
1721  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1722  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1723  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1724  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1725  NewBO->copyIRFlags(&Inst);
1726  return NewBO;
1727  }
1728  }
1729 
1730  // If one argument is a shuffle within one vector and the other is a constant,
1731  // try moving the shuffle after the binary operation. This canonicalization
1732  // intends to move shuffles closer to other shuffles and binops closer to
1733  // other binops, so they can be folded. It may also enable demanded elements
1734  // transforms.
1735  Constant *C;
1736  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1737  if (InstVTy &&
1738  match(&Inst,
1739        m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1740  m_ImmConstant(C))) &&
1741  cast<FixedVectorType>(V1->getType())->getNumElements() <=
1742  InstVTy->getNumElements()) {
1743  assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1744  "Shuffle should not change scalar type");
1745 
1746  // Find constant NewC that has property:
1747  // shuffle(NewC, ShMask) = C
1748  // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1749  // reorder is not possible. A 1-to-1 mapping is not required. Example:
1750  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1751  bool ConstOp1 = isa<Constant>(RHS);
1752  ArrayRef<int> ShMask = Mask;
1753  unsigned SrcVecNumElts =
1754  cast<FixedVectorType>(V1->getType())->getNumElements();
1755  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1756  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1757  bool MayChange = true;
1758  unsigned NumElts = InstVTy->getNumElements();
1759  for (unsigned I = 0; I < NumElts; ++I) {
1760  Constant *CElt = C->getAggregateElement(I);
1761  if (ShMask[I] >= 0) {
1762  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1763  Constant *NewCElt = NewVecC[ShMask[I]];
1764  // Bail out if:
1765  // 1. The constant vector contains a constant expression.
1766  // 2. The shuffle needs an element of the constant vector that can't
1767  // be mapped to a new constant vector.
1768  // 3. This is a widening shuffle that copies elements of V1 into the
1769  // extended elements (extending with undef is allowed).
1770  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1771  I >= SrcVecNumElts) {
1772  MayChange = false;
1773  break;
1774  }
1775  NewVecC[ShMask[I]] = CElt;
1776  }
1777  // If this is a widening shuffle, we must be able to extend with undef
1778  // elements. If the original binop does not produce an undef in the high
1779  // lanes, then this transform is not safe.
1780  // Similarly for undef lanes due to the shuffle mask, we can only
1781  // transform binops that preserve undef.
1782  // TODO: We could shuffle those non-undef constant values into the
1783  // result by using a constant vector (rather than an undef vector)
1784  // as operand 1 of the new binop, but that might be too aggressive
1785  // for target-independent shuffle creation.
1786  if (I >= SrcVecNumElts || ShMask[I] < 0) {
1787  Constant *MaybeUndef =
1788  ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
1789  : ConstantExpr::get(Opcode, CElt, UndefScalar);
1790  if (!match(MaybeUndef, m_Undef())) {
1791  MayChange = false;
1792  break;
1793  }
1794  }
1795  }
1796  if (MayChange) {
1797  Constant *NewC = ConstantVector::get(NewVecC);
1798  // It may not be safe to execute a binop on a vector with undef elements
1799  // because the entire instruction can be folded to undef or create poison
1800  // that did not exist in the original code.
1801  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1802  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1803 
1804  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1805  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1806  Value *NewLHS = ConstOp1 ? V1 : NewC;
1807  Value *NewRHS = ConstOp1 ? NewC : V1;
1808  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1809  }
1810  }
1811 
1812  // Try to reassociate to sink a splat shuffle after a binary operation.
1813  if (Inst.isAssociative() && Inst.isCommutative()) {
1814  // Canonicalize shuffle operand as LHS.
1815  if (isa<ShuffleVectorInst>(RHS))
1816  std::swap(LHS, RHS);
1817 
1818  Value *X;
1819  ArrayRef<int> MaskC;
1820  int SplatIndex;
1821  Value *Y, *OtherOp;
1822  if (!match(LHS,
1823  m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1824  !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1825  X->getType() != Inst.getType() ||
1826  !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1827  return nullptr;
1828 
1829  // FIXME: This may not be safe if the analysis allows undef elements. By
1830  // moving 'Y' before the splat shuffle, we are implicitly assuming
1831  // that it is not undef/poison at the splat index.
1832  if (isSplatValue(OtherOp, SplatIndex)) {
1833  std::swap(Y, OtherOp);
1834  } else if (!isSplatValue(Y, SplatIndex)) {
1835  return nullptr;
1836  }
1837 
1838  // X and Y are splatted values, so perform the binary operation on those
1839  // values followed by a splat followed by the 2nd binary operation:
1840  // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1841  Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1842  SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1843  Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1844  Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1845 
1846  // Intersect FMF on both new binops. Other (poison-generating) flags are
1847  // dropped to be safe.
1848  if (isa<FPMathOperator>(R)) {
1849  R->copyFastMathFlags(&Inst);
1850  R->andIRFlags(RHS);
1851  }
1852  if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1853  NewInstBO->copyIRFlags(R);
1854  return R;
1855  }
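 // Illustrative example of the reassociation above (hypothetical IR), where
 // %y is known to be a splat of its element 0 and %x is splatted explicitly:
 //   %sx  = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
 //   %byo = add <4 x i32> %y, %other
 //   %res = add <4 x i32> %sx, %byo
 // becomes
 //   %xy  = add <4 x i32> %x, %y
 //   %sxy = shufflevector <4 x i32> %xy, <4 x i32> undef, <4 x i32> zeroinitializer
 //   %res = add <4 x i32> %sxy, %other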
1856 
1857  return nullptr;
1858 }
1859 
1860 /// Try to narrow the width of a binop if at least 1 operand is an extend of
1861 /// a value. This requires a potentially expensive known bits check to make
1862 /// sure the narrow op does not overflow.
1863 Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1864  // We need at least one extended operand.
1865  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1866 
1867  // If this is a sub, we swap the operands since we always want an extension
1868  // on the RHS. The LHS can be an extension or a constant.
1869  if (BO.getOpcode() == Instruction::Sub)
1870  std::swap(Op0, Op1);
1871 
1872  Value *X;
1873  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1874  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1875  return nullptr;
1876 
1877  // If both operands are the same extension from the same source type and we
1878  // can eliminate at least one (hasOneUse), this might work.
1879  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1880  Value *Y;
1881  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1882  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1883  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1884  // If that did not match, see if we have a suitable constant operand.
1885  // Truncating and extending must produce the same constant.
1886  Constant *WideC;
1887  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1888  return nullptr;
1889  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1890  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1891  return nullptr;
1892  Y = NarrowC;
1893  }
1894 
1895  // Swap back now that we found our operands.
1896  if (BO.getOpcode() == Instruction::Sub)
1897  std::swap(X, Y);
1898 
1899  // Both operands have narrow versions. Last step: the math must not overflow
1900  // in the narrow width.
1901  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1902  return nullptr;
1903 
1904  // bo (ext X), (ext Y) --> ext (bo X, Y)
1905  // bo (ext X), C --> ext (bo X, C')
1906  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1907  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1908  if (IsSext)
1909  NewBinOp->setHasNoSignedWrap();
1910  else
1911  NewBinOp->setHasNoUnsignedWrap();
1912  }
1913  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1914 }
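// Illustrative example of the narrowing above (hypothetical IR): if the i8
// addition is known not to overflow in the unsigned sense,
//   %zx  = zext i8 %x to i32
//   %zy  = zext i8 %y to i32
//   %res = add i32 %zx, %zy
// becomes
//   %narrow = add nuw i8 %x, %y
//   %res    = zext i8 %narrow to i32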
1915 
1916 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1917  // At least one GEP must be inbounds.
1918  if (!GEP1.isInBounds() && !GEP2.isInBounds())
1919  return false;
1920 
1921  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1922  (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1923 }
1924 
1925 /// Thread a GEP operation with constant indices through the constant true/false
1926 /// arms of a select.
1927 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1928                                   InstCombiner::BuilderTy &Builder) {
1929  if (!GEP.hasAllConstantIndices())
1930  return nullptr;
1931 
1932  Instruction *Sel;
1933  Value *Cond;
1934  Constant *TrueC, *FalseC;
1935  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1936  !match(Sel,
1937  m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1938  return nullptr;
1939 
1940  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1941  // Propagate 'inbounds' and metadata from existing instructions.
1942  // Note: using IRBuilder to create the constants for efficiency.
1943  SmallVector<Value *, 4> IndexC(GEP.indices());
1944  bool IsInBounds = GEP.isInBounds();
1945  Type *Ty = GEP.getSourceElementType();
1946  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1947  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1948  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1949 }
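// Illustrative example of the select fold above (hypothetical IR, with
// constant globals @t and @f):
//   %sel = select i1 %cond, [4 x i32]* @t, [4 x i32]* @f
//   %gep = getelementptr inbounds [4 x i32], [4 x i32]* %sel, i64 0, i64 2
// becomes
//   %gep = select i1 %cond,
//            i32* getelementptr inbounds ([4 x i32], [4 x i32]* @t, i64 0, i64 2),
//            i32* getelementptr inbounds ([4 x i32], [4 x i32]* @f, i64 0, i64 2)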
1950 
1951 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1952                                              GEPOperator *Src) {
1953  // Combine Indices - If the source pointer to this getelementptr instruction
1954  // is a getelementptr instruction with matching element type, combine the
1955  // indices of the two getelementptr instructions into a single instruction.
1956  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1957  return nullptr;
1958 
1959  if (Src->getResultElementType() == GEP.getSourceElementType() &&
1960  Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
1961  Src->hasOneUse()) {
1962  Value *GO1 = GEP.getOperand(1);
1963  Value *SO1 = Src->getOperand(1);
1964 
1965  if (LI) {
1966  // Try to reassociate loop invariant GEP chains to enable LICM.
1967  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
1968  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
1969  // invariant: this breaks the dependence between GEPs and allows LICM
1970  // to hoist the invariant part out of the loop.
1971  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
1972  // We have to be careful here.
1973  // We have something like:
1974  // %src = getelementptr <ty>, <ty>* %base, <ty> %idx
1975  // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
1976  // If we just swap idx & idx2 then we could inadvertently
1977  // change %src from a vector to a scalar, or vice versa.
1978  // Cases:
1979  // 1) %base a scalar & idx a scalar & idx2 a vector
1980  // => Swapping idx & idx2 turns %src into a vector type.
1981  // 2) %base a scalar & idx a vector & idx2 a scalar
1982  // => Swapping idx & idx2 turns %src into a scalar type
1983  // 3) %base, %idx, and %idx2 are scalars
1984  // => %src & %gep are scalars
1985  // => swapping idx & idx2 is safe
1986  // 4) %base a vector
1987  // => %src is a vector
1988  // => swapping idx & idx2 is safe.
1989  auto *SO0 = Src->getOperand(0);
1990  auto *SO0Ty = SO0->getType();
1991  if (!isa<VectorType>(GEP.getType()) || // case 3
1992  isa<VectorType>(SO0Ty)) { // case 4
1993  Src->setOperand(1, GO1);
1994  GEP.setOperand(1, SO1);
1995  return &GEP;
1996  } else {
1997  // Case 1 or 2
1998  // -- have to recreate %src & %gep
1999  // put NewSrc at same location as %src
2000  Builder.SetInsertPoint(cast<Instruction>(Src));
2001  Value *NewSrc =
2002  Builder.CreateGEP(GEP.getSourceElementType(), SO0, GO1,
2003  Src->getName(), Src->isInBounds());
2004  GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2005      GEP.getSourceElementType(), NewSrc, {SO1});
2006  NewGEP->setIsInBounds(GEP.isInBounds());
2007  return NewGEP;
2008  }
2009  }
2010  }
2011  }
2012  }
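 // Illustrative example of the loop-invariant reassociation above
 // (hypothetical IR), where %inv is loop-invariant and %iv is not:
 //   %src = getelementptr float, float* %base, i64 %iv
 //   %gep = getelementptr float, float* %src, i64 %inv
 // is rewritten (by swapping the indices) to
 //   %src = getelementptr float, float* %base, i64 %inv   ; now hoistable by LICM
 //   %gep = getelementptr float, float* %src, i64 %iv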
2013 
2014  // Note that if our source is a gep chain itself then we wait for that
2015  // chain to be resolved before we perform this transformation. This
2016  // avoids us creating a TON of code in some cases.
2017  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
2018  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
2019  return nullptr; // Wait until our source is folded to completion.
2020 
2021  // For constant GEPs, use a more general offset-based folding approach.
2022  // Only do this for opaque pointers, as the result element type may change.
2023  Type *PtrTy = Src->getType()->getScalarType();
2024  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
2025  (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2026  // Split Src into a variable part and a constant suffix.
2027  gep_type_iterator GTI = gep_type_begin(*Src);
2028  Type *BaseType = GTI.getIndexedType();
2029  bool IsFirstType = true;
2030  unsigned NumVarIndices = 0;
2031  for (auto Pair : enumerate(Src->indices())) {
2032  if (!isa<ConstantInt>(Pair.value())) {
2033  BaseType = GTI.getIndexedType();
2034  IsFirstType = false;
2035  NumVarIndices = Pair.index() + 1;
2036  }
2037  ++GTI;
2038  }
2039 
2040  // Determine the offset for the constant suffix of Src.
2041  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2042  if (NumVarIndices != Src->getNumIndices()) {
2043  // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2044  if (isa<ScalableVectorType>(BaseType))
2045  return nullptr;
2046 
2047  SmallVector<Value *> ConstantIndices;
2048  if (!IsFirstType)
2049  ConstantIndices.push_back(
2050      Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2051  append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2052  Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2053  }
2054 
2055  // Add the offset for GEP (which is fully constant).
2056  if (!GEP.accumulateConstantOffset(DL, Offset))
2057  return nullptr;
2058 
2059  // Convert the total offset back into indices.
2060  SmallVector<APInt> ConstIndices =
2061  DL.getGEPIndicesForOffset(BaseType, Offset);
2062  if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero()))
2063  return nullptr;
2064 
2065  SmallVector<Value *> Indices;
2066  append_range(Indices, drop_end(Src->indices(),
2067  Src->getNumIndices() - NumVarIndices));
2068  for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType))
2069  Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2070 
2071  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2072  ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
2073  Src->getOperand(0), Indices,
2074  GEP.getName())
2075  : GetElementPtrInst::Create(Src->getSourceElementType(),
2076  Src->getOperand(0), Indices,
2077  GEP.getName());
2078  }
2079 
2080  if (Src->getResultElementType() != GEP.getSourceElementType())
2081  return nullptr;
2082 
2083  SmallVector<Value*, 8> Indices;
2084 
2085  // Find out whether the last index in the source GEP is a sequential idx.
2086  bool EndsWithSequential = false;
2087  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2088  I != E; ++I)
2089  EndsWithSequential = I.isSequential();
2090 
2091  // Can we combine the two pointer arithmetics offsets?
2092  if (EndsWithSequential) {
2093  // Replace: gep (gep %P, long B), long A, ...
2094  // With: T = long A+B; gep %P, T, ...
2095  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2096  Value *GO1 = GEP.getOperand(1);
2097 
2098  // If they aren't the same type, then the input hasn't been processed
2099  // by the loop above yet (which canonicalizes sequential index types to
2100  // intptr_t). Just avoid transforming this until the input has been
2101  // normalized.
2102  if (SO1->getType() != GO1->getType())
2103  return nullptr;
2104 
2105  Value *Sum =
2106  SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2107  // Only do the combine when we are sure the cost after the
2108  // merge is never more than that before the merge.
2109  if (Sum == nullptr)
2110  return nullptr;
2111 
2112  // Update the GEP in place if possible.
2113  if (Src->getNumOperands() == 2) {
2114  GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2115  replaceOperand(GEP, 0, Src->getOperand(0));
2116  replaceOperand(GEP, 1, Sum);
2117  return &GEP;
2118  }
2119  Indices.append(Src->op_begin()+1, Src->op_end()-1);
2120  Indices.push_back(Sum);
2121  Indices.append(GEP.op_begin()+2, GEP.op_end());
2122  } else if (isa<Constant>(*GEP.idx_begin()) &&
2123  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2124  Src->getNumOperands() != 1) {
2125  // Otherwise we can do the fold if the first index of the GEP is a zero
2126  Indices.append(Src->op_begin()+1, Src->op_end());
2127  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2128  }
2129 
2130  if (!Indices.empty())
2131  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2132             ? GetElementPtrInst::CreateInBounds(
2133                   Src->getSourceElementType(), Src->getOperand(0), Indices,
2134  GEP.getName())
2135  : GetElementPtrInst::Create(Src->getSourceElementType(),
2136  Src->getOperand(0), Indices,
2137  GEP.getName());
2138 
2139  return nullptr;
2140 }
2141 
2142 // Note that we may have also stripped an address space cast in between.
2143 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
2144                                                  GetElementPtrInst &GEP) {
2145  // With opaque pointers, there is no pointer element type we can use to
2146  // adjust the GEP type.
2147  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2148  if (SrcType->isOpaque())
2149  return nullptr;
2150 
2151  Type *GEPEltType = GEP.getSourceElementType();
2152  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
2153  Value *SrcOp = BCI->getOperand(0);
2154 
2155  // GEP directly using the source operand if this GEP is accessing an element
2156  // of a bitcasted pointer to vector or array of the same dimensions:
2157  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2158  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2159  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
2160  const DataLayout &DL) {
2161  auto *VecVTy = cast<FixedVectorType>(VecTy);
2162  return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
2163  ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
2164  DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
2165  };
2166  if (GEP.getNumOperands() == 3 &&
2167  ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
2168  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
2169  (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
2170  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
2171 
2172  // Create a new GEP here, as using `setOperand()` followed by
2173  // `setSourceElementType()` won't actually update the type of the
2174  // existing GEP Value, which causes issues if this Value is accessed when
2175  // constructing an AddrSpaceCastInst.
2176  SmallVector<Value *, 8> Indices(GEP.indices());
2177  Value *NGEP =
2178  Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
2179  NGEP->takeName(&GEP);
2180 
2181  // Preserve GEP address space to satisfy users
2182  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2183  return new AddrSpaceCastInst(NGEP, GEP.getType());
2184 
2185  return replaceInstUsesWith(GEP, NGEP);
2186  }
2187 
2188  // See if we can simplify:
2189  // X = bitcast A* to B*
2190  // Y = gep X, <...constant indices...>
2191  // into a gep of the original struct. This is important for SROA and alias
2192  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2193  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
2194  APInt Offset(OffsetBits, 0);
2195 
2196  // If the bitcast argument is an allocation, the bitcast is for conversion
2197  // to the actual type of the allocation. Removing such bitcasts results in
2198  // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
2199  // struct or array hierarchy.
2200  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
2201  // a better chance to succeed.
2202  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
2203  !isAllocationFn(SrcOp, &TLI)) {
2204  // If this GEP instruction doesn't move the pointer, just replace the GEP
2205  // with a bitcast of the real input to the dest type.
2206  if (!Offset) {
2207  // If the bitcast is of an allocation, and the allocation will be
2208  // converted to match the type of the cast, don't touch this.
2209  if (isa<AllocaInst>(SrcOp)) {
2210  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2211  if (Instruction *I = visitBitCast(*BCI)) {
2212  if (I != BCI) {
2213  I->takeName(BCI);
2214  BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
2215  replaceInstUsesWith(*BCI, I);
2216  }
2217  return &GEP;
2218  }
2219  }
2220 
2221  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2222  return new AddrSpaceCastInst(SrcOp, GEP.getType());
2223  return new BitCastInst(SrcOp, GEP.getType());
2224  }
2225 
2226  // Otherwise, if the offset is non-zero, we need to find out if there is a
2227  // field at Offset in 'A's type. If so, we can pull the cast through the
2228  // GEP.
2229  SmallVector<Value *, 8> NewIndices;
2230  if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
2231  Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
2232  GEP.isInBounds());
2233 
2234  if (NGEP->getType() == GEP.getType())
2235  return replaceInstUsesWith(GEP, NGEP);
2236  NGEP->takeName(&GEP);
2237 
2238  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2239  return new AddrSpaceCastInst(NGEP, GEP.getType());
2240  return new BitCastInst(NGEP, GEP.getType());
2241  }
2242  }
2243 
2244  return nullptr;
2245 }
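// Illustrative example of the matching array/vector case handled above
// (hypothetical IR):
//   %p = bitcast <4 x float>* %x to [4 x float]*
//   %g = getelementptr [4 x float], [4 x float]* %p, i64 0, i64 1
// becomes
//   %g = getelementptr <4 x float>, <4 x float>* %x, i64 0, i64 1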
2246 
2247 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2248  Value *PtrOp = GEP.getOperand(0);
2249  SmallVector<Value *, 8> Indices(GEP.indices());
2250  Type *GEPType = GEP.getType();
2251  Type *GEPEltType = GEP.getSourceElementType();
2252  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2253  if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2254  SQ.getWithInstruction(&GEP)))
2255  return replaceInstUsesWith(GEP, V);
2256 
2257  // For vector geps, use the generic demanded vector support.
2258  // Skip if GEP return type is scalable. The number of elements is unknown at
2259  // compile-time.
2260  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2261  auto VWidth = GEPFVTy->getNumElements();
2262  APInt UndefElts(VWidth, 0);
2263  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2264  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2265  UndefElts)) {
2266  if (V != &GEP)
2267  return replaceInstUsesWith(GEP, V);
2268  return &GEP;
2269  }
2270 
2271  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2272  // possible (decide on canonical form for pointer broadcast), 3) exploit
2273  // undef elements to decrease demanded bits
2274  }
2275 
2276  // Eliminate unneeded casts for indices, and replace indices which displace
2277  // by multiples of a zero size type with zero.
2278  bool MadeChange = false;
2279 
2280  // Index width may not be the same width as pointer width.
2281  // Data layout chooses the right type based on supported integer types.
2282  Type *NewScalarIndexTy =
2283  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2284 
2285  gep_type_iterator GTI = gep_type_begin(GEP);
2286  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2287  ++I, ++GTI) {
2288  // Skip indices into struct types.
2289  if (GTI.isStruct())
2290  continue;
2291 
2292  Type *IndexTy = (*I)->getType();
2293  Type *NewIndexType =
2294  IndexTy->isVectorTy()
2295  ? VectorType::get(NewScalarIndexTy,
2296  cast<VectorType>(IndexTy)->getElementCount())
2297  : NewScalarIndexTy;
2298 
2299  // If the element type has zero size then any index over it is equivalent
2300  // to an index of zero, so replace it with zero if it is not zero already.
2301  Type *EltTy = GTI.getIndexedType();
2302  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2303  if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2304  *I = Constant::getNullValue(NewIndexType);
2305  MadeChange = true;
2306  }
2307 
2308  if (IndexTy != NewIndexType) {
2309  // If we are using a wider index than needed for this platform, shrink
2310  // it to what we need. If narrower, sign-extend it to what we need.
2311  // This explicit cast can make subsequent optimizations more obvious.
2312  *I = Builder.CreateIntCast(*I, NewIndexType, true);
2313  MadeChange = true;
2314  }
2315  }
2316  if (MadeChange)
2317  return &GEP;
2318 
2319  // Check to see if the inputs to the PHI node are getelementptr instructions.
2320  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2321  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2322  if (!Op1)
2323  return nullptr;
2324 
2325  // Don't fold a GEP into itself through a PHI node. This can only happen
2326  // through the back-edge of a loop. Folding a GEP into itself means that
2327  // the value of the previous iteration needs to be stored in the meantime,
2328  // thus requiring an additional register variable to be live, but not
2329  // actually achieving anything (the GEP still needs to be executed once per
2330  // loop iteration).
2331  if (Op1 == &GEP)
2332  return nullptr;
2333 
2334  int DI = -1;
2335 
2336  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2337  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2338  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2339  Op1->getSourceElementType() != Op2->getSourceElementType())
2340  return nullptr;
2341 
2342  // As for Op1 above, don't try to fold a GEP into itself.
2343  if (Op2 == &GEP)
2344  return nullptr;
2345 
2346  // Keep track of the type as we walk the GEP.
2347  Type *CurTy = nullptr;
2348 
2349  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2350  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2351  return nullptr;
2352 
2353  if (Op1->getOperand(J) != Op2->getOperand(J)) {
2354  if (DI == -1) {
2355  // We have not seen any differences yet in the GEPs feeding the
2356  // PHI yet, so we record this one if it is allowed to be a
2357  // variable.
2358 
2359  // The first two arguments can vary for any GEP, the rest have to be
2360  // static for struct slots
2361  if (J > 1) {
2362  assert(CurTy && "No current type?");
2363  if (CurTy->isStructTy())
2364  return nullptr;
2365  }
2366 
2367  DI = J;
2368  } else {
2369  // The GEP is different by more than one input. While this could be
2370  // extended to support GEPs that vary by more than one variable it
2371  // doesn't make sense since it greatly increases the complexity and
2372  // would result in an R+R+R addressing mode which no backend
2373  // directly supports and would need to be broken into several
2374  // simpler instructions anyway.
2375  return nullptr;
2376  }
2377  }
2378 
2379  // Sink down a layer of the type for the next iteration.
2380  if (J > 0) {
2381  if (J == 1) {
2382  CurTy = Op1->getSourceElementType();
2383  } else {
2384  CurTy =
2385  GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2386  }
2387  }
2388  }
2389  }
2390 
2391  // If not all GEPs are identical we'll have to create a new PHI node.
2392  // Check that the old PHI node has only one use so that it will get
2393  // removed.
2394  if (DI != -1 && !PN->hasOneUse())
2395  return nullptr;
2396 
2397  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2398  if (DI == -1) {
2399  // All the GEPs feeding the PHI are identical. Clone one down into our
2400  // BB so that it can be merged with the current GEP.
2401  } else {
2402  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2403  // into the current block so it can be merged, and create a new PHI to
2404  // set that index.
2405  PHINode *NewPN;
2406  {
2407  IRBuilderBase::InsertPointGuard Guard(Builder);
2408  Builder.SetInsertPoint(PN);
2409  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2410  PN->getNumOperands());
2411  }
2412 
2413  for (auto &I : PN->operands())
2414  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2415  PN->getIncomingBlock(I));
2416 
2417  NewGEP->setOperand(DI, NewPN);
2418  }
2419 
2420  GEP.getParent()->getInstList().insert(
2421  GEP.getParent()->getFirstInsertionPt(), NewGEP);
2422  replaceOperand(GEP, 0, NewGEP);
2423  PtrOp = NewGEP;
2424  }
2425 
2426  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2427  if (Instruction *I = visitGEPOfGEP(GEP, Src))
2428  return I;
2429 
2430  // Skip if GEP source element type is scalable. The type alloc size is unknown
2431  // at compile-time.
2432  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2433  unsigned AS = GEP.getPointerAddressSpace();
2434  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2435  DL.getIndexSizeInBits(AS)) {
2436  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2437 
2438  bool Matched = false;
2439  uint64_t C;
2440  Value *V = nullptr;
2441  if (TyAllocSize == 1) {
2442  V = GEP.getOperand(1);
2443  Matched = true;
2444  } else if (match(GEP.getOperand(1),
2445  m_AShr(m_Value(V), m_ConstantInt(C)))) {
2446  if (TyAllocSize == 1ULL << C)
2447  Matched = true;
2448  } else if (match(GEP.getOperand(1),
2449  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2450  if (TyAllocSize == C)
2451  Matched = true;
2452  }
2453 
2454  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2455  // only if both point to the same underlying object (otherwise provenance
2456  // is not necessarily retained).
2457  Value *Y;
2458  Value *X = GEP.getOperand(0);
2459  if (Matched &&
2460      match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2461      getUnderlyingObject(X) == getUnderlyingObject(Y))
2462      return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2463  }
2464  }
2465 
2466  // We do not handle pointer-vector geps here.
2467  if (GEPType->isVectorTy())
2468  return nullptr;
2469 
2470  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
2471  Value *StrippedPtr = PtrOp->stripPointerCasts();
2472  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
2473 
2474  // TODO: The basic approach of these folds is not compatible with opaque
2475  // pointers, because we can't use bitcasts as a hint for a desirable GEP
2476  // type. Instead, we should perform canonicalization directly on the GEP
2477  // type. For now, skip these.
2478  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
2479  bool HasZeroPointerIndex = false;
2480  Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
2481 
2482  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
2483  HasZeroPointerIndex = C->isZero();
2484 
2485  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
2486  // into : GEP [10 x i8]* X, i32 0, ...
2487  //
2488  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
2489  // into : GEP i8* X, ...
2490  //
2491  // This occurs when the program declares an array extern like "int X[];"
2492  if (HasZeroPointerIndex) {
2493  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
2494  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
2495  if (CATy->getElementType() == StrippedPtrEltTy) {
2496  // -> GEP i8* X, ...
2497  SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
2498  GetElementPtrInst *Res = GetElementPtrInst::Create(
2499      StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
2500  Res->setIsInBounds(GEP.isInBounds());
2501  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
2502  return Res;
2503  // Insert Res, and create an addrspacecast.
2504  // e.g.,
2505  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
2506  // ->
2507  // %0 = GEP i8 addrspace(1)* X, ...
2508  // addrspacecast i8 addrspace(1)* %0 to i8*
2509  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
2510  }
2511 
2512  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
2513  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
2514  if (CATy->getElementType() == XATy->getElementType()) {
2515  // -> GEP [10 x i8]* X, i32 0, ...
2516  // At this point, we know that the cast source type is a pointer
2517  // to an array of the same type as the destination pointer
2518  // array. Because the array type is never stepped over (there
2519  // is a leading zero) we can fold the cast into this GEP.
2520  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
2521  GEP.setSourceElementType(XATy);
2522  return replaceOperand(GEP, 0, StrippedPtr);
2523  }
2524  // Cannot replace the base pointer directly because StrippedPtr's
2525  // address space is different. Instead, create a new GEP followed by
2526  // an addrspacecast.
2527  // e.g.,
2528  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
2529  // i32 0, ...
2530  // ->
2531  // %0 = GEP [10 x i8] addrspace(1)* X, ...
2532  // addrspacecast i8 addrspace(1)* %0 to i8*
2533  SmallVector<Value *, 8> Idx(GEP.indices());
2534  Value *NewGEP =
2535  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2536  GEP.getName(), GEP.isInBounds());
2537  return new AddrSpaceCastInst(NewGEP, GEPType);
2538  }
2539  }
2540  }
2541  } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
2542  // Skip if GEP source element type is scalable. The type alloc size is
2543  // unknown at compile-time.
2544  // Transform things like:
2545  //   %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
2546  // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
2547  if (StrippedPtrEltTy->isArrayTy() &&
2548  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
2549  DL.getTypeAllocSize(GEPEltType)) {
2550  Type *IdxType = DL.getIndexType(GEPType);
2551  Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
2552  Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2553  GEP.getName(), GEP.isInBounds());
2554 
2555  // V and GEP are both pointer types --> BitCast
2556  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2557  }
2558 
2559  // Transform things like:
2560  // %V = mul i64 %N, 4
2561  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2562  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2563  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2564  // Check that changing the type amounts to dividing the index by a scale
2565  // factor.
2566  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2567  uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
2568  if (ResSize && SrcSize % ResSize == 0) {
2569  Value *Idx = GEP.getOperand(1);
2570  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2571  uint64_t Scale = SrcSize / ResSize;
2572 
2573  // Earlier transforms ensure that the index has the right type
2574  // according to Data Layout, which considerably simplifies the
2575  // logic by eliminating implicit casts.
2576  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2577  "Index type does not match the Data Layout preferences");
2578 
2579  bool NSW;
2580  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2581  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2582  // If the multiplication NewIdx * Scale may overflow then the new
2583  // GEP may not be "inbounds".
2584  Value *NewGEP =
2585  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2586  GEP.getName(), GEP.isInBounds() && NSW);
2587 
2588  // The NewGEP must be pointer typed, so must the old one -> BitCast
2589  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2590      GEPType);
2591  }
2592  }
2593  }
2594 
2595  // Similarly, transform things like:
2596  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2597  // (where tmp = 8*tmp2) into:
2598  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2599  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2600  StrippedPtrEltTy->isArrayTy()) {
2601  // Check that changing to the array element type amounts to dividing the
2602  // index by a scale factor.
2603  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2604  uint64_t ArrayEltSize =
2605  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
2606  .getFixedSize();
2607  if (ResSize && ArrayEltSize % ResSize == 0) {
2608  Value *Idx = GEP.getOperand(1);
2609  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2610  uint64_t Scale = ArrayEltSize / ResSize;
2611 
2612  // Earlier transforms ensure that the index has the right type
2613  // according to the Data Layout, which considerably simplifies
2614  // the logic by eliminating implicit casts.
2615  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2616  "Index type does not match the Data Layout preferences");
2617 
2618  bool NSW;
2619  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2620  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2621  // If the multiplication NewIdx * Scale may overflow then the new
2622  // GEP may not be "inbounds".
2623  Type *IndTy = DL.getIndexType(GEPType);
2624  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2625 
2626  Value *NewGEP =
2627  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2628  GEP.getName(), GEP.isInBounds() && NSW);
2629  // The NewGEP must be pointer typed, so must the old one -> BitCast
2630  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2631      GEPType);
2632  }
2633  }
2634  }
2635  }
2636  }
2637 
2638  // addrspacecast between types is canonicalized as a bitcast, then an
2639  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2640  // through the addrspacecast.
2641  Value *ASCStrippedPtrOp = PtrOp;
2642  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2643  // X = bitcast A addrspace(1)* to B addrspace(1)*
2644  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2645  // Z = gep Y, <...constant indices...>
2646  // Into an addrspacecasted GEP of the struct.
2647  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2648  ASCStrippedPtrOp = BC;
2649  }
2650 
2651  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
2652  if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
2653  return I;
2654 
2655  if (!GEP.isInBounds()) {
2656  unsigned IdxWidth =
2657  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2658  APInt BasePtrOffset(IdxWidth, 0);
2659  Value *UnderlyingPtrOp =
2660      PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2661      BasePtrOffset);
2662  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2663  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2664  BasePtrOffset.isNonNegative()) {
2665  APInt AllocSize(
2666  IdxWidth,
2667  DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
2668  if (BasePtrOffset.ule(AllocSize)) {
2669  return GetElementPtrInst::CreateInBounds(
2670      GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2671  }
2672  }
2673  }
2674  }
2675 
2676  if (Instruction *R = foldSelectGEP(GEP, Builder))
2677  return R;
2678 
2679  return nullptr;
2680 }
2681 
2682 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2683                                          Instruction *AI) {
2684  if (isa<ConstantPointerNull>(V))
2685  return true;
2686  if (auto *LI = dyn_cast<LoadInst>(V))
2687  return isa<GlobalVariable>(LI->getPointerOperand());
2688  // Two distinct allocations will never be equal.
2689  return isAllocLikeFn(V, &TLI) && V != AI;
2690 }
2691 
2692 /// Given a call CB which uses an address UsedV, return true if we can prove the
2693 /// call's only possible effect is storing to UsedV.
2694 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2695  const TargetLibraryInfo &TLI) {
2696  if (!CB.use_empty())
2697  // TODO: add recursion if returned attribute is present
2698  return false;
2699 
2700  if (CB.isTerminator())
2701  // TODO: remove implementation restriction
2702  return false;
2703 
2704  if (!CB.willReturn() || !CB.doesNotThrow())
2705  return false;
2706 
2707  // If the only possible side effect of the call is writing to the alloca,
2708  // and the result isn't used, we can safely remove any reads implied by the
2709  // call including those which might read the alloca itself.
2710  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2711  return Dest && Dest->Ptr == UsedV;
2712 }
2713 
2714 static bool isAllocSiteRemovable(Instruction *AI,
2715                                  SmallVectorImpl<WeakTrackingVH> &Users,
2716                                  const TargetLibraryInfo &TLI) {
2717  SmallVector<Instruction*, 4> Worklist;
2718  const Optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2719  Worklist.push_back(AI);
2720 
2721  do {
2722  Instruction *PI = Worklist.pop_back_val();
2723  for (User *U : PI->users()) {
2724  Instruction *I = cast<Instruction>(U);
2725  switch (I->getOpcode()) {
2726  default:
2727  // Give up the moment we see something we can't handle.
2728  return false;
2729 
2730  case Instruction::AddrSpaceCast:
2731  case Instruction::BitCast:
2732  case Instruction::GetElementPtr:
2733  Users.emplace_back(I);
2734  Worklist.push_back(I);
2735  continue;
2736 
2737  case Instruction::ICmp: {
2738  ICmpInst *ICI = cast<ICmpInst>(I);
2739  // We can fold eq/ne comparisons with null to false/true, respectively.
2740  // We also fold comparisons in some conditions provided the alloc has
2741  // not escaped (see isNeverEqualToUnescapedAlloc).
2742  if (!ICI->isEquality())
2743  return false;
2744  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2745  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2746  return false;
2747  Users.emplace_back(I);
2748  continue;
2749  }
2750 
2751  case Instruction::Call:
2752  // Ignore no-op and store intrinsics.
2753  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2754  switch (II->getIntrinsicID()) {
2755  default:
2756  return false;
2757 
2758  case Intrinsic::memmove:
2759  case Intrinsic::memcpy:
2760  case Intrinsic::memset: {
2761  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2762  if (MI->isVolatile() || MI->getRawDest() != PI)
2763  return false;
2764  LLVM_FALLTHROUGH;
2765  }
2766  case Intrinsic::assume:
2767  case Intrinsic::invariant_start:
2768  case Intrinsic::invariant_end:
2769  case Intrinsic::lifetime_start:
2770  case Intrinsic::lifetime_end:
2771  case Intrinsic::objectsize:
2772  Users.emplace_back(I);
2773  continue;
2774  case Intrinsic::launder_invariant_group:
2775  case Intrinsic::strip_invariant_group:
2776  Users.emplace_back(I);
2777  Worklist.push_back(I);
2778  continue;
2779  }
2780  }
2781 
2782  if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2783  Users.emplace_back(I);
2784  continue;
2785  }
2786 
2787  if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) {
2788  assert(Family);
2789  Users.emplace_back(I);
2790  continue;
2791  }
2792 
2793  if (isReallocLikeFn(I, &TLI) &&
2794  getAllocationFamily(I, &TLI) == Family) {
2795  assert(Family);
2796  Users.emplace_back(I);
2797  Worklist.push_back(I);
2798  continue;
2799  }
2800 
2801  return false;
2802 
2803  case Instruction::Store: {
2804  StoreInst *SI = cast<StoreInst>(I);
2805  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2806  return false;
2807  Users.emplace_back(I);
2808  continue;
2809  }
2810  }
2811  llvm_unreachable("missing a return?");
2812  }
2813  } while (!Worklist.empty());
2814  return true;
2815 }
2816 
2817 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2818  assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI));
2819 
2820  // If we have a malloc call which is only used in any number of comparisons to
2821  // null and free calls, delete the calls and replace the comparisons with true
2822  // or false as appropriate.
2823 
2824  // This is based on the principle that we can substitute our own allocation
2825  // function (which will never return null) rather than knowledge of the
2826  // specific function being called. In some sense this can change the permitted
2827  // outputs of a program (when we convert a malloc to an alloca, the fact that
2828  // the allocation is now on the stack is potentially visible, for example),
2829  // but we believe it does so in a permissible manner.
2830  SmallVector<WeakTrackingVH, 64> Users;
2831 
2832  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2833  // before each store.
2834  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2835  std::unique_ptr<DIBuilder> DIB;
2836  if (isa<AllocaInst>(MI)) {
2837  findDbgUsers(DVIs, &MI);
2838  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2839  }
2840 
2841  if (isAllocSiteRemovable(&MI, Users, TLI)) {
2842  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2843  // Lowering all @llvm.objectsize calls first because they may
2844  // use a bitcast/GEP of the alloca we are removing.
2845  if (!Users[i])
2846  continue;
2847 
2848  Instruction *I = cast<Instruction>(&*Users[i]);
2849 
2850  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2851  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2852  Value *Result =
2853  lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2854  replaceInstUsesWith(*I, Result);
2855  eraseInstFromFunction(*I);
2856  Users[i] = nullptr; // Skip examining in the next loop.
2857  }
2858  }
2859  }
2860  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2861  if (!Users[i])
2862  continue;
2863 
2864  Instruction *I = cast<Instruction>(&*Users[i]);
2865 
2866  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2867  replaceInstUsesWith(*C,
2868  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2869  C->isFalseWhenEqual()));
2870  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2871  for (auto *DVI : DVIs)
2872  if (DVI->isAddressOfVariable())
2873  ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2874  } else {
2875  // Casts, GEP, or anything else: we're about to delete this instruction,
2876  // so it cannot have any valid uses.
2877  replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2878  }
2879  eraseInstFromFunction(*I);
2880  }
2881 
2882  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2883  // Replace invoke with a NOP intrinsic to maintain the original CFG
2884  Module *M = II->getModule();
2885  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2886  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2887  None, "", II->getParent());
2888  }
2889 
2890  // Remove debug intrinsics which describe the value contained within the
2891  // alloca. In addition to removing dbg.{declare,addr} which simply point to
2892  // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2893  //
2894  // ```
2895  // define void @foo(i32 %0) {
2896  // %a = alloca i32 ; Deleted.
2897  // store i32 %0, i32* %a
2898  // dbg.value(i32 %0, "arg0") ; Not deleted.
2899  // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2900  // call void @trivially_inlinable_no_op(i32* %a)
2901  // ret void
2902  // }
2903  // ```
2904  //
2905  // This may not be required if we stop describing the contents of allocas
2906  // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2907  // the LowerDbgDeclare utility.
2908  //
2909  // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2910  // "arg0" dbg.value may be stale after the call. However, failing to remove
2911  // the DW_OP_deref dbg.value causes large gaps in location coverage.
2912  for (auto *DVI : DVIs)
2913  if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2914  DVI->eraseFromParent();
2915 
2916  return eraseInstFromFunction(MI);
2917  }
2918  return nullptr;
2919 }
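// Illustrative example of removing a dead allocation site (hypothetical IR):
//   %m = call i8* @malloc(i64 16)
//   %c = icmp eq i8* %m, null
//   call void @free(i8* %m)
// The allocation is only used by the comparison and the free, so all three
// instructions are erased and %c is replaced by 'false' (the allocation we
// may substitute never returns null).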
2920 
2921 /// Move the call to free before a NULL test.
2922 ///
2923 /// Check if this free is accessed after its argument has been tested
2924 /// against NULL (property 0).
2925 /// If yes, it is legal to move this call in its predecessor block.
2926 ///
2927 /// The move is performed only if the block containing the call to free
2928 /// will be removed, i.e.:
2929 /// 1. it has only one predecessor P, and P has two successors
2930 /// 2. it contains the call, noops, and an unconditional branch
2931 /// 3. its successor is the same as its predecessor's successor
2932 ///
2933 /// Profitability is not a concern here; this function should
2934 /// be called only if the caller knows this transformation would be
2935 /// profitable (e.g., for code size).
2936 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2937                                                 const DataLayout &DL) {
2938  Value *Op = FI.getArgOperand(0);
2939  BasicBlock *FreeInstrBB = FI.getParent();
2940  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2941 
2942  // Validate part of constraint #1: Only one predecessor
2943  // FIXME: We can extend the number of predecessors, but in that case, we
2944  // would duplicate the call to free in each predecessor and it may
2945  // not be profitable even for code size.
2946  if (!PredBB)
2947  return nullptr;
2948 
2949  // Validate constraint #2: Does this block contain only the call to
2950  // free, noops, and an unconditional branch?
2951  BasicBlock *SuccBB;
2952  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2953  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2954  return nullptr;
2955 
2956  // If there are only 2 instructions in the block, at this point,
2957  // they are the call to free and the unconditional branch.
2958  // If there are more than 2 instructions, check that the extra ones are noops,
2959  // i.e., they won't hurt the performance of the generated code.
2960  if (FreeInstrBB->size() != 2) {
2961  for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2962  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2963  continue;
2964  auto *Cast = dyn_cast<CastInst>(&Inst);
2965  if (!Cast || !Cast->isNoopCast(DL))
2966  return nullptr;
2967  }
2968  }
2969  // Validate the rest of constraint #1 by matching on the pred branch.
2970  Instruction *TI = PredBB->getTerminator();
2971  BasicBlock *TrueBB, *FalseBB;
2972  ICmpInst::Predicate Pred;
2973  if (!match(TI, m_Br(m_ICmp(Pred,
2974  m_CombineOr(m_Specific(Op),
2975  m_Specific(Op->stripPointerCasts())),
2976  m_Zero()),
2977  TrueBB, FalseBB)))
2978  return nullptr;
2979  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2980  return nullptr;
2981 
2982  // Validate constraint #3: Ensure the null case just falls through.
2983  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2984  return nullptr;
2985  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2986  "Broken CFG: missing edge from predecessor to successor");
2987 
2988  // At this point, we know that everything in FreeInstrBB can be moved
2989  // before TI.
2990  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2991  if (&Instr == FreeInstrBBTerminator)
2992  break;
2993  Instr.moveBefore(TI);
2994  }
2995  assert(FreeInstrBB->size() == 1 &&
2996  "Only the branch instruction should remain");
2997 
2998  // Now that we've moved the call to free before the NULL check, we have to
2999  // remove any attributes on its parameter that imply it's non-null, because
3000  // those attributes might have only been valid because of the NULL check, and
3001  // we can get miscompiles if we keep them. This is conservative if non-null is
3002  // also implied by something other than the NULL check, but it's guaranteed to
3003  // be correct, and the conservativeness won't matter in practice, since the
3004  // attributes are irrelevant for the call to free itself and the pointer
3005  // shouldn't be used after the call.
3006  AttributeList Attrs = FI.getAttributes();
3007  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3008  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3009  if (Dereferenceable.isValid()) {
3010  uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3011  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3012  Attribute::Dereferenceable);
3013  Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3014  }
3015  FI.setAttributes(Attrs);
3016 
3017  return &FI;
3018 }
3019 
3020  Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
3021  Value *Op = FI.getArgOperand(0);
3022 
3023  // free undef -> unreachable.
3024  if (isa<UndefValue>(Op)) {
3025  // Leave a marker since we can't modify the CFG here.
3026  CreateNonTerminatorUnreachable(&FI);
3027  return eraseInstFromFunction(FI);
3028  }
3029 
3030  // If we have 'free null' delete the instruction. This can happen in stl code
3031  // when lots of inlining happens.
3032  if (isa<ConstantPointerNull>(Op))
3033  return eraseInstFromFunction(FI);
3034 
3035  // If we had free(realloc(...)) with no intervening uses, then eliminate the
3036  // realloc() entirely.
3037  if (CallInst *CI = dyn_cast<CallInst>(Op)) {
3038  if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) {
3039  return eraseInstFromFunction(
3040  *replaceInstUsesWith(*CI, CI->getOperand(0)));
3041  }
3042  }
3043 
3044  // If we optimize for code size, try to move the call to free before the null
3045  // test so that SimplifyCFG can remove the empty block and dead code
3046  // elimination can remove the branch. I.e., this helps to turn something like:
3047  // if (foo) free(foo);
3048  // into
3049  // free(foo);
3050  //
3051  // Note that we can only do this for 'free' and not for any flavor of
3052  // 'operator delete'; there is no 'operator delete' symbol for which we are
3053  // permitted to invent a call, even if we're passing in a null pointer.
3054  if (MinimizeSize) {
3055  LibFunc Func;
3056  if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3057  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3058  return I;
3059  }
3060 
3061  return nullptr;
3062 }
3063 
3064 static bool isMustTailCall(Value *V) {
3065  if (auto *CI = dyn_cast<CallInst>(V))
3066  return CI->isMustTailCall();
3067  return false;
3068 }
3069 
3070  Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3071  if (RI.getNumOperands() == 0) // ret void
3072  return nullptr;
3073 
3074  Value *ResultOp = RI.getOperand(0);
3075  Type *VTy = ResultOp->getType();
3076  if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
3077  return nullptr;
3078 
3079  // Don't replace result of musttail calls.
3080  if (isMustTailCall(ResultOp))
3081  return nullptr;
3082 
3083  // There might be assume intrinsics dominating this return that completely
3084  // determine the value. If so, constant fold it.
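  // For example (illustrative value names): given a dominating
  //   %c = icmp eq i32 %x, 42
  //   call void @llvm.assume(i1 %c)
  // a later 'ret i32 %x' is rewritten to 'ret i32 42'.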
3085  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
3086  if (Known.isConstant())
3087  return replaceOperand(RI, 0,
3088  Constant::getIntegerValue(VTy, Known.getConstant()));
3089 
3090  return nullptr;
3091 }
3092 
3093 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3094  Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3095  // Try to remove the previous instruction if it must lead to unreachable.
3096  // This includes instructions like stores and "llvm.assume" that may not get
3097  // removed by simple dead code elimination.
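  // For example (illustrative): in
  //   store i32 0, i32* %p
  //   call void @llvm.assume(i1 %c)
  //   unreachable
  // both the store and the assume are erased, since neither can prevent the
  // unreachable from being reached.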
3098  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3099  // While we theoretically can erase EH, that would result in a block that
3100  // used to start with an EH no longer starting with EH, which is invalid.
3101  // To make it valid, we'd need to fixup predecessors to no longer refer to
3102  // this block, but that changes CFG, which is not allowed in InstCombine.
3103  if (Prev->isEHPad())
3104  return nullptr; // Can not drop any more instructions. We're done here.
3105 
3106  if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3107  return nullptr; // Can not drop any more instructions. We're done here.
3108  // Otherwise, this instruction can be freely erased,
3109  // even if it is not side-effect free.
3110 
3111  // A value may still have uses before we process it here (for example, in
3112  // another unreachable block), so convert those to poison.
3113  replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3114  eraseInstFromFunction(*Prev);
3115  }
3116  assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
3117  // FIXME: recurse into unconditional predecessors?
3118  return nullptr;
3119 }
3120 
3121  Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3122  assert(BI.isUnconditional() && "Only for unconditional branches.");
3123 
3124  // If this store is the second-to-last instruction in the basic block
3125  // (excluding debug info and bitcasts of pointers) and if the block ends with
3126  // an unconditional branch, try to move the store to the successor block.
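  // For illustration (names invented), this enables patterns like:
  //   then:                         else:
  //     store i32 1, i32* %p          store i32 2, i32* %p
  //     br label %join                br label %join
  // to become a single store of 'phi i32 [ 1, %then ], [ 2, %else ]' in %join.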
3127 
3128  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3129  auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3130  return BBI->isDebugOrPseudoInst() ||
3131  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3132  };
3133 
3134  BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3135  do {
3136  if (BBI != FirstInstr)
3137  --BBI;
3138  } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3139 
3140  return dyn_cast<StoreInst>(BBI);
3141  };
3142 
3143  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3144  if (mergeStoreIntoSuccessor(*SI))
3145  return &BI;
3146 
3147  return nullptr;
3148 }
3149 
3150  Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3151  if (BI.isUnconditional())
3152  return visitUnconditionalBranchInst(BI);
3153 
3154  // Change br (not X), label True, label False to: br X, label False, True
3155  Value *X = nullptr;
3156  if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
3157  !isa<Constant>(X)) {
3158  // Swap Destinations and condition...
3159  BI.swapSuccessors();
3160  return replaceOperand(BI, 0, X);
3161  }
3162 
3163  // If the condition is irrelevant, remove the use so that other
3164  // transforms on the condition become more effective.
3165  if (!isa<ConstantInt>(BI.getCondition()) &&
3166  BI.getSuccessor(0) == BI.getSuccessor(1))
3167  return replaceOperand(
3168  BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
3169 
3170  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3171  CmpInst::Predicate Pred;
3172  if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
3173  m_BasicBlock(), m_BasicBlock())) &&
3174  !isCanonicalPredicate(Pred)) {
3175  // Swap destinations and condition.
3176  CmpInst *Cond = cast<CmpInst>(BI.getCondition());
3177  Cond->setPredicate(CmpInst::getInversePredicate(Pred));
3178  BI.swapSuccessors();
3179  Worklist.push(Cond);
3180  return &BI;
3181  }
3182 
3183  return nullptr;
3184 }
3185 
3186  Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3187  Value *Cond = SI.getCondition();
3188  Value *Op0;
3189  ConstantInt *AddRHS;
3190  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3191  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3192  for (auto Case : SI.cases()) {
3193  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3194  assert(isa<ConstantInt>(NewCase) &&
3195  "Result of expression should be constant");
3196  Case.setValue(cast<ConstantInt>(NewCase));
3197  }
3198  return replaceOperand(SI, 0, Op0);
3199  }
3200 
3201  KnownBits Known = computeKnownBits(Cond, 0, &SI);
3202  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3203  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3204 
3205  // Compute the number of leading bits we can ignore.
3206  // TODO: A better way to determine this would use ComputeNumSignBits().
3207  for (auto &C : SI.cases()) {
3208  LeadingKnownZeros = std::min(
3209  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
3210  LeadingKnownOnes = std::min(
3211  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
3212  }
3213 
3214  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3215 
3216  // Shrink the condition operand if the new type is smaller than the old type.
3217  // But do not shrink to a non-standard type, because backend can't generate
3218  // good code for that yet.
3219  // TODO: We can make it aggressive again after fixing PR39569.
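  // For example (illustrative): if the condition and every case value are
  // known to have their upper 24 bits all zero (or all one), a 'switch i32 %x'
  // can be rewritten as a switch over 'trunc i32 %x to i8' with truncated
  // case values.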
3220  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3221  shouldChangeType(Known.getBitWidth(), NewWidth)) {
3222  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3223  Builder.SetInsertPoint(&SI);
3224  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3225 
3226  for (auto Case : SI.cases()) {
3227  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3228  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3229  }
3230  return replaceOperand(SI, 0, NewCond);
3231  }
3232 
3233  return nullptr;
3234 }
3235 
3236  Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3237  Value *Agg = EV.getAggregateOperand();
3238 
3239  if (!EV.hasIndices())
3240  return replaceInstUsesWith(EV, Agg);
3241 
3242  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
3243  SQ.getWithInstruction(&EV)))
3244  return replaceInstUsesWith(EV, V);
3245 
3246  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3247  // We're extracting from an insertvalue instruction, compare the indices
3248  const unsigned *exti, *exte, *insi, *inse;
3249  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3250  exte = EV.idx_end(), inse = IV->idx_end();
3251  exti != exte && insi != inse;
3252  ++exti, ++insi) {
3253  if (*insi != *exti)
3254  // The insert and extract both reference distinctly different elements.
3255  // This means the extract is not influenced by the insert, and we can
3256  // replace the aggregate operand of the extract with the aggregate
3257  // operand of the insert. i.e., replace
3258  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3259  // %E = extractvalue { i32, { i32 } } %I, 0
3260  // with
3261  // %E = extractvalue { i32, { i32 } } %A, 0
3262  return ExtractValueInst::Create(IV->getAggregateOperand(),
3263  EV.getIndices());
3264  }
3265  if (exti == exte && insi == inse)
3266  // Both iterators are at the end: Index lists are identical. Replace
3267  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3268  // %C = extractvalue { i32, { i32 } } %B, 1, 0
3269  // with "i32 42"
3270  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3271  if (exti == exte) {
3272  // The extract list is a prefix of the insert list. i.e. replace
3273  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3274  // %E = extractvalue { i32, { i32 } } %I, 1
3275  // with
3276  // %X = extractvalue { i32, { i32 } } %A, 1
3277  // %E = insertvalue { i32 } %X, i32 42, 0
3278  // by switching the order of the insert and extract (though the
3279  // insertvalue should be left in, since it may have other uses).
3280  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3281  EV.getIndices());
3282  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3283  makeArrayRef(insi, inse));
3284  }
3285  if (insi == inse)
3286  // The insert list is a prefix of the extract list
3287  // We can simply remove the common indices from the extract and make it
3288  // operate on the inserted value instead of the insertvalue result.
3289  // i.e., replace
3290  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3291  // %E = extractvalue { i32, { i32 } } %I, 1, 0
3292  // with
3293  // %E = extractvalue { i32 } { i32 42 }, 0
3294  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3295  makeArrayRef(exti, exte));
3296  }
3297  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
3298  // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3299  Intrinsic::ID OvID = WO->getIntrinsicID();
3300  if (*EV.idx_begin() == 0 &&
3301  (OvID == Intrinsic::smul_with_overflow ||
3302  OvID == Intrinsic::umul_with_overflow) &&
3303  match(WO->getArgOperand(1), m_AllOnes())) {
3304  return BinaryOperator::CreateNeg(WO->getArgOperand(0));
3305  }
3306 
3307  // We're extracting from an overflow intrinsic. See if we're the only user;
3308  // that allows us to simplify multiple-result intrinsics to simpler
3309  // things that just produce one value.
3310  if (WO->hasOneUse()) {
3311  // Check if we're grabbing only the result of a 'with overflow' intrinsic
3312  // and replace it with a traditional binary instruction.
3313  if (*EV.idx_begin() == 0) {
3314  Instruction::BinaryOps BinOp = WO->getBinaryOp();
3315  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3316  // Replace the old instruction's uses with poison.
3317  replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3318  eraseInstFromFunction(*WO);
3319  return BinaryOperator::Create(BinOp, LHS, RHS);
3320  }
3321 
3322  assert(*EV.idx_begin() == 1 &&
3323  "unexpected extract index for overflow inst");
3324 
3325  // If only the overflow result is used, and the right hand side is a
3326  // constant (or constant splat), we can remove the intrinsic by directly
3327  // checking for overflow.
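  // For example (illustrative, using i8 for brevity):
  //   %s = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 100)
  //   %ov = extractvalue { i8, i1 } %s, 1
  // is reduced to an overflow check along the lines of 'icmp ugt i8 %x, 155'.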
3328  const APInt *C;
3329  if (match(WO->getRHS(), m_APInt(C))) {
3330  // Compute the no-wrap range for LHS given RHS=C, then construct an
3331  // equivalent icmp, potentially using an offset.
3332  ConstantRange NWR =
3333  ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
3334  WO->getNoWrapKind());
3335 
3336  CmpInst::Predicate Pred;
3337  APInt NewRHSC, Offset;
3338  NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3339  auto *OpTy = WO->getRHS()->getType();
3340  auto *NewLHS = WO->getLHS();
3341  if (Offset != 0)
3342  NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3343  return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3344  ConstantInt::get(OpTy, NewRHSC));
3345  }
3346  }
3347  }
3348  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
3349  // If the (non-volatile) load only has one use, we can rewrite this to a
3350  // load from a GEP. This reduces the size of the load. If a load is used
3351  // only by extractvalue instructions then this either must have been
3352  // optimized before, or it is a struct with padding, in which case we
3353  // don't want to do the transformation as it loses padding knowledge.
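  // For example (illustrative):
  //   %v = load { i32, i64 }, { i32, i64 }* %p
  //   %e = extractvalue { i32, i64 } %v, 1
  // becomes
  //   %gep = getelementptr inbounds { i32, i64 }, { i32, i64 }* %p, i32 0, i32 1
  //   %e = load i64, i64* %gep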
3354  if (L->isSimple() && L->hasOneUse()) {
3355  // extractvalue has integer indices, getelementptr has Value*s. Convert.
3356  SmallVector<Value*, 4> Indices;
3357  // Prefix an i32 0 since we need the first element.
3358  Indices.push_back(Builder.getInt32(0));
3359  for (unsigned Idx : EV.indices())
3360  Indices.push_back(Builder.getInt32(Idx));
3361 
3362  // We need to insert these at the location of the old load, not at that of
3363  // the extractvalue.
3364  Builder.SetInsertPoint(L);
3365  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3366  L->getPointerOperand(), Indices);
3367  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3368  // Whatever aliasing information we had for the original load must also
3369  // hold for the smaller load, so propagate the annotations.
3370  NL->setAAMetadata(L->getAAMetadata());
3371  // Returning the load directly will cause the main loop to insert it in
3372  // the wrong spot, so use replaceInstUsesWith().
3373  return replaceInstUsesWith(EV, NL);
3374  }
3375  // We could simplify extracts from other values. Note that nested extracts may
3376  // already be simplified implicitly by the above: extract (extract (insert) )
3377  // will be translated into extract ( insert ( extract ) ) first and then just
3378  // the value inserted, if appropriate. Similarly for extracts from single-use
3379  // loads: extract (extract (load)) will be translated to extract (load (gep))
3380  // and if again single-use then via load (gep (gep)) to load (gep).
3381  // However, double extracts from e.g. function arguments or return values
3382  // aren't handled yet.
3383  return nullptr;
3384 }
3385 
3386 /// Return 'true' if the given typeinfo will match anything.
3387 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3388  switch (Personality) {
3389  case EHPersonality::GNU_C:
3390  case EHPersonality::GNU_C_SjLj:
3391  case EHPersonality::Rust:
3392  // The GCC C EH and Rust personalities only exist to support cleanups, so
3393  // it's not clear what the semantics of catch clauses are.
3394  return false;
3395  case EHPersonality::Unknown:
3396  return false;
3397  case EHPersonality::GNU_Ada:
3398  // While __gnat_all_others_value will match any Ada exception, it doesn't
3399  // match foreign exceptions (or didn't, before gcc-4.7).
3400  return false;
3401  case EHPersonality::GNU_CXX:
3402  case EHPersonality::GNU_CXX_SjLj:
3403  case EHPersonality::GNU_ObjC:
3404  case EHPersonality::MSVC_X86SEH:
3405  case EHPersonality::MSVC_TableSEH:
3406  case EHPersonality::MSVC_CXX:
3407  case EHPersonality::CoreCLR:
3408  case EHPersonality::Wasm_CXX:
3409  case EHPersonality::XL_CXX:
3410  return TypeInfo->isNullValue();
3411  }
3412  llvm_unreachable("invalid enum");
3413 }
3414 
3415 static bool shorter_filter(const Value *LHS, const Value *RHS) {
3416  return
3417  cast<ArrayType>(LHS->getType())->getNumElements()
3418  <
3419  cast<ArrayType>(RHS->getType())->getNumElements();
3420 }
3421 
3422  Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3423  // The logic here should be correct for any real-world personality function.
3424  // However if that turns out not to be true, the offending logic can always
3425  // be conditioned on the personality function, like the catch-all logic is.
3426  EHPersonality Personality =
3427  classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3428 
3429  // Simplify the list of clauses, e.g. by removing repeated catch clauses
3430  // (these are often created by inlining).
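  // For example (illustrative typeinfo name): in
  //   landingpad { i8*, i32 } catch i8* @_ZTIi catch i8* @_ZTIi
  // the second, redundant 'catch i8* @_ZTIi' clause is dropped.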
3431  bool MakeNewInstruction = false; // If true, recreate using the following:
3432  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3433  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3434 
3435  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3436  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3437  bool isLastClause = i + 1 == e;
3438  if (LI.isCatch(i)) {
3439  // A catch clause.
3440  Constant *CatchClause = LI.getClause(i);
3441  Constant *TypeInfo = CatchClause->stripPointerCasts();
3442 
3443  // If we already saw this clause, there is no point in having a second
3444  // copy of it.
3445  if (AlreadyCaught.insert(TypeInfo).second) {
3446  // This catch clause was not already seen.
3447  NewClauses.push_back(CatchClause);
3448  } else {
3449  // Repeated catch clause - drop the redundant copy.
3450  MakeNewInstruction = true;
3451  }
3452 
3453  // If this is a catch-all then there is no point in keeping any following
3454  // clauses or marking the landingpad as having a cleanup.
3455  if (isCatchAll(Personality, TypeInfo)) {
3456  if (!isLastClause)
3457  MakeNewInstruction = true;
3458  CleanupFlag = false;
3459  break;
3460  }
3461  } else {
3462  // A filter clause. If any of the filter elements were already caught
3463  // then they can be dropped from the filter. It is tempting to try to
3464  // exploit the filter further by saying that any typeinfo that does not
3465  // occur in the filter can't be caught later (and thus can be dropped).
3466  // However this would be wrong, since typeinfos can match without being
3467  // equal (for example if one represents a C++ class, and the other some
3468  // class derived from it).
3469  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3470  Constant *FilterClause = LI.getClause(i);
3471  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3472  unsigned NumTypeInfos = FilterType->getNumElements();
3473 
3474  // An empty filter catches everything, so there is no point in keeping any
3475  // following clauses or marking the landingpad as having a cleanup. By
3476  // dealing with this case here the following code is made a bit simpler.
3477  if (!NumTypeInfos) {
3478  NewClauses.push_back(FilterClause);
3479  if (!isLastClause)
3480  MakeNewInstruction = true;
3481  CleanupFlag = false;
3482  break;
3483  }
3484 
3485  bool MakeNewFilter = false; // If true, make a new filter.
3486  SmallVector<Constant *, 16> NewFilterElts; // New elements.
3487  if (isa<ConstantAggregateZero>(FilterClause)) {
3488  // Not an empty filter - it contains at least one null typeinfo.
3489  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3490  Constant *TypeInfo =
3491  Constant::getNullValue(FilterType->getElementType());
3492  // If this typeinfo is a catch-all then the filter can never match.
3493  if (isCatchAll(Personality, TypeInfo)) {
3494  // Throw the filter away.
3495  MakeNewInstruction = true;
3496  continue;
3497  }
3498 
3499  // There is no point in having multiple copies of this typeinfo, so
3500  // discard all but the first copy if there is more than one.
3501  NewFilterElts.push_back(TypeInfo);
3502  if (NumTypeInfos > 1)
3503  MakeNewFilter = true;
3504  } else {
3505  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3506  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3507  NewFilterElts.reserve(NumTypeInfos);
3508 
3509  // Remove any filter elements that were already caught or that already
3510  // occurred in the filter. While there, see if any of the elements are
3511  // catch-alls. If so, the filter can be discarded.
3512  bool SawCatchAll = false;
3513  for (unsigned j = 0; j != NumTypeInfos; ++j) {
3514  Constant *Elt = Filter->getOperand(j);
3515  Constant *TypeInfo = Elt->stripPointerCasts();
3516  if (isCatchAll(Personality, TypeInfo)) {
3517  // This element is a catch-all. Bail out, noting this fact.
3518  SawCatchAll = true;
3519  break;
3520  }
3521 
3522  // Even if we've seen a type in a catch clause, we don't want to
3523  // remove it from the filter. An unexpected type handler may be
3524  // set up for a call site which throws an exception of the same
3525  // type caught. In order for the exception thrown by the unexpected
3526  // handler to propagate correctly, the filter must be correctly
3527  // described for the call site.
3528  //
3529  // Example:
3530  //
3531  // void unexpected() { throw 1;}
3532  // void foo() throw (int) {
3533  // std::set_unexpected(unexpected);
3534  // try {
3535  // throw 2.0;
3536  // } catch (int i) {}
3537  // }
3538 
3539  // There is no point in having multiple copies of the same typeinfo in
3540  // a filter, so only add it if we didn't already.
3541  if (SeenInFilter.insert(TypeInfo).second)
3542  NewFilterElts.push_back(cast<Constant>(Elt));
3543  }
3544  // A filter containing a catch-all cannot match anything by definition.
3545  if (SawCatchAll) {
3546  // Throw the filter away.
3547  MakeNewInstruction = true;
3548  continue;
3549  }
3550 
3551  // If we dropped something from the filter, make a new one.
3552  if (NewFilterElts.size() < NumTypeInfos)
3553  MakeNewFilter = true;
3554  }
3555  if (MakeNewFilter) {
3556  FilterType = ArrayType::get(FilterType->getElementType(),
3557  NewFilterElts.size());
3558  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3559  MakeNewInstruction = true;
3560  }
3561 
3562  NewClauses.push_back(FilterClause);
3563 
3564  // If the new filter is empty then it will catch everything so there is
3565  // no point in keeping any following clauses or marking the landingpad
3566  // as having a cleanup. The case of the original filter being empty was
3567  // already handled above.
3568  if (MakeNewFilter && !NewFilterElts.size()) {
3569  assert(MakeNewInstruction && "New filter but not a new instruction!");
3570  CleanupFlag = false;
3571  break;
3572  }
3573  }
3574  }
3575 
3576  // If several filters occur in a row then reorder them so that the shortest
3577  // filters come first (those with the smallest number of elements). This is
3578  // advantageous because shorter filters are more likely to match, speeding up
3579  // unwinding, but mostly because it increases the effectiveness of the other
3580  // filter optimizations below.
3581  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3582  unsigned j;
3583  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3584  for (j = i; j != e; ++j)
3585  if (!isa<ArrayType>(NewClauses[j]->getType()))
3586  break;
3587 
3588  // Check whether the filters are already sorted by length. We need to know
3589  // if sorting them is actually going to do anything so that we only make a
3590  // new landingpad instruction if it does.
3591  for (unsigned k = i; k + 1 < j; ++k)
3592  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3593  // Not sorted, so sort the filters now. Doing an unstable sort would be
3594  // correct too but reordering filters pointlessly might confuse users.
3595  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3596  shorter_filter);
3597  MakeNewInstruction = true;
3598  break;
3599  }
3600 
3601  // Look for the next batch of filters.
3602  i = j + 1;
3603  }
3604 
3605  // If typeinfos matched if and only if equal, then the elements of a filter L
3606  // that occurs later than a filter F could be replaced by the intersection of
3607  // the elements of F and L. In reality two typeinfos can match without being
3608  // equal (for example if one represents a C++ class, and the other some class
3609  // derived from it) so it would be wrong to perform this transform in general.
3610  // However the transform is correct and useful if F is a subset of L. In that
3611  // case L can be replaced by F, and thus removed altogether since repeating a
3612  // filter is pointless. So here we look at all pairs of filters F and L where
3613  // L follows F in the list of clauses, and remove L if every element of F is
3614  // an element of L. This can occur when inlining C++ functions with exception
3615  // specifications.
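  // For example (illustrative typeinfo names): given
  //   filter [1 x i8*] [i8* @_ZTIi]                  ; F
  //   ...
  //   filter [2 x i8*] [i8* @_ZTIi, i8* @_ZTId]      ; L
  // every element of F is also an element of L, so L is redundant and removed.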
3616  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3617  // Examine each filter in turn.
3618  Value *Filter = NewClauses[i];
3619  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3620  if (!FTy)
3621  // Not a filter - skip it.
3622  continue;
3623  unsigned FElts = FTy->getNumElements();
3624  // Examine each filter following this one. Doing this backwards means that
3625  // we don't have to worry about filters disappearing under us when removed.
3626  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3627  Value *LFilter = NewClauses[j];
3628  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3629  if (!LTy)
3630  // Not a filter - skip it.
3631  continue;
3632  // If Filter is a subset of LFilter, i.e. every element of Filter is also
3633  // an element of LFilter, then discard LFilter.
3634  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3635  // If Filter is empty then it is a subset of LFilter.
3636  if (!FElts) {
3637  // Discard LFilter.
3638  NewClauses.erase(J);
3639  MakeNewInstruction = true;
3640  // Move on to the next filter.
3641  continue;
3642  }
3643  unsigned LElts = LTy->getNumElements();
3644  // If Filter is longer than LFilter then it cannot be a subset of it.
3645  if (FElts > LElts)
3646  // Move on to the next filter.
3647  continue;
3648  // At this point we know that LFilter has at least one element.
3649  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3650  // Filter is a subset of LFilter iff Filter contains only zeros (as we
3651  // already know that Filter is not longer than LFilter).
3652  if (isa<ConstantAggregateZero>(Filter)) {
3653  assert(FElts <= LElts && "Should have handled this case earlier!");
3654  // Discard LFilter.
3655  NewClauses.erase(J);
3656  MakeNewInstruction = true;
3657  }
3658  // Move on to the next filter.
3659  continue;
3660  }
3661  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3662  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3663  // Since Filter is non-empty and contains only zeros, it is a subset of
3664  // LFilter iff LFilter contains a zero.
3665  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3666  for (unsigned l = 0; l != LElts; ++l)
3667  if (LArray->getOperand(l)->isNullValue()) {
3668  // LFilter contains a zero - discard it.
3669  NewClauses.erase(J);
3670  MakeNewInstruction = true;
3671  break;
3672  }
3673  // Move on to the next filter.
3674  continue;
3675  }
3676  // At this point we know that both filters are ConstantArrays. Loop over
3677  // operands to see whether every element of Filter is also an element of
3678  // LFilter. Since filters tend to be short this is probably faster than
3679  // using a method that scales nicely.
3680  ConstantArray *FArray = cast<ConstantArray>(Filter);
3681  bool AllFound = true;
3682  for (unsigned f = 0; f != FElts; ++f) {
3683  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3684  AllFound = false;
3685  for (unsigned l = 0; l != LElts; ++l) {
3686  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3687  if (LTypeInfo == FTypeInfo) {
3688  AllFound = true;
3689  break;
3690  }
3691  }
3692  if (!AllFound)
3693  break;
3694  }
3695  if (AllFound) {
3696  // Discard LFilter.
3697  NewClauses.erase(J);
3698  MakeNewInstruction = true;
3699  }
3700  // Move on to the next filter.
3701  }
3702  }
3703 
3704  // If we changed any of the clauses, replace the old landingpad instruction
3705  // with a new one.
3706  if (MakeNewInstruction) {
3707  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3708  NewClauses.size());
3709  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3710  NLI->addClause(NewClauses[i]);
3711  // A landing pad with no clauses must have the cleanup flag set. It is
3712  // theoretically possible, though highly unlikely, that we eliminated all
3713  // clauses. If so, force the cleanup flag to true.
3714  if (NewClauses.empty())
3715  CleanupFlag = true;
3716  NLI->setCleanup(CleanupFlag);
3717  return NLI;
3718  }
3719 
3720  // Even if none of the clauses changed, we may nonetheless have understood
3721  // that the cleanup flag is pointless. Clear it if so.
3722  if (LI.isCleanup() != CleanupFlag) {
3723  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3724  LI.setCleanup(CleanupFlag);
3725  return &LI;
3726  }
3727 
3728  return nullptr;
3729 }
3730 
3731 Value *
3732 InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3733  // Try to push freeze through instructions that propagate but don't produce
3734  // poison as far as possible. If the operand of the freeze satisfies three
3735  // conditions: 1) it has a single use, 2) it does not itself produce poison,
3736  // and 3) all but one of its operands are guaranteed not to be poison, then
3737  // push the freeze through to the one operand that is not guaranteed
3738  // non-poison. The actual transform is as follows.
3739  // Op1 = ... ; Op1 can be poison
3740  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
3741  // ; only operand that may be poison
3742  // ... = Freeze(Op0)
3743  // =>
3744  // Op1 = ...
3745  // Op1.fr = Freeze(Op1)
3746  // ... = Inst(Op1.fr, NonPoisonOps...)
3747  auto *OrigOp = OrigFI.getOperand(0);
3748  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3749 
3750  // While we could change the other users of OrigOp to use freeze(OrigOp), that
3751  // potentially reduces their optimization potential, so let's only do this if
3752  // OrigOp is used solely by the freeze.
3753  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3754  return nullptr;
3755 
3756  // We can't push the freeze through an instruction which can itself create
3757  // poison. If the only source of new poison is flags, we can simply
3758  // strip them (since we know the only use is the freeze and nothing can
3759  // benefit from them.)
3760  if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
3761  return nullptr;
3762 
3763  // If operand is guaranteed not to be poison, there is no need to add freeze
3764  // to the operand. So we first find the operand that is not guaranteed to be
3765  // poison.
3766  Use *MaybePoisonOperand = nullptr;
3767  for (Use &U : OrigOpInst->operands()) {
3768  if (isGuaranteedNotToBeUndefOrPoison(U.get()))
3769  continue;
3770  if (!MaybePoisonOperand)
3771  MaybePoisonOperand = &U;
3772  else
3773  return nullptr;
3774  }
3775 
3776  OrigOpInst->dropPoisonGeneratingFlags();
3777 
3778  // If all operands are guaranteed to be non-poison, we can drop freeze.
3779  if (!MaybePoisonOperand)
3780  return OrigOp;
3781 
3782  auto *FrozenMaybePoisonOperand = new FreezeInst(
3783  MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3784 
3785  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3786  FrozenMaybePoisonOperand->insertBefore(OrigOpInst);
3787  return OrigOp;
3788 }
3789 
3790  bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3791  Value *Op = FI.getOperand(0);
3792 
3793  if (isa<Constant>(Op) || Op->hasOneUse())
3794  return false;
3795 
3796  // Move the freeze directly after the definition of its operand, so that
3797  // it dominates the maximum number of uses. Note that it may not dominate
3798  // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3799  // the normal/default destination. This is why the domination check in the
3800  // replacement below is still necessary.
3801  Instruction *MoveBefore = nullptr;
3802  if (isa<Argument>(Op)) {
3803  MoveBefore = &FI.getFunction()->getEntryBlock().front();
3804  while (isa<AllocaInst>(MoveBefore))
3805  MoveBefore = MoveBefore->getNextNode();
3806  } else if (auto *PN = dyn_cast<PHINode>(Op)) {
3807  MoveBefore = PN->getParent()->getFirstNonPHI();
3808  } else if (auto *II = dyn_cast<InvokeInst>(Op)) {
3809  MoveBefore = II->getNormalDest()->getFirstNonPHI();
3810  } else if (auto *CB = dyn_cast<CallBrInst>(Op)) {
3811  MoveBefore = CB->getDefaultDest()->getFirstNonPHI();
3812  } else {
3813  auto *I = cast<Instruction>(Op);
3814  assert(!I->isTerminator() && "Cannot be a terminator");
3815  MoveBefore = I->getNextNode();
3816  }
3817 
3818  bool Changed = false;
3819  if (&FI != MoveBefore) {
3820  FI.moveBefore(MoveBefore);
3821  Changed = true;
3822  }
3823 
3824  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3825  bool Dominates = DT.dominates(&FI, U);
3826  Changed |= Dominates;
3827  return Dominates;
3828  });
3829 
3830  return Changed;
3831 }
3832 
3833  Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
3834  Value *Op0 = I.getOperand(0);
3835 
3836  if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3837  return replaceInstUsesWith(I, V);
3838 
3839  // freeze (phi const, x) --> phi const, (freeze x)
3840  if (auto *PN = dyn_cast<PHINode>(Op0)) {
3841  if (Instruction *NV = foldOpIntoPhi(I, PN))
3842  return NV;
3843  }
3844 
3845  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3846  return replaceInstUsesWith(I, NI);
3847 
3848  // If I is freeze(undef), check its uses and fold it to a fixed constant.
3849  // - or: pick -1
3850  // - select's condition: if the true value is constant, choose it by making
3851  // the condition true.
3852  // - default: pick 0
3853  //
3854  // Note that this transform is intentionally done here rather than
3855  // via an analysis in InstSimplify or at individual user sites. That is
3856  // because we must produce the same value for all uses of the freeze -
3857  // it's the reason "freeze" exists!
3858  //
3859  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3860  // duplicating logic for binops at least.
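  // For example (illustrative): if '%f = freeze i32 undef' is used only by
  // '%o = or i32 %f, %x', replacing %f with -1 lets the 'or' fold to -1.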
3861  auto getUndefReplacement = [&I](Type *Ty) {
3862  Constant *BestValue = nullptr;
3863  Constant *NullValue = Constant::getNullValue(Ty);
3864  for (const auto *U : I.users()) {
3865  Constant *C = NullValue;
3866  if (match(U, m_Or(m_Value(), m_Value())))
3867  C = ConstantInt::getAllOnesValue(Ty);
3868  else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
3869  C = ConstantInt::getTrue(Ty);
3870 
3871  if (!BestValue)
3872  BestValue = C;
3873  else if (BestValue != C)
3874  BestValue = NullValue;
3875  }
3876  assert(BestValue && "Must have at least one use");
3877  return BestValue;
3878  };
3879 
3880  if (match(Op0, m_Undef()))
3881  return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
3882 
3883  Constant *C;
3884  if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
3885  Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
3886  return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
3887  }
3888 
3889  // Replace uses of Op with freeze(Op).
3890  if (freezeOtherUses(I))
3891  return &I;
3892 
3893  return nullptr;
3894 }
3895 
3896 /// Check for the case where the call writes to an otherwise dead alloca. This
3897 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
3898 /// helper *only* analyzes the write; it doesn't check any other legality aspect.
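/// For illustration, a sketch of the C idiom this corresponds to (names are
/// invented):
///   int ignored;
///   status = parse(buf, &ignored);  // 'ignored' is never read afterwards, so
///                                   // the call's write through the out-param
///                                   // is a sole write to a dead local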
3899  static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
3900  auto *CB = dyn_cast<CallBase>(I);
3901  if (!CB)
3902  // TODO: handle e.g. store to alloca here - only worth doing if we extend
3903  // to allow reload along used path as described below. Otherwise, this
3904  // is simply a store to a dead allocation which will be removed.
3905  return false;
3906  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
3907  if (!Dest)
3908  return false;
3909  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
3910  if (!AI)
3911  // TODO: allow malloc?
3912  return false;
3913  // TODO: allow memory access dominated by move point? Note that since AI
3914  // could have a reference to itself captured by the call, we would need to
3915  // account for cycles in doing so.
3916  SmallVector<const User *> AllocaUsers;
3917  SmallPtrSet<const User *, 4> Visited;
3918  auto pushUsers = [&](const Instruction &I) {
3919  for (const User *U : I.users()) {
3920  if (Visited.insert(U).second)
3921  AllocaUsers.push_back(U);
3922  }
3923  };
3924  pushUsers(*AI);
3925  while (!AllocaUsers.empty()) {
3926  auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
3927  if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
3928  isa<AddrSpaceCastInst>(UserI)) {
3929  pushUsers(*UserI);
3930  continue;
3931  }
3932  if (UserI == CB)
3933  continue;
3934  // TODO: support lifetime.start/end here
3935  return false;
3936  }
3937  return true;
3938 }
3939 
3940 /// Try to move the specified instruction from its current block into the
3941 /// beginning of DestBlock, which can only happen if it's safe to move the
3942 /// instruction past all of the instructions between it and the end of its
3943 /// block.
3944  static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
3945  TargetLibraryInfo &TLI) {
3946  BasicBlock *SrcBlock = I->getParent();
3947 
3948  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
3949  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
3950  I->isTerminator())
3951  return false;
3952 
3953  // Do not sink static or dynamic alloca instructions. Static allocas must
3954  // remain in the entry block, and dynamic allocas must not be sunk in between
3955  // a stacksave / stackrestore pair, which would incorrectly shorten its
3956  // lifetime.
3957  if (isa<AllocaInst>(I))
3958  return false;
3959 
3960  // Do not sink into catchswitch blocks.
3961  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3962  return false;
3963 
3964  // Do not sink convergent call instructions.
3965  if (auto *CI = dyn_cast<CallInst>(I)) {
3966  if (CI->isConvergent())
3967  return false;
3968  }
3969 
3970  // Unless we can prove that the memory write isn't visible except on the
3971  // path we're sinking to, we must bail.
3972  if (I->mayWriteToMemory()) {
3973  if (!SoleWriteToDeadLocal(I, TLI))
3974  return false;
3975  }
3976 
3977  // We can only sink load instructions if there is nothing between the load and
3978  // the end of block that could change the value.
3979  if (I->mayReadFromMemory()) {
3980  // We don't want to do any sophisticated alias analysis, so we only check
3981  // the instructions after I in I's parent block if we try to sink to its
3982  // successor block.
3983  if (DestBlock->getUniquePredecessor() != I->getParent())
3984  return false;
3985  for (BasicBlock::iterator Scan = std::next(I->getIterator()),
3986  E = I->getParent()->end();
3987  Scan != E; ++Scan)
3988  if (Scan->mayWriteToMemory())
3989  return false;
3990  }
3991 
3992  I->dropDroppableUses([DestBlock](const Use *U) {
3993  if (auto *I = dyn_cast<Instruction>(U->getUser()))
3994  return I->getParent() != DestBlock;
3995  return true;
3996  });
3997  /// FIXME: We could remove droppable uses that are not dominated by
3998  /// the new position.
3999 
4000  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4001  I->moveBefore(&*InsertPos);
4002  ++NumSunkInst;
4003 
4004  // Also sink all related debug uses from the source basic block. Otherwise we
4005  // get a debug use before the def. Attempt to salvage debug uses first, to
4006  // maximise the range over which variables have a location. If we cannot salvage, then
4007  // mark the location undef: we know it was supposed to receive a new location
4008  // here, but that computation has been sunk.
4009  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4010  findDbgUsers(DbgUsers, I);
4011  // Process the sinking DbgUsers in reverse order, as we only want to clone the
4012  // last appearing debug intrinsic for each given variable.
4013  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4014  for (DbgVariableIntrinsic *DVI : DbgUsers)
4015  if (DVI->getParent() == SrcBlock)
4016  DbgUsersToSink.push_back(DVI);
4017  llvm::sort(DbgUsersToSink,
4018  [](auto *A, auto *B) { return B->comesBefore(A); });
4019 
4020  SmallVector<std::unique_ptr<DbgVariableIntrinsic>, 2> DIIClones;
4021  SmallSet<DebugVariable, 4> SunkVariables;
4022  for (auto User : DbgUsersToSink) {
4023  // A dbg.declare instruction should not be cloned, since there can only be
4024  // one per variable fragment. It should be left in the original place
4025  // because the sunk instruction is not an alloca (otherwise we could not be
4026  // here).
4027  if (isa<DbgDeclareInst>(User))
4028  continue;
4029 
4030  DebugVariable DbgUserVariable =
4031  DebugVariable(User->getVariable(), User->getExpression(),
4032  User->getDebugLoc()->getInlinedAt());
4033 
4034  if (!SunkVariables.insert(DbgUserVariable).second)
4035  continue;
4036 
4037  DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4038  if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4039  DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4040  LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4041  }
4042 
4043  // Perform salvaging without the clones, then sink the clones.
4044  if (!DIIClones.empty()) {
4045  salvageDebugInfoForDbgValues(*I, DbgUsers);
4046  // The clones are in reverse order of original appearance, reverse again to
4047  // maintain the original order.
4048  for (auto &DIIClone : llvm::reverse(DIIClones)) {
4049  DIIClone->insertBefore(&*InsertPos);
4050  LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4051  }
4052  }
4053 
4054  return true;
4055 }
4056 
4057  bool InstCombinerImpl::run() {
4058  while (!Worklist.isEmpty()) {
4059  // Walk deferred instructions in reverse order, and push them to the
4060  // worklist, which means they'll end up popped from the worklist in-order.
4061  while (Instruction *I = Worklist.popDeferred()) {
4062  // Check to see if we can DCE the instruction. We do this already here to
4063  // reduce the number of uses and thus allow other folds to trigger.
4064  // Note that eraseInstFromFunction() may push additional instructions on
4065  // the deferred worklist, so this will DCE whole instruction chains.
4066  if (isInstructionTriviallyDead(I, &TLI)) {
4067  eraseInstFromFunction(*I);
4068  ++NumDeadInst;
4069  continue;
4070  }
4071 
4072  Worklist.push(I);
4073  }
4074 
4075  Instruction *I = Worklist.removeOne();
4076  if (I == nullptr) continue; // skip null values.
4077 
4078  // Check to see if we can DCE the instruction.
4079  if (isInstructionTriviallyDead(I, &TLI)) {
4080  eraseInstFromFunction(*I);
4081  ++NumDeadInst;
4082  continue;
4083  }
4084 
4085  if (!DebugCounter::shouldExecute(VisitCounter))
4086  continue;
4087 
4088  // Instruction isn't dead, see if we can constant propagate it.
4089  if (!I->use_empty() &&
4090  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
4091  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
4092  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
4093  << '\n');
4094 
4095  // Add operands to the worklist.
4096  replaceInstUsesWith(*I, C);
4097  ++NumConstProp;
4098  if (isInstructionTriviallyDead(I, &TLI))
4099  eraseInstFromFunction(*I);
4100  MadeIRChange = true;
4101  continue;
4102  }
4103  }
4104 
4105  // See if we can trivially sink this instruction to its user if we can
4106  // prove that the successor is not executed more frequently than our block.
4107  // Return the UserBlock if successful.
4108  auto getOptionalSinkBlockForInst =
4109  [this](Instruction *I) -> Optional<BasicBlock *> {
4110  if (!EnableCodeSinking)
4111  return None;
4112 
4113  BasicBlock *BB = I->getParent();
4114  BasicBlock *UserParent = nullptr;
4115  unsigned NumUsers = 0;
4116 
4117  for (auto *U : I->users()) {
4118  if (U->isDroppable())
4119  continue;
4120  if (NumUsers > MaxSinkNumUsers)
4121  return None;
4122 
4123  Instruction *UserInst = cast<Instruction>(U);
4124  // Special handling for Phi nodes - get the block the use occurs in.
4125  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4126  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4127  if (PN->getIncomingValue(i) == I) {
4128  // Bail out if we have uses in different blocks. We don't do any
4129  // sophisticated analysis (i.e., finding the NearestCommonDominator of
4130  // these use blocks).
4131  if (UserParent && UserParent != PN->getIncomingBlock(i))
4132  return None;
4133  UserParent = PN->getIncomingBlock(i);
4134  }
4135  }
4136  assert(UserParent && "expected to find user block!");
4137  } else {
4138  if (UserParent && UserParent != UserInst->getParent())
4139  return None;
4140  UserParent = UserInst->getParent();
4141  }
4142 
4143  // Make sure these checks are done only once; naturally, we do them the
4144  // first time we get the user parent, which saves compile time.
4145  if (NumUsers == 0) {
4146  // Try sinking to another block. If that block is unreachable, then do
4147  // not bother. SimplifyCFG should handle it.
4148  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4149  return None;
4150 
4151  auto *Term = UserParent->getTerminator();
4152  // See if the user is one of our successors that has only one
4153  // predecessor, so that we don't have to split the critical edge.
4154  // Another option where we can sink is a block that ends with a
4155  // terminator that does not pass control to other block (such as
4156  // return or unreachable or resume). In this case:
4157  // - I dominates the User (by SSA form);
4158  // - the User will be executed at most once.
4159  // So sinking I down to User is always profitable or neutral.
4160  if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4161  return None;
4162 
4163  assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4164  }
4165 
4166  NumUsers++;
4167  }
4168 
4169  // No user or only has droppable users.
4170  if (!UserParent)
4171  return None;
4172 
4173  return UserParent;
4174  };
4175 
4176  auto OptBB = getOptionalSinkBlockForInst(I);
4177  if (OptBB) {
4178  auto *UserParent = *OptBB;
4179  // Okay, the CFG is simple enough, try to sink this instruction.
4180  if (TryToSinkInstruction(I, UserParent, TLI)) {
4181  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4182  MadeIRChange = true;
4183  // We'll add uses of the sunk instruction below, but since
4184  // sinking can expose opportunities for its *operands*, add
4185  // them to the worklist.
4186  for (Use &U : I->operands())
4187  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4188  Worklist.push(OpI);
4189  }
4190  }
4191 
4192  // Now that we have an instruction, try combining it to simplify it.
4193  Builder.SetInsertPoint(I);
4194  Builder.CollectMetadataToCopy(
4195  I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4196 
4197 #ifndef NDEBUG
4198  std::string OrigI;
4199 #endif
4200  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4201  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4202 
4203  if (Instruction *Result = visit(*I)) {
4204  ++NumCombined;
4205  // Should we replace the old instruction with a new one?
4206  if (Result != I) {
4207  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4208  << " New = " << *Result << '\n');
4209 
4210  Result->copyMetadata(*I,
4211  {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4212  // Everything uses the new instruction now.
4213  I->replaceAllUsesWith(Result);
4214 
4215  // Move the name to the new instruction first.
4216  Result->takeName(I);
4217 
4218  // Insert the new instruction into the basic block...
4219  BasicBlock *InstParent = I->getParent();
4220  BasicBlock::iterator InsertPos = I->getIterator();
4221 
4222  // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4223  if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4224  // We need to fix up the insertion point.
4225  if (isa<PHINode>(I)) // PHI -> Non-PHI
4226  InsertPos = InstParent->getFirstInsertionPt();
4227  else // Non-PHI -> PHI
4228  InsertPos = InstParent->getFirstNonPHI()->getIterator();
4229  }
4230 
4231  InstParent->getInstList().insert(InsertPos, Result);
4232 
4233  // Push the new instruction and any users onto the worklist.
4234  Worklist.pushUsersToWorkList(*Result);
4235  Worklist.push(Result);
4236 
4237  eraseInstFromFunction(*I);
4238  } else {
4239  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4240  << " New = " << *I << '\n');
4241 
4242  // If the instruction was modified, it's possible that it is now dead.
4243  // if so, remove it.
4244  if (isInstructionTriviallyDead(I, &TLI)) {
4245  eraseInstFromFunction(*I);
4246  } else {
4247  Worklist.pushUsersToWorkList(*I);
4248  Worklist.push(I);
4249  }
4250  }
4251  MadeIRChange = true;
4252  }
4253  }
4254 
4255  Worklist.zap();
4256  return MadeIRChange;
4257 }
4258 
4259 // Track the scopes used by !alias.scope and !noalias. In a function, a
4260 // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4261 // by both sets. If not, the declaration of the scope can be safely omitted.
4262 // The MDNode of the scope can be omitted as well for the instructions that are
4263 // part of this function. We do not do that at this point, as this might become
4264 // too time consuming to do.
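// For example (illustrative): if scope !5 appears in some instruction's
// '!alias.scope' metadata but in no instruction's '!noalias' metadata (or vice
// versa), then a matching 'llvm.experimental.noalias.scope.decl' for !5 can be
// removed.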
4265  class AliasScopeTracker {
4266  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4267  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4268 
4269 public:
4270  void analyse(Instruction *I) {
4271  // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4272  if (!I->hasMetadataOtherThanDebugLoc())
4273  return;
4274 
4275  auto Track = [](Metadata *ScopeList, auto &Container) {
4276  const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4277  if (!MDScopeList || !Container.insert(MDScopeList).second)
4278  return;
4279  for (auto &MDOperand : MDScopeList->operands())
4280  if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4281  Container.insert(MDScope);
4282  };
4283 
4284  Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4285  Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4286  }
4287 
4288  bool isNoAliasScopeDeclDead(Instruction *Inst) {
4289  NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4290  if (!Decl)
4291  return false;
4292 
4293  assert(Decl->use_empty() &&
4294  "llvm.experimental.noalias.scope.decl in use ?");
4295  const MDNode *MDSL = Decl->getScopeList();
4296  assert(MDSL->getNumOperands() == 1 &&
4297  "llvm.experimental.noalias.scope should refer to a single scope");
4298  auto &MDOperand = MDSL->getOperand(0);
4299  if (auto *MD = dyn_cast<MDNode>(MDOperand))
4300  return !UsedAliasScopesAndLists.contains(MD) ||
4301  !UsedNoAliasScopesAndLists.contains(MD);
4302 
4303  // Not an MDNode? Throw it away.
4304  return true;
4305  }
4306 };
4307 
4308 /// Populate the IC worklist from a function, by walking it in depth-first
4309 /// order and adding all reachable code to the worklist.
4310 ///
4311 /// This has a couple of tricks to make the code faster and more powerful. In
4312 /// particular, we constant fold and DCE instructions as we go, to avoid adding
4313 /// them to the worklist (this significantly speeds up instcombine on code where
4314 /// many instructions are dead or constant). Additionally, if we find a branch
4315 /// whose condition is a known constant, we only visit the reachable successors.
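/// For example (illustrative): given 'br i1 true, label %a, label %b', only %a
/// is walked here; if %b is otherwise unreachable, its instructions are
/// deleted by the cleanup loop below.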
4316  static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
4317  const TargetLibraryInfo *TLI,
4318  InstructionWorklist &ICWorklist) {
4319  bool MadeIRChange = false;
4320  SmallPtrSet<BasicBlock *, 32> Visited;
4321  SmallVector<BasicBlock *, 256> Worklist;
4322  Worklist.push_back(&F.front());
4323 
4324  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4325  DenseMap<Constant *, Constant *> FoldedConstants;
4326  AliasScopeTracker SeenAliasScopes;
4327 
4328  do {
4329  BasicBlock *BB = Worklist.pop_back_val();
4330 
4331  // We have now visited this block! If we've already been here, ignore it.
4332  if (!Visited.insert(BB).second)
4333  continue;
4334 
4335  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4336  // ConstantProp instruction if trivially constant.
4337  if (!Inst.use_empty() &&
4338  (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4339  if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
4340  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4341  << '\n');
4342  Inst.replaceAllUsesWith(C);
4343  ++NumConstProp;
4344  if (isInstructionTriviallyDead(&Inst, TLI))
4345  Inst.eraseFromParent();
4346  MadeIRChange = true;
4347  continue;
4348  }
4349 
4350  // See if we can constant fold its operands.
4351  for (Use &U : Inst.operands()) {
4352  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4353  continue;
4354 
4355  auto *C = cast<Constant>(U);
4356  Constant *&FoldRes = FoldedConstants[C];
4357  if (!FoldRes)
4358  FoldRes = ConstantFoldConstant(C, DL, TLI);
4359 
4360  if (FoldRes != C) {
4361  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4362  << "\n Old = " << *C
4363  << "\n New = " << *FoldRes << '\n');
4364  U = FoldRes;
4365  MadeIRChange = true;
4366  }
4367  }
4368 
4369  // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4370  // these call instructions consumes non-trivial amount of time and
4371  // provides no value for the optimization.
4372  if (!Inst.isDebugOrPseudoInst()) {
4373  InstrsForInstructionWorklist.push_back(&Inst);
4374  SeenAliasScopes.analyse(&Inst);
4375  }
4376  }
4377 
4378  // Recursively visit successors. If this is a branch or switch on a
4379  // constant, only visit the reachable successor.
4380  Instruction *TI = BB->getTerminator();
4381  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
4382  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
4383  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
4384  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
4385  Worklist.push_back(ReachableBB);
4386  continue;
4387  }
4388  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4389  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4390  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
4391  continue;
4392  }
4393  }
4394 
4395  append_range(Worklist, successors(TI));
4396  } while (!Worklist.empty());
4397 
4398  // Remove instructions inside unreachable blocks. This prevents the
4399  // instcombine code from having to deal with some bad special cases, and
4400  // reduces use counts of instructions.
4401  for (BasicBlock &BB : F) {
4402  if (Visited.count(&BB))
4403  continue;
4404 
4405  unsigned NumDeadInstInBB;
4406  unsigned NumDeadDbgInstInBB;
4407  std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4408  removeAllNonTerminatorAndEHPadInstructions(&BB);
4409 
4410  MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4411  NumDeadInst += NumDeadInstInBB;
4412  }
4413 
4414  // Once we've found all of the instructions to add to instcombine's worklist,
4415  // add them in reverse order. This way instcombine will visit from the top
4416  // of the function down. This jives well with the way that it adds all uses
4417  // of instructions to the worklist after doing a transformation, thus avoiding
4418  // some N^2 behavior in pathological cases.
4419  ICWorklist.reserve(InstrsForInstructionWorklist.size());
4420  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4421  // DCE instruction if trivially dead. As we iterate in reverse program
4422  // order here, we will clean up whole chains of dead instructions.
4423  if (isInstructionTriviallyDead(Inst, TLI) ||
4424  SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4425  ++NumDeadInst;
4426  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4427  salvageDebugInfo(*Inst);
4428  Inst->eraseFromParent();
4429  MadeIRChange = true;
4430  continue;
4431  }
4432 
4433  ICWorklist.push(Inst);
4434  }
4435 
4436  return MadeIRChange;
4437 }
4438 
4439 static bool combineInstructionsOverFunction(
4440  Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4441  AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4442  DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4443  ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4444  auto &DL = F.getParent()->getDataLayout();
4445  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
4446 
4447  /// Builder - This is an IRBuilder that automatically inserts new
4448  /// instructions into the worklist when they are created.
4449  InstCombiner::BuilderTy Builder(
4450  F.getContext(), TargetFolder(DL),
4451  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4452  Worklist.add(I);
4453  if (auto *Assume = dyn_cast<AssumeInst>(I))
4454  AC.registerAssumption(Assume);
4455  }));
4456 
4457  // Lower dbg.declare intrinsics otherwise their value may be clobbered
4458  // by instcombiner.
4459  bool MadeIRChange = false;
4460  if (ShouldLowerDbgDeclare)
4461  MadeIRChange = LowerDbgDeclare(F);
4462 
4463  // Iterate while there is work to do.
4464  unsigned Iteration = 0;
4465  while (true) {
4466  ++NumWorklistIterations;
4467  ++Iteration;
4468 
4469  if (Iteration > InfiniteLoopDetectionThreshold) {
4470  report_fatal_error(
4471  "Instruction Combining seems stuck in an infinite loop after " +
4472  Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4473  }
4474 
4475  if (Iteration > MaxIterations) {
4476  LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4477  << " on " << F.getName()
4478  << " reached; stopping before reaching a fixpoint\n");
4479  break;
4480  }
4481 
4482  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4483  << F.getName() << "\n");
4484 
4485  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4486 
4487  InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4488  ORE, BFI, PSI, DL, LI);
4489  IC.MaxArraySizeForCombine = MaxArraySizeForCombine;
4490 
4491  if (!IC.run())
4492  break;
4493 
4494  MadeIRChange = true;
4495  }
4496 
4497  return MadeIRChange;
4498 }
4499 
4500 InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
4501 
4502 InstCombinePass::InstCombinePass(unsigned MaxIterations)
4503  : MaxIterations(MaxIterations) {}
4504 
4505 PreservedAnalyses InstCombinePass::run(Function &F,
4506  FunctionAnalysisManager &AM) {
4507  auto &AC = AM.getResult<AssumptionAnalysis>(F);
4508  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4509  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4510  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4511  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4512 
4513  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4514 
4515  auto *AA = &AM.getResult<AAManager>(F);
4516  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4517  ProfileSummaryInfo *PSI =
4518  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4519  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4520  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4521 
4522  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4523  BFI, PSI, MaxIterations, LI))
4524  // No changes, all analyses are preserved.
4525  return PreservedAnalyses::all();
4526 
4527  // Mark all the analyses that instcombine updates as preserved.
4528  PreservedAnalyses PA;
4529  PA.preserveSet<CFGAnalyses>();
4530  return PA;
4531 }
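InstCombinePass::run() above is the new-pass-manager entry point. A minimal, self-contained sketch of scheduling it over a module, assuming only the standard PassBuilder registration boilerplate (runInstCombine is a placeholder name, not an LLVM API):

  #include <utility>
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/InstCombine/InstCombine.h"

  void runInstCombine(llvm::Module &M) {
    llvm::PassBuilder PB;
    llvm::LoopAnalysisManager LAM;
    llvm::FunctionAnalysisManager FAM;
    llvm::CGSCCAnalysisManager CGAM;
    llvm::ModuleAnalysisManager MAM;
    // Register the analyses that run() queries (AA, AssumptionCache, DT, TLI, TTI, ...).
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    llvm::FunctionPassManager FPM;
    FPM.addPass(llvm::InstCombinePass());   // default MaxIterations; an unsigned overload exists
    llvm::ModulePassManager MPM;
    MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
    MPM.run(M, MAM);
  }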
4532 
4533 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
4534  AU.setPreservesCFG();
4535  AU.addRequired<AAResultsWrapperPass>();
4536  AU.addRequired<AssumptionCacheTracker>();
4537  AU.addRequired<TargetLibraryInfoWrapperPass>();
4538  AU.addRequired<TargetTransformInfoWrapperPass>();
4539  AU.addRequired<DominatorTreeWrapperPass>();
4540  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
4541  AU.addPreserved<DominatorTreeWrapperPass>();
4542  AU.addPreserved<AAResultsWrapperPass>();
4543  AU.addPreserved<BasicAAWrapperPass>();
4544  AU.addPreserved<GlobalsAAWrapperPass>();
4545  AU.addRequired<ProfileSummaryInfoWrapperPass>();
4546  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
4547 }
4548 
4549 bool InstructionCombiningPass::runOnFunction(Function &F) {
4550  if (skipFunction(F))
4551  return false;
4552 
4553  // Required analyses.
4554  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4555  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4556  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4557  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4558  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4559  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4560 
4561  // Optional analyses.
4562  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4563  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4564  ProfileSummaryInfo *PSI =
4565  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4566  BlockFrequencyInfo *BFI =
4567  (PSI && PSI->hasProfileSummary()) ?
4568  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4569  nullptr;
4570 
4571  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4572  BFI, PSI, MaxIterations, LI);
4573 }
4574 
4575 char InstructionCombiningPass::ID = 0;
4576 
4577 InstructionCombiningPass::InstructionCombiningPass()
4578  : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
4579  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4580 }
4581 
4582 InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
4583  : FunctionPass(ID), MaxIterations(MaxIterations) {
4584  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4585 }
4586 
4587 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
4588  "Combine redundant instructions", false, false)
4589 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4590 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4591 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
4592 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4593 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
4594 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
4595 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
4596 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
4597 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
4598 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
4599  "Combine redundant instructions", false, false)
4600 
4601 // Initialization Routines
4602 void llvm::initializeInstCombine(PassRegistry &Registry) {
4603  initializeInstructionCombiningPassPass(Registry);
4604 }
4605 
4606 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
4607  initializeInstCombine(*unwrap(R));
4608 }
4609 
4610 FunctionPass *llvm::createInstructionCombiningPass() {
4611  return new InstructionCombiningPass();
4612 }
4613 
4614 FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4615  return new InstructionCombiningPass(MaxIterations);
4616 }
4617 
4618 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
4619  unwrap(PM)->add(createInstructionCombiningPass());
4620 }
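createInstructionCombiningPass() and the C binding at the end of the listing serve the legacy pass manager. A minimal sketch of driving the legacy wrapper pass directly (runLegacyInstCombine is a placeholder name); its required analyses are declared through getAnalysisUsage() above, so the legacy pass manager schedules them automatically:

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/InstCombine/InstCombine.h"

  void runLegacyInstCombine(llvm::Module &M) {
    llvm::legacy::PassManager PM;
    PM.add(llvm::createInstructionCombiningPass());  // PM takes ownership of the pass
    PM.run(M);
  }

From the C API, the LLVMAddInstructionCombiningPass entry point shown above wraps the same factory function.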
llvm::EHPersonality::MSVC_CXX
@ MSVC_CXX
llvm::NoAliasScopeDeclInst
Definition: IntrinsicInst.h:1318
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::ExtractValueInst::hasIndices
bool hasIndices() const
Definition: Instructions.h:2484
llvm::Instruction::isTerminator
bool isTerminator() const
Definition: Instruction.h:160
llvm::OuterAnalysisManagerProxy
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR uni...
Definition: PassManager.h:1062
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1299
foldSelectGEP
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
Definition: InstructionCombining.cpp:1927
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2458
llvm::LandingPadInst::Create
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
Definition: Instructions.cpp:214
llvm::Instruction::isAssociative
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
Definition: Instruction.cpp:755
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1828
llvm::InstCombinerImpl::tryFactorization
Value * tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *, Value *, Value *, Value *)
This tries to simplify binary operations by factorizing out common terms (e.
Definition: InstructionCombining.cpp:634
llvm::APInt::sadd_ov
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1944
llvm
Definition: AddressRanges.h:17
prepareICWorklistFromFunction
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, const TargetLibraryInfo *TLI, InstructionWorklist &ICWorklist)
Populate the IC worklist from a function, by walking it in depth-first order and adding all reachable...
Definition: InstructionCombining.cpp:4316
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::LandingPadInst::isCleanup
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
Definition: Instructions.h:2963
llvm::TargetTransformInfo::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
Definition: TargetTransformInfo.cpp:301
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:169
llvm::InvokeInst::Create
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3820
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
llvm::InstCombinerImpl::visitGetElementPtrInst
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Definition: InstructionCombining.cpp:2247
foldOperationIntoSelectOperand
static Value * foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder)
Definition: InstructionCombining.cpp:996
llvm::CastInst::CreatePointerBitCastOrAddrSpaceCast
static CastInst * CreatePointerBitCastOrAddrSpaceCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast or an AddrSpaceCast cast instruction.
Definition: Instructions.cpp:3309
llvm::ConvertDebugDeclareToDebugValue
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
===---------------------------------------------------------------------===// Dbg Intrinsic utilities
Definition: Local.cpp:1452
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:65
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:280
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:224
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3017
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::EHPersonality::GNU_C_SjLj
@ GNU_C_SjLj
leftDistributesOverRight
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
Definition: InstructionCombining.cpp:566
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Instruction::isBitwiseLogicOp
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:212
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InstCombinerImpl::visitBranchInst
Instruction * visitBranchInst(BranchInst &BI)
Definition: InstructionCombining.cpp:3150
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1410
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4576
llvm::InstructionCombiningPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: InstructionCombining.cpp:4533
llvm::InstCombinerImpl::run
bool run()
Run the combiner over the entire worklist until it is empty.
Definition: InstructionCombining.cpp:4057
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::EHPersonality::GNU_Ada
@ GNU_Ada
IntrinsicInst.h
llvm::SimplifyQuery
Definition: InstructionSimplify.h:93
llvm::DIBuilder
Definition: DIBuilder.h:41
LLVMPassRegistryRef
struct LLVMOpaquePassRegistry * LLVMPassRegistryRef
Definition: Types.h:130
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::InstCombiner::targetInstCombineIntrinsic
Optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
Definition: InstructionCombining.cpp:171
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:218
llvm::InstCombinerImpl::foldBinOpIntoSelectOrPhi
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
Definition: InstructionCombining.cpp:1357
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Attribute
Definition: Attributes.h:52
llvm::APInt::ule
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1100
TargetFolder.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:103
llvm::PatternMatch::m_SplatOrUndefMask
Definition: PatternMatch.h:1545
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5225
llvm::InstCombinerImpl::visitExtractValueInst
Instruction * visitExtractValueInst(ExtractValueInst &EV)
Definition: InstructionCombining.cpp:3236
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:68
GetElementPtrTypeIterator.h
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5246
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:309
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::PatternMatch::m_SpecificMask
Definition: PatternMatch.h:1539
llvm::LandingPadInst
The landingpad instruction holds all of the information necessary to generate correct exception handl...
Definition: Instructions.h:2916
llvm::InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic
Optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: InstructionCombining.cpp:190
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:988
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A, B) such that A is the 0-based ...
Definition: STLExtras.h:2047
llvm::FunctionPass::skipFunction
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:173
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
LazyBlockFrequencyInfo.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:709
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::InsertValueInst::Create
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:2561
getBinOpsForFactorization
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS)
This function predicates factorization using distributive laws.
Definition: InstructionCombining.cpp:615
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3180
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::findDbgUsers
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:101
llvm::ConstantRange::getEquivalentICmp
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
Definition: ConstantRange.cpp:234
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::InstructionWorklist::reserve
void reserve(size_t Size)
Definition: InstructionWorklist.h:79
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::NoAliasScopeDeclInst::getScopeList
MDNode * getScopeList() const
Definition: IntrinsicInst.h:1328
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1741
llvm::InstCombiner::MaxArraySizeForCombine
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:53
APInt.h
llvm::InstCombinerImpl::freezeOtherUses
bool freezeOtherUses(FreezeInst &FI)
Definition: InstructionCombining.cpp:3790
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
llvm::InstructionCombiningPass::InstructionCombiningPass
InstructionCombiningPass()
Definition: InstructionCombining.cpp:4577
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
NL
#define NL
Definition: DetailedRecordsBackend.cpp:31
DenseMap.h
llvm::KnownBits::getConstant
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1423
llvm::InstCombinerImpl::visitAllocSite
Instruction * visitAllocSite(Instruction &FI)
Definition: InstructionCombining.cpp:2817
MemoryBuiltins.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::AttributeList
Definition: Attributes.h:408
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1474
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:164
EHPersonalities.h
InfiniteLoopDetectionThreshold
static cl::opt< unsigned > InfiniteLoopDetectionThreshold("instcombine-infinite-loop-threshold", cl::desc("Number of instruction combining iterations considered an " "infinite loop"), cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden)
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::ExtractValueInst::indices
iterator_range< idx_iterator > indices() const
Definition: Instructions.h:2462
llvm::SimplifyFreezeInst
Value * SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
Definition: InstructionSimplify.cpp:6266
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:962
llvm::Optional
Definition: APInt.h:33
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::SmallPtrSet< Value *, 16 >
llvm::SimplifyGEPInst
Value * SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4662
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::PatternMatch::m_AShr
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1133
isMustTailCall
static bool isMustTailCall(Value *V)
Definition: InstructionCombining.cpp:3064
Operator.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::InstCombinerImpl::Descale
Value * Descale(Value *Val, APInt Scale, bool &NoSignedWrap)
Returns a value X such that Val = X * Scale, or null if none.
Definition: InstructionCombining.cpp:1405
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:261
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
LLVMInitializeInstCombine
void LLVMInitializeInstCombine(LLVMPassRegistryRef R)
Definition: InstructionCombining.cpp:4606
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::BinaryOperator::CreateNeg
static BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
Definition: Instructions.cpp:2794
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::PatternMatch::m_Not
BinaryOp_match< ValTy, cst_pred_ty< is_all_ones >, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Definition: PatternMatch.h:2295
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
BaseType
llvm::succ_empty
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
isRemovableWrite
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
Definition: InstructionCombining.cpp:2694
BasicAliasAnalysis.h
LegacyPassManager.h
llvm::Type::getNonOpaquePointerElementType
Type * getNonOpaquePointerElementType() const
Only use this method in code that is not reachable with opaque pointers, or part of deprecated method...
Definition: Type.h:382
ConstantFolding.h
llvm::EmitGEPOffset
Value * EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.h:29
Use.h
llvm::EHPersonality::GNU_ObjC
@ GNU_ObjC
llvm::APInt::isMinValue
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:402
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::InstCombinePass::InstCombinePass
InstCombinePass()
Definition: InstructionCombining.cpp:4500
llvm::InstructionWorklist::add
void add(Instruction *I)
Add instruction to the worklist.
Definition: InstructionWorklist.h:44
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1329
llvm::InstCombinerImpl::SimplifySelectsFeedingBinaryOp
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
Definition: InstructionCombining.cpp:858
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::initializeInstCombine
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
Definition: InstructionCombining.cpp:4602
KnownBits.h
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:98
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
combineInstructionsOverFunction
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI)
Definition: InstructionCombining.cpp:4439
llvm::gep_type_end
gep_type_iterator gep_type_end(const User *GEP)
Definition: GetElementPtrTypeIterator.h:130
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1199
llvm::InstCombinerImpl::visitUnreachableInst
Instruction * visitUnreachableInst(UnreachableInst &I)
Definition: InstructionCombining.cpp:3094
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1996
AliasAnalysis.h
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2726
llvm::isAllocationFn
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (eith...
Definition: MemoryBuiltins.cpp:272
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
llvm::InstCombinerImpl::foldBinopWithPhiOperands
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
Definition: InstructionCombining.cpp:1293
llvm::InstCombinerImpl::visitFree
Instruction * visitFree(CallInst &FI)
Definition: InstructionCombining.cpp:3020
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:270
CommandLine.h
llvm::PatternMatch::m_c_BinOp
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
Definition: PatternMatch.h:2221
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:215
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1366
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:317
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::Instruction::willReturn
bool willReturn() const
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
Definition: Instruction.cpp:704
AliasScopeTracker
Definition: InstructionCombining.cpp:4265
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:386
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1768
hasNoUnsignedWrap
static bool hasNoUnsignedWrap(BinaryOperator &I)
Definition: InstructionCombining.cpp:298
llvm::AddrSpaceCastInst
This class represents a conversion between pointers from one address space to another.
Definition: Instructions.h:5265
InstCombineInternal.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::ArrayType::getNumElements
uint64_t getNumElements() const
Definition: DerivedTypes.h:369
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1658
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2760
llvm::AAResults
Definition: AliasAnalysis.h:511
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_END(InstructionCombiningPass
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:667
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:127
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
Intrinsics.h
llvm::InstCombinerImpl::foldOpIntoPhi
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Definition: InstructionCombining.cpp:1136
llvm::LandingPadInst::getNumClauses
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
Definition: Instructions.h:2988
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::CallBase::setAttributes
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1478
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
InstructionWorklist.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InstCombinerImpl::visitFreeze
Instruction * visitFreeze(FreezeInst &I)
Definition: InstructionCombining.cpp:3833
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:190
llvm::EHPersonality::GNU_C
@ GNU_C
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
DEBUG_COUNTER
DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited")
llvm::canConstantFoldCallTo
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Definition: ConstantFolding.cpp:1386
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::InstCombinerImpl::visitUnconditionalBranchInst
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Definition: InstructionCombining.cpp:3121
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:726
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
<