LLVM  14.0.0git
InstructionCombining.cpp
1 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // InstructionCombining - Combine instructions to form fewer, simple
10 // instructions. This pass does not modify the CFG. This pass is where
11 // algebraic simplification happens.
12 //
13 // This pass combines things like:
14 // %Y = add i32 %X, 1
15 // %Z = add i32 %Y, 1
16 // into:
17 // %Z = add i32 %X, 2
18 //
19 // This is a simple worklist driven algorithm.
20 //
21 // This pass guarantees that the following canonicalizations are performed on
22 // the program:
23 // 1. If a binary operator has a constant operand, it is moved to the RHS
24 // 2. Bitwise operators with constant operands are always grouped so that
25 // shifts are performed first, then or's, then and's, then xor's.
26 // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27 // 4. All cmp instructions on boolean values are replaced with logical ops
28 // 5. add X, X is represented as (X*2) => (X << 1)
29 // 6. Multiplies with a power-of-two constant argument are transformed into
30 // shifts.
31 // ... etc.
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "InstCombineInternal.h"
36 #include "llvm-c/Initialization.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/DenseMap.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
45 #include "llvm/ADT/TinyPtrVector.h"
50 #include "llvm/Analysis/CFG.h"
56 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/IR/BasicBlock.h"
66 #include "llvm/IR/CFG.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/Dominators.h"
73 #include "llvm/IR/Function.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InstrTypes.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/Metadata.h"
83 #include "llvm/IR/Operator.h"
84 #include "llvm/IR/PassManager.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/Use.h"
88 #include "llvm/IR/User.h"
89 #include "llvm/IR/Value.h"
90 #include "llvm/IR/ValueHandle.h"
91 #include "llvm/InitializePasses.h"
92 #include "llvm/Pass.h"
94 #include "llvm/Support/Casting.h"
96 #include "llvm/Support/Compiler.h"
97 #include "llvm/Support/Debug.h"
100 #include "llvm/Support/KnownBits.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <memory>
108 #include <string>
109 #include <utility>
110 
111 #define DEBUG_TYPE "instcombine"
113 
114 using namespace llvm;
115 using namespace llvm::PatternMatch;
116 
117 STATISTIC(NumWorklistIterations,
118  "Number of instruction combining iterations performed");
119 
120 STATISTIC(NumCombined , "Number of insts combined");
121 STATISTIC(NumConstProp, "Number of constant folds");
122 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
123 STATISTIC(NumSunkInst , "Number of instructions sunk");
124 STATISTIC(NumExpand, "Number of expansions");
125 STATISTIC(NumFactor , "Number of factorizations");
126 STATISTIC(NumReassoc , "Number of reassociations");
127 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
128  "Controls which instructions are visited");
129 
130 // FIXME: these limits eventually should be as low as 2.
131 static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
132 #ifndef NDEBUG
133 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
134 #else
135 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
136 #endif
137 
138 static cl::opt<bool>
139 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
140  cl::init(true));
141 
142 static cl::opt<unsigned> MaxIterations(
143     "instcombine-max-iterations",
144     cl::desc("Limit the maximum number of instruction combining iterations"),
145     cl::init(InstCombineDefaultMaxIterations));
146 
147 static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
148     "instcombine-infinite-loop-threshold",
149     cl::desc("Number of instruction combining iterations considered an "
150              "infinite loop"),
151     cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
152 
153 static cl::opt<unsigned>
154 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
155  cl::desc("Maximum array size considered when doing a combine"));
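// These are ordinary cl::opt flags, so they can be passed to 'opt' when this
// pass runs; an illustrative (not prescriptive) invocation:
//   opt -passes=instcombine -instcombine-max-iterations=2 -S in.ll -o out.ll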
156 
157 // FIXME: Remove this flag when it is no longer necessary to convert
158 // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
159 // increases variable availability at the cost of accuracy. Variables that
160 // cannot be promoted by mem2reg or SROA will be described as living in memory
161 // for their entire lifetime. However, passes like DSE and instcombine can
162 // delete stores to the alloca, leading to misleading and inaccurate debug
163 // information. This flag can be removed when those passes are fixed.
164 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
165  cl::Hidden, cl::init(true));
166 
167 Optional<Instruction *>
168 InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
169   // Handle target specific intrinsics
170  if (II.getCalledFunction()->isTargetIntrinsic()) {
171  return TTI.instCombineIntrinsic(*this, II);
172  }
173  return None;
174 }
175 
176 Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
177     IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
178  bool &KnownBitsComputed) {
179  // Handle target specific intrinsics
180  if (II.getCalledFunction()->isTargetIntrinsic()) {
181  return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
182  KnownBitsComputed);
183  }
184  return None;
185 }
186 
187 Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
188     IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
189  APInt &UndefElts3,
190  std::function<void(Instruction *, unsigned, APInt, APInt &)>
191  SimplifyAndSetOp) {
192  // Handle target specific intrinsics
193  if (II.getCalledFunction()->isTargetIntrinsic()) {
194     return TTI.simplifyDemandedVectorEltsIntrinsic(
195         *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
196  SimplifyAndSetOp);
197  }
198  return None;
199 }
200 
201 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
202  return llvm::EmitGEPOffset(&Builder, DL, GEP);
203 }
204 
205 /// Legal integers and common types are considered desirable. This is used to
206 /// avoid creating instructions with types that may not be supported well by the
207 /// backend.
208 /// NOTE: This treats i8, i16 and i32 specially because they are common
209 /// types in frontend languages.
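/// For example, i8, i16 and i32 are always considered desirable here, while a
/// width such as i64 is desirable only if the DataLayout declares it a legal
/// integer type (as it typically does on 64-bit targets).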
210 bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
211  switch (BitWidth) {
212  case 8:
213  case 16:
214  case 32:
215  return true;
216  default:
217  return DL.isLegalInteger(BitWidth);
218  }
219 }
220 
221 /// Return true if it is desirable to convert an integer computation from a
222 /// given bit width to a new bit width.
223 /// We don't want to convert from a legal to an illegal type or from a smaller
224 /// to a larger illegal type. A width of '1' is always treated as a desirable
225 /// type because i1 is a fundamental type in IR, and there are many specialized
226 /// optimizations for i1 types. Common/desirable widths are equally treated as
227 /// legal to convert to, in order to open up more combining opportunities.
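/// For example (assuming a typical 64-bit DataLayout where i8/i16/i32/i64 are
/// legal): i160 -> i64 is allowed (shrinking to a legal type), i64 -> i160 is
/// rejected (legal to illegal), and i33 -> i32 is allowed because i32 is a
/// desirable width.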
228 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
229  unsigned ToWidth) const {
230  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
231  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
232 
233  // Convert to desirable widths even if they are not legal types.
234  // Only shrink types, to prevent infinite loops.
235  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
236  return true;
237 
238  // If this is a legal integer from type, and the result would be an illegal
239  // type, don't do the transformation.
240  if (FromLegal && !ToLegal)
241  return false;
242 
243  // Otherwise, if both are illegal, do not increase the size of the result. We
244  // do allow things like i160 -> i64, but not i64 -> i160.
245  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
246  return false;
247 
248  return true;
249 }
250 
251 /// Return true if it is desirable to convert a computation from 'From' to 'To'.
252 /// We don't want to convert from a legal to an illegal type or from a smaller
253 /// to a larger illegal type. i1 is always treated as a legal type because it is
254 /// a fundamental type in IR, and there are many specialized optimizations for
255 /// i1 types.
256 bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
257  // TODO: This could be extended to allow vectors. Datalayout changes might be
258  // needed to properly support that.
259  if (!From->isIntegerTy() || !To->isIntegerTy())
260  return false;
261 
262  unsigned FromWidth = From->getPrimitiveSizeInBits();
263  unsigned ToWidth = To->getPrimitiveSizeInBits();
264  return shouldChangeType(FromWidth, ToWidth);
265 }
266 
267 // Return true, if No Signed Wrap should be maintained for I.
268 // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
269 // where both B and C should be ConstantInts, results in a constant that does
270 // not overflow. This function only handles the Add and Sub opcodes. For
271 // all other opcodes, the function conservatively returns false.
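// For example, reassociating "(X +nsw 1) +nsw 2" may keep nsw because 1 + 2
// does not overflow, whereas "(X +nsw 1) +nsw INT_MAX" may not, since
// 1 + INT_MAX overflows.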
272 static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
273   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
274  if (!OBO || !OBO->hasNoSignedWrap())
275  return false;
276 
277   // We reason about Add and Sub only.
278  Instruction::BinaryOps Opcode = I.getOpcode();
279  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
280  return false;
281 
282  const APInt *BVal, *CVal;
283  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
284  return false;
285 
286  bool Overflow = false;
287  if (Opcode == Instruction::Add)
288  (void)BVal->sadd_ov(*CVal, Overflow);
289  else
290  (void)BVal->ssub_ov(*CVal, Overflow);
291 
292  return !Overflow;
293 }
294 
295 static bool hasNoUnsignedWrap(BinaryOperator &I) {
296   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
297  return OBO && OBO->hasNoUnsignedWrap();
298 }
299 
300 static bool hasNoSignedWrap(BinaryOperator &I) {
301   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
302  return OBO && OBO->hasNoSignedWrap();
303 }
304 
305 /// Conservatively clears subclassOptionalData after a reassociation or
306 /// commutation. We preserve fast-math flags when applicable, as they remain
307 /// valid across these transforms.
308 static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
309   FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
310  if (!FPMO) {
311  I.clearSubclassOptionalData();
312  return;
313  }
314 
315  FastMathFlags FMF = I.getFastMathFlags();
316  I.clearSubclassOptionalData();
317  I.setFastMathFlags(FMF);
318 }
319 
320 /// Combine constant operands of associative operations either before or after a
321 /// cast to eliminate one of the associative operations:
322 /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
323 /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
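/// For example (zext case, illustrative IR):
///   %a = and i8 %x, 12
///   %z = zext i8 %a to i32
///   %r = and i32 %z, 7
/// becomes
///   %z = zext i8 %x to i32
///   %r = and i32 %z, 4        ; 7 & zext(12) == 4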
324 static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
325                                    InstCombinerImpl &IC) {
326  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
327  if (!Cast || !Cast->hasOneUse())
328  return false;
329 
330  // TODO: Enhance logic for other casts and remove this check.
331  auto CastOpcode = Cast->getOpcode();
332  if (CastOpcode != Instruction::ZExt)
333  return false;
334 
335  // TODO: Enhance logic for other BinOps and remove this check.
336  if (!BinOp1->isBitwiseLogicOp())
337  return false;
338 
339  auto AssocOpcode = BinOp1->getOpcode();
340  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
341  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
342  return false;
343 
344  Constant *C1, *C2;
345  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
346  !match(BinOp2->getOperand(1), m_Constant(C2)))
347  return false;
348 
349  // TODO: This assumes a zext cast.
350  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
351  // to the destination type might lose bits.
352 
353  // Fold the constants together in the destination type:
354  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
355  Type *DestTy = C1->getType();
356  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
357  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
358  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
359  IC.replaceOperand(*BinOp1, 1, FoldedC);
360  return true;
361 }
362 
363 // Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
364 // inttoptr ( ptrtoint (x) ) --> x
365 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
366  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
367  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
368  DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
369  auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
370  Type *CastTy = IntToPtr->getDestTy();
371  if (PtrToInt &&
372  CastTy->getPointerAddressSpace() ==
373  PtrToInt->getSrcTy()->getPointerAddressSpace() &&
374  DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
375  DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
376  return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
377  "", PtrToInt);
378  }
379  }
380  return nullptr;
381 }
382 
383 /// This performs a few simplifications for operators that are associative or
384 /// commutative:
385 ///
386 /// Commutative operators:
387 ///
388 /// 1. Order operands such that they are listed from right (least complex) to
389 /// left (most complex). This puts constants before unary operators before
390 /// binary operators.
391 ///
392 /// Associative operators:
393 ///
394 /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
395 /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
396 ///
397 /// Associative and commutative operators:
398 ///
399 /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
400 /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
401 /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
402 /// if C1 and C2 are constants.
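/// For example, transform 6 rewrites "(X + 10) + (Y + 20)" into
/// "(X + Y) + 30", folding the two constants into one.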
403 bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
404   Instruction::BinaryOps Opcode = I.getOpcode();
405  bool Changed = false;
406 
407  do {
408  // Order operands such that they are listed from right (least complex) to
409  // left (most complex). This puts constants before unary operators before
410  // binary operators.
411  if (I.isCommutative() && getComplexity(I.getOperand(0)) <
412  getComplexity(I.getOperand(1)))
413  Changed = !I.swapOperands();
414 
415  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
416  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
417 
418  if (I.isAssociative()) {
419  // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
420  if (Op0 && Op0->getOpcode() == Opcode) {
421  Value *A = Op0->getOperand(0);
422  Value *B = Op0->getOperand(1);
423  Value *C = I.getOperand(1);
424 
425  // Does "B op C" simplify?
426  if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
427  // It simplifies to V. Form "A op V".
428  replaceOperand(I, 0, A);
429  replaceOperand(I, 1, V);
430  bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
431  bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
432 
433  // Conservatively clear all optional flags since they may not be
434  // preserved by the reassociation. Reset nsw/nuw based on the above
435   // analysis.
436   ClearSubclassDataAfterReassociation(I);
437 
438  // Note: this is only valid because SimplifyBinOp doesn't look at
439  // the operands to Op0.
440  if (IsNUW)
441  I.setHasNoUnsignedWrap(true);
442 
443  if (IsNSW)
444  I.setHasNoSignedWrap(true);
445 
446  Changed = true;
447  ++NumReassoc;
448  continue;
449  }
450  }
451 
452  // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
453  if (Op1 && Op1->getOpcode() == Opcode) {
454  Value *A = I.getOperand(0);
455  Value *B = Op1->getOperand(0);
456  Value *C = Op1->getOperand(1);
457 
458  // Does "A op B" simplify?
459  if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
460  // It simplifies to V. Form "V op C".
461  replaceOperand(I, 0, V);
462  replaceOperand(I, 1, C);
463  // Conservatively clear the optional flags, since they may not be
464   // preserved by the reassociation.
465   ClearSubclassDataAfterReassociation(I);
466  Changed = true;
467  ++NumReassoc;
468  continue;
469  }
470  }
471  }
472 
473  if (I.isAssociative() && I.isCommutative()) {
474  if (simplifyAssocCastAssoc(&I, *this)) {
475  Changed = true;
476  ++NumReassoc;
477  continue;
478  }
479 
480  // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
481  if (Op0 && Op0->getOpcode() == Opcode) {
482  Value *A = Op0->getOperand(0);
483  Value *B = Op0->getOperand(1);
484  Value *C = I.getOperand(1);
485 
486  // Does "C op A" simplify?
487  if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
488  // It simplifies to V. Form "V op B".
489  replaceOperand(I, 0, V);
490  replaceOperand(I, 1, B);
491  // Conservatively clear the optional flags, since they may not be
492   // preserved by the reassociation.
493   ClearSubclassDataAfterReassociation(I);
494  Changed = true;
495  ++NumReassoc;
496  continue;
497  }
498  }
499 
500  // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
501  if (Op1 && Op1->getOpcode() == Opcode) {
502  Value *A = I.getOperand(0);
503  Value *B = Op1->getOperand(0);
504  Value *C = Op1->getOperand(1);
505 
506  // Does "C op A" simplify?
507  if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
508  // It simplifies to V. Form "B op V".
509  replaceOperand(I, 0, B);
510  replaceOperand(I, 1, V);
511  // Conservatively clear the optional flags, since they may not be
512   // preserved by the reassociation.
513   ClearSubclassDataAfterReassociation(I);
514  Changed = true;
515  ++NumReassoc;
516  continue;
517  }
518  }
519 
520  // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
521  // if C1 and C2 are constants.
522  Value *A, *B;
523  Constant *C1, *C2;
524  if (Op0 && Op1 &&
525  Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
526  match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
527  match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
528  bool IsNUW = hasNoUnsignedWrap(I) &&
529  hasNoUnsignedWrap(*Op0) &&
530  hasNoUnsignedWrap(*Op1);
531  BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
532  BinaryOperator::CreateNUW(Opcode, A, B) :
533  BinaryOperator::Create(Opcode, A, B);
534 
535  if (isa<FPMathOperator>(NewBO)) {
536  FastMathFlags Flags = I.getFastMathFlags();
537  Flags &= Op0->getFastMathFlags();
538  Flags &= Op1->getFastMathFlags();
539  NewBO->setFastMathFlags(Flags);
540  }
541  InsertNewInstWith(NewBO, I);
542  NewBO->takeName(Op1);
543  replaceOperand(I, 0, NewBO);
544  replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
545  // Conservatively clear the optional flags, since they may not be
546   // preserved by the reassociation.
547   ClearSubclassDataAfterReassociation(I);
548  if (IsNUW)
549  I.setHasNoUnsignedWrap(true);
550 
551  Changed = true;
552  continue;
553  }
554  }
555 
556  // No further simplifications.
557  return Changed;
558  } while (true);
559 }
560 
561 /// Return whether "X LOp (Y ROp Z)" is always equal to
562 /// "(X LOp Y) ROp (X LOp Z)".
563 static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
564                                      Instruction::BinaryOps ROp) {
565   // X & (Y | Z) <--> (X & Y) | (X & Z)
566  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
567  if (LOp == Instruction::And)
568  return ROp == Instruction::Or || ROp == Instruction::Xor;
569 
570  // X | (Y & Z) <--> (X | Y) & (X | Z)
571  if (LOp == Instruction::Or)
572  return ROp == Instruction::And;
573 
574  // X * (Y + Z) <--> (X * Y) + (X * Z)
575  // X * (Y - Z) <--> (X * Y) - (X * Z)
576  if (LOp == Instruction::Mul)
577  return ROp == Instruction::Add || ROp == Instruction::Sub;
578 
579  return false;
580 }
581 
582 /// Return whether "(X LOp Y) ROp Z" is always equal to
583 /// "(X ROp Z) LOp (Y ROp Z)".
584 static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
585                                      Instruction::BinaryOps ROp) {
586   if (Instruction::isCommutative(ROp))
587     return leftDistributesOverRight(ROp, LOp);
588 
589   // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
590   return Instruction::isBitwiseLogicOp(LOp) && ROp == Instruction::Shl;
591 
592  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
593  // but this requires knowing that the addition does not overflow and other
594  // such subtleties.
595 }
596 
597 /// This function returns identity value for given opcode, which can be used to
598 /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
599 static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
600   if (isa<Constant>(V))
601  return nullptr;
602 
603  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
604 }
605 
606 /// This function predicates factorization using distributive laws. By default,
607 /// it just returns the 'Op' inputs. But for special-cases like
608 /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
609 /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
610 /// allow more factorization opportunities.
611 static Instruction::BinaryOps
612 getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
613                           Value *&LHS, Value *&RHS) {
614  assert(Op && "Expected a binary operator");
615  LHS = Op->getOperand(0);
616  RHS = Op->getOperand(1);
617  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
618  Constant *C;
619  if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
620  // X << C --> X * (1 << C)
621  RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
622  return Instruction::Mul;
623  }
624  // TODO: We can add other conversions e.g. shr => div etc.
625  }
626  return Op->getOpcode();
627 }
628 
629 /// This tries to simplify binary operations by factorizing out common terms
630 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
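/// For example (illustrative IR):
///   %t0 = mul i32 %a, %b
///   %t1 = mul i32 %a, %c
///   %r  = add i32 %t0, %t1
/// -->
///   %s  = add i32 %b, %c
///   %r  = mul i32 %a, %s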
631 Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
632                                           Instruction::BinaryOps InnerOpcode,
633  Value *A, Value *B, Value *C,
634  Value *D) {
635  assert(A && B && C && D && "All values must be provided");
636 
637  Value *V = nullptr;
638  Value *SimplifiedInst = nullptr;
639  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
640  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
641 
642  // Does "X op' Y" always equal "Y op' X"?
643  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
644 
645  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
646  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
647  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
648  // commutative case, "(A op' B) op (C op' A)"?
649  if (A == C || (InnerCommutative && A == D)) {
650  if (A != C)
651  std::swap(C, D);
652  // Consider forming "A op' (B op D)".
653  // If "B op D" simplifies then it can be formed with no cost.
654  V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
655  // If "B op D" doesn't simplify then only go on if both of the existing
656  // operations "A op' B" and "C op' D" will be zapped as no longer used.
657  if (!V && LHS->hasOneUse() && RHS->hasOneUse())
658  V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
659  if (V) {
660  SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
661  }
662  }
663 
664  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
665  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
666  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
667  // commutative case, "(A op' B) op (B op' D)"?
668  if (B == D || (InnerCommutative && B == C)) {
669  if (B != D)
670  std::swap(C, D);
671  // Consider forming "(A op C) op' B".
672  // If "A op C" simplifies then it can be formed with no cost.
673  V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
674 
675  // If "A op C" doesn't simplify then only go on if both of the existing
676  // operations "A op' B" and "C op' D" will be zapped as no longer used.
677  if (!V && LHS->hasOneUse() && RHS->hasOneUse())
678  V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
679  if (V) {
680  SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
681  }
682  }
683 
684  if (SimplifiedInst) {
685  ++NumFactor;
686  SimplifiedInst->takeName(&I);
687 
688  // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
689  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
690  if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
691  bool HasNSW = false;
692  bool HasNUW = false;
693  if (isa<OverflowingBinaryOperator>(&I)) {
694  HasNSW = I.hasNoSignedWrap();
695  HasNUW = I.hasNoUnsignedWrap();
696  }
697 
698  if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
699  HasNSW &= LOBO->hasNoSignedWrap();
700  HasNUW &= LOBO->hasNoUnsignedWrap();
701  }
702 
703  if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
704  HasNSW &= ROBO->hasNoSignedWrap();
705  HasNUW &= ROBO->hasNoUnsignedWrap();
706  }
707 
708  if (TopLevelOpcode == Instruction::Add &&
709  InnerOpcode == Instruction::Mul) {
710  // We can propagate 'nsw' if we know that
711  // %Y = mul nsw i16 %X, C
712  // %Z = add nsw i16 %Y, %X
713  // =>
714  // %Z = mul nsw i16 %X, C+1
715  //
716  // iff C+1 isn't INT_MIN
717  const APInt *CInt;
718  if (match(V, m_APInt(CInt))) {
719  if (!CInt->isMinSignedValue())
720  BO->setHasNoSignedWrap(HasNSW);
721  }
722 
723  // nuw can be propagated with any constant or nuw value.
724  BO->setHasNoUnsignedWrap(HasNUW);
725  }
726  }
727  }
728  }
729  return SimplifiedInst;
730 }
731 
732 /// This tries to simplify binary operations which some other binary operation
733 /// distributes over either by factorizing out common terms
734 /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
735 /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
736 /// Returns the simplified value, or null if it didn't simplify.
737 Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
738   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
739  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
740  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
741  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
742 
743  {
744  // Factorization.
745  Value *A, *B, *C, *D;
746  Instruction::BinaryOps LHSOpcode, RHSOpcode;
747  if (Op0)
748  LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
749  if (Op1)
750  RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
751 
752  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
753  // a common term.
754  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
755  if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
756  return V;
757 
758  // The instruction has the form "(A op' B) op (C)". Try to factorize common
759  // term.
760  if (Op0)
761  if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
762  if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
763  return V;
764 
765  // The instruction has the form "(B) op (C op' D)". Try to factorize common
766  // term.
767  if (Op1)
768  if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
769  if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
770  return V;
771  }
772 
773  // Expansion.
774  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
775  // The instruction has the form "(A op' B) op C". See if expanding it out
776  // to "(A op C) op' (B op C)" results in simplifications.
777  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
778  Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
779 
780  // Disable the use of undef because it's not safe to distribute undef.
781  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
782  Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
783  Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
784 
785  // Do "A op C" and "B op C" both simplify?
786  if (L && R) {
787  // They do! Return "L op' R".
788  ++NumExpand;
789  C = Builder.CreateBinOp(InnerOpcode, L, R);
790  C->takeName(&I);
791  return C;
792  }
793 
794  // Does "A op C" simplify to the identity value for the inner opcode?
795  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
796  // They do! Return "B op C".
797  ++NumExpand;
798  C = Builder.CreateBinOp(TopLevelOpcode, B, C);
799  C->takeName(&I);
800  return C;
801  }
802 
803  // Does "B op C" simplify to the identity value for the inner opcode?
804  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
805  // They do! Return "A op C".
806  ++NumExpand;
807  C = Builder.CreateBinOp(TopLevelOpcode, A, C);
808  C->takeName(&I);
809  return C;
810  }
811  }
812 
813  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
814  // The instruction has the form "A op (B op' C)". See if expanding it out
815  // to "(A op B) op' (A op C)" results in simplifications.
816  Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
817  Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
818 
819  // Disable the use of undef because it's not safe to distribute undef.
820  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
821  Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
822  Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
823 
824  // Do "A op B" and "A op C" both simplify?
825  if (L && R) {
826  // They do! Return "L op' R".
827  ++NumExpand;
828  A = Builder.CreateBinOp(InnerOpcode, L, R);
829  A->takeName(&I);
830  return A;
831  }
832 
833  // Does "A op B" simplify to the identity value for the inner opcode?
834  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
835  // They do! Return "A op C".
836  ++NumExpand;
837  A = Builder.CreateBinOp(TopLevelOpcode, A, C);
838  A->takeName(&I);
839  return A;
840  }
841 
842  // Does "A op C" simplify to the identity value for the inner opcode?
843  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
844  // They do! Return "A op B".
845  ++NumExpand;
846  A = Builder.CreateBinOp(TopLevelOpcode, A, B);
847  A->takeName(&I);
848  return A;
849  }
850  }
851 
852  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
853 }
854 
855 Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
856                                                         Value *LHS,
857  Value *RHS) {
858  Value *A, *B, *C, *D, *E, *F;
859  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
860  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
861  if (!LHSIsSelect && !RHSIsSelect)
862  return nullptr;
863 
864  FastMathFlags FMF;
865   BuilderTy::FastMathFlagGuard Guard(Builder);
866   if (isa<FPMathOperator>(&I)) {
867  FMF = I.getFastMathFlags();
868  Builder.setFastMathFlags(FMF);
869  }
870 
871  Instruction::BinaryOps Opcode = I.getOpcode();
872   SimplifyQuery Q = SQ.getWithInstruction(&I);
873 
874  Value *Cond, *True = nullptr, *False = nullptr;
875  if (LHSIsSelect && RHSIsSelect && A == D) {
876  // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
877  Cond = A;
878  True = SimplifyBinOp(Opcode, B, E, FMF, Q);
879  False = SimplifyBinOp(Opcode, C, F, FMF, Q);
880 
881  if (LHS->hasOneUse() && RHS->hasOneUse()) {
882  if (False && !True)
883  True = Builder.CreateBinOp(Opcode, B, E);
884  else if (True && !False)
885  False = Builder.CreateBinOp(Opcode, C, F);
886  }
887  } else if (LHSIsSelect && LHS->hasOneUse()) {
888  // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
889  Cond = A;
890  True = SimplifyBinOp(Opcode, B, RHS, FMF, Q);
891  False = SimplifyBinOp(Opcode, C, RHS, FMF, Q);
892  } else if (RHSIsSelect && RHS->hasOneUse()) {
893  // X op (D ? E : F) -> D ? (X op E) : (X op F)
894  Cond = D;
895  True = SimplifyBinOp(Opcode, LHS, E, FMF, Q);
896  False = SimplifyBinOp(Opcode, LHS, F, FMF, Q);
897  }
898 
899  if (!True || !False)
900  return nullptr;
901 
902  Value *SI = Builder.CreateSelect(Cond, True, False);
903  SI->takeName(&I);
904  return SI;
905 }
906 
907 /// Freely adapt every user of V as-if V was changed to !V.
908 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
909 void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
910  for (User *U : I->users()) {
911  switch (cast<Instruction>(U)->getOpcode()) {
912  case Instruction::Select: {
913  auto *SI = cast<SelectInst>(U);
914  SI->swapValues();
915  SI->swapProfMetadata();
916  break;
917  }
918  case Instruction::Br:
919  cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
920  break;
921  case Instruction::Xor:
922  replaceInstUsesWith(cast<Instruction>(*U), I);
923  break;
924  default:
925  llvm_unreachable("Got unexpected user - out of sync with "
926  "canFreelyInvertAllUsersOf() ?");
927  }
928  }
929 }
930 
931 /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
932 /// constant zero (which is the 'negate' form).
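/// For example, given "%neg = sub i32 0, %x" this returns %x; given the
/// constant i32 7 it returns the folded constant i32 -7.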
933 Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
934  Value *NegV;
935  if (match(V, m_Neg(m_Value(NegV))))
936  return NegV;
937 
938  // Constants can be considered to be negated values if they can be folded.
939  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
940  return ConstantExpr::getNeg(C);
941 
942  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
943  if (C->getType()->getElementType()->isIntegerTy())
944  return ConstantExpr::getNeg(C);
945 
946  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
947  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
948  Constant *Elt = CV->getAggregateElement(i);
949  if (!Elt)
950  return nullptr;
951 
952  if (isa<UndefValue>(Elt))
953  continue;
954 
955  if (!isa<ConstantInt>(Elt))
956  return nullptr;
957  }
958  return ConstantExpr::getNeg(CV);
959  }
960 
961  // Negate integer vector splats.
962  if (auto *CV = dyn_cast<Constant>(V))
963  if (CV->getType()->isVectorTy() &&
964  CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
965  return ConstantExpr::getNeg(CV);
966 
967  return nullptr;
968 }
969 
970 /// A binop with a constant operand and a sign-extended boolean operand may be
971 /// converted into a select of constants by applying the binary operation to
972 /// the constant with the two possible values of the extended boolean (0 or -1).
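/// For example, "add (sext i1 %b to i32), 5" becomes
/// "select i1 %b, i32 4, i32 5" (sext of true is -1, and -1 + 5 == 4).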
973 Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
974  // TODO: Handle non-commutative binop (constant is operand 0).
975  // TODO: Handle zext.
976  // TODO: Peek through 'not' of cast.
977  Value *BO0 = BO.getOperand(0);
978  Value *BO1 = BO.getOperand(1);
979  Value *X;
980  Constant *C;
981  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
982  !X->getType()->isIntOrIntVectorTy(1))
983  return nullptr;
984 
985  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
986   Constant *Ones = Constant::getAllOnesValue(BO.getType());
987   Constant *Zero = Constant::getNullValue(BO.getType());
988   Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
989  Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
990  return SelectInst::Create(X, TVal, FVal);
991 }
992 
993 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
994                                              InstCombiner::BuilderTy &Builder) {
995   if (auto *Cast = dyn_cast<CastInst>(&I))
996  return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
997 
998  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
999  assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
1000  "Expected constant-foldable intrinsic");
1001  Intrinsic::ID IID = II->getIntrinsicID();
1002  if (II->arg_size() == 1)
1003  return Builder.CreateUnaryIntrinsic(IID, SO);
1004 
1005  // This works for real binary ops like min/max (where we always expect the
1006  // constant operand to be canonicalized as op1) and unary ops with a bonus
1007  // constant argument like ctlz/cttz.
1008  // TODO: Handle non-commutative binary intrinsics as below for binops.
1009  assert(II->arg_size() == 2 && "Expected binary intrinsic");
1010  assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
1011  return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
1012  }
1013 
1014  assert(I.isBinaryOp() && "Unexpected opcode for select folding");
1015 
1016  // Figure out if the constant is the left or the right argument.
1017  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
1018  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
1019 
1020  if (auto *SOC = dyn_cast<Constant>(SO)) {
1021  if (ConstIsRHS)
1022  return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
1023  return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
1024  }
1025 
1026  Value *Op0 = SO, *Op1 = ConstOperand;
1027  if (!ConstIsRHS)
1028  std::swap(Op0, Op1);
1029 
1030  auto *BO = cast<BinaryOperator>(&I);
1031  Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
1032  SO->getName() + ".op");
1033  auto *FPInst = dyn_cast<Instruction>(RI);
1034  if (FPInst && isa<FPMathOperator>(FPInst))
1035  FPInst->copyFastMathFlags(BO);
1036  return RI;
1037 }
1038 
1039 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
1040                                                 SelectInst *SI) {
1041  // Don't modify shared select instructions.
1042  if (!SI->hasOneUse())
1043  return nullptr;
1044 
1045  Value *TV = SI->getTrueValue();
1046  Value *FV = SI->getFalseValue();
1047  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1048  return nullptr;
1049 
1050  // Bool selects with constant operands can be folded to logical ops.
1051  if (SI->getType()->isIntOrIntVectorTy(1))
1052  return nullptr;
1053 
1054  // If it's a bitcast involving vectors, make sure it has the same number of
1055  // elements on both sides.
1056  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1057  VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1058  VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1059 
1060  // Verify that either both or neither are vectors.
1061  if ((SrcTy == nullptr) != (DestTy == nullptr))
1062  return nullptr;
1063 
1064  // If vectors, verify that they have the same number of elements.
1065  if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1066  return nullptr;
1067  }
1068 
1069  // Test if a CmpInst instruction is used exclusively by a select as
1070  // part of a minimum or maximum operation. If so, refrain from doing
1071  // any other folding. This helps out other analyses which understand
1072  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
1073  // and CodeGen. And in this case, at least one of the comparison
1074  // operands has at least one user besides the compare (the select),
1075  // which would often largely negate the benefit of folding anyway.
1076  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
1077  if (CI->hasOneUse()) {
1078  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1079 
1080  // FIXME: This is a hack to avoid infinite looping with min/max patterns.
1081  // We have to ensure that vector constants that only differ with
1082  // undef elements are treated as equivalent.
1083  auto areLooselyEqual = [](Value *A, Value *B) {
1084  if (A == B)
1085  return true;
1086 
1087  // Test for vector constants.
1088  Constant *ConstA, *ConstB;
1089  if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
1090  return false;
1091 
1092  // TODO: Deal with FP constants?
1093  if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
1094  return false;
1095 
1096  // Compare for equality including undefs as equal.
1097  auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
1098  const APInt *C;
1099  return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
1100  };
1101 
1102  if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
1103  (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
1104  return nullptr;
1105  }
1106  }
1107 
1108   Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
1109   Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
1110   return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1111 }
1112 
1113 static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
1114                                         InstCombiner::BuilderTy &Builder) {
1115   bool ConstIsRHS = isa<Constant>(I->getOperand(1));
1116  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
1117 
1118  if (auto *InC = dyn_cast<Constant>(InV)) {
1119  if (ConstIsRHS)
1120  return ConstantExpr::get(I->getOpcode(), InC, C);
1121  return ConstantExpr::get(I->getOpcode(), C, InC);
1122  }
1123 
1124  Value *Op0 = InV, *Op1 = C;
1125  if (!ConstIsRHS)
1126  std::swap(Op0, Op1);
1127 
1128  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
1129  auto *FPInst = dyn_cast<Instruction>(RI);
1130  if (FPInst && isa<FPMathOperator>(FPInst))
1131  FPInst->copyFastMathFlags(I);
1132  return RI;
1133 }
1134 
1135 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1136   unsigned NumPHIValues = PN->getNumIncomingValues();
1137  if (NumPHIValues == 0)
1138  return nullptr;
1139 
1140  // We normally only transform phis with a single use. However, if a PHI has
1141  // multiple uses and they are all the same operation, we can fold *all* of the
1142  // uses into the PHI.
1143  if (!PN->hasOneUse()) {
1144  // Walk the use list for the instruction, comparing them to I.
1145  for (User *U : PN->users()) {
1146  Instruction *UI = cast<Instruction>(U);
1147  if (UI != &I && !I.isIdenticalTo(UI))
1148  return nullptr;
1149  }
1150  // Otherwise, we can replace *all* users with the new PHI we form.
1151  }
1152 
1153  // Check to see if all of the operands of the PHI are simple constants
1154  // (constantint/constantfp/undef). If there is one non-constant value,
1155  // remember the BB it is in. If there is more than one or if *it* is a PHI,
1156  // bail out. We don't do arbitrary constant expressions here because moving
1157  // their computation can be expensive without a cost model.
1158  BasicBlock *NonConstBB = nullptr;
1159  for (unsigned i = 0; i != NumPHIValues; ++i) {
1160  Value *InVal = PN->getIncomingValue(i);
1161  // For non-freeze, require constant operand
1162  // For freeze, require non-undef, non-poison operand
1163  if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
1164  continue;
1165  if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
1166  continue;
1167 
1168  if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1169  if (NonConstBB) return nullptr; // More than one non-const value.
1170 
1171  NonConstBB = PN->getIncomingBlock(i);
1172 
1173  // If the InVal is an invoke at the end of the pred block, then we can't
1174  // insert a computation after it without breaking the edge.
1175  if (isa<InvokeInst>(InVal))
1176  if (cast<Instruction>(InVal)->getParent() == NonConstBB)
1177  return nullptr;
1178 
1179  // If the incoming non-constant value is in I's block, we will remove one
1180  // instruction, but insert another equivalent one, leading to infinite
1181  // instcombine.
1182  if (isPotentiallyReachable(I.getParent(), NonConstBB, nullptr, &DT, LI))
1183  return nullptr;
1184  }
1185 
1186  // If there is exactly one non-constant value, we can insert a copy of the
1187  // operation in that block. However, if this is a critical edge, we would be
1188  // inserting the computation on some other paths (e.g. inside a loop). Only
1189  // do this if the pred block is unconditionally branching into the phi block.
1190  // Also, make sure that the pred block is not dead code.
1191  if (NonConstBB != nullptr) {
1192  BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
1193  if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
1194  return nullptr;
1195  }
1196 
1197  // Okay, we can do the transformation: create the new PHI node.
1198  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1199  InsertNewInstBefore(NewPN, *PN);
1200  NewPN->takeName(PN);
1201 
1202  // If we are going to have to insert a new computation, do so right before the
1203  // predecessor's terminator.
1204  if (NonConstBB)
1205  Builder.SetInsertPoint(NonConstBB->getTerminator());
1206 
1207  // Next, add all of the operands to the PHI.
1208  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
1209  // We only currently try to fold the condition of a select when it is a phi,
1210  // not the true/false values.
1211  Value *TrueV = SI->getTrueValue();
1212  Value *FalseV = SI->getFalseValue();
1213  BasicBlock *PhiTransBB = PN->getParent();
1214  for (unsigned i = 0; i != NumPHIValues; ++i) {
1215  BasicBlock *ThisBB = PN->getIncomingBlock(i);
1216  Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
1217  Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
1218  Value *InV = nullptr;
1219  // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
1220  // even if currently isNullValue gives false.
1221  Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
1222  // For vector constants, we cannot use isNullValue to fold into
1223  // FalseVInPred versus TrueVInPred. When we have individual nonzero
1224  // elements in the vector, we will incorrectly fold InC to
1225  // `TrueVInPred`.
1226  if (InC && isa<ConstantInt>(InC))
1227  InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
1228  else {
1229  // Generate the select in the same block as PN's current incoming block.
1230  // Note: ThisBB need not be the NonConstBB because vector constants
1231  // which are constants by definition are handled here.
1232  // FIXME: This can lead to an increase in IR generation because we might
1233  // generate selects for vector constant phi operand, that could not be
1234  // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
1235  // non-vector phis, this transformation was always profitable because
1236  // the select would be generated exactly once in the NonConstBB.
1237  Builder.SetInsertPoint(ThisBB->getTerminator());
1238  InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
1239  FalseVInPred, "phi.sel");
1240  }
1241  NewPN->addIncoming(InV, ThisBB);
1242  }
1243  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
1244  Constant *C = cast<Constant>(I.getOperand(1));
1245  for (unsigned i = 0; i != NumPHIValues; ++i) {
1246  Value *InV = nullptr;
1247  if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1248  InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
1249  else
1250  InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
1251  C, "phi.cmp");
1252  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1253  }
1254  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
1255  for (unsigned i = 0; i != NumPHIValues; ++i) {
1256       Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
1257                                              Builder);
1258  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1259  }
1260  } else if (isa<FreezeInst>(&I)) {
1261  for (unsigned i = 0; i != NumPHIValues; ++i) {
1262  Value *InV;
1263  if (NonConstBB == PN->getIncomingBlock(i))
1264  InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
1265  else
1266  InV = PN->getIncomingValue(i);
1267  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1268  }
1269  } else {
1270  CastInst *CI = cast<CastInst>(&I);
1271  Type *RetTy = CI->getType();
1272  for (unsigned i = 0; i != NumPHIValues; ++i) {
1273  Value *InV;
1274  if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1275  InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
1276  else
1277  InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
1278  I.getType(), "phi.cast");
1279  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1280  }
1281  }
1282 
1283  for (User *U : make_early_inc_range(PN->users())) {
1284  Instruction *User = cast<Instruction>(U);
1285  if (User == &I) continue;
1286  replaceInstUsesWith(*User, NewPN);
1287  eraseInstFromFunction(*User);
1288  }
1289  return replaceInstUsesWith(I, NewPN);
1290 }
1291 
1292 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1293   if (!isa<Constant>(I.getOperand(1)))
1294  return nullptr;
1295 
1296  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1297  if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1298  return NewSel;
1299  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1300  if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1301  return NewPhi;
1302  }
1303  return nullptr;
1304 }
1305 
1306 /// Given a pointer type and a constant offset, determine whether or not there
1307 /// is a sequence of GEP indices into the pointed type that will land us at the
1308 /// specified offset. If so, fill them into NewIndices and return the resultant
1309 /// element type, otherwise return null.
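/// For example (assuming a typical DataLayout), for a pointer to
/// { i32, [4 x i16] } and offset 6, the indices are [0, 1, 1]: element 1 of
/// the inner array, which begins at byte offset 4 + 1*2 == 6.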
1310 Type *
1311 InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
1312  SmallVectorImpl<Value *> &NewIndices) {
1313  Type *Ty = PtrTy->getElementType();
1314  if (!Ty->isSized())
1315  return nullptr;
1316 
1317  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
1318  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
1319  if (!Offset.isZero())
1320  return nullptr;
1321 
1322  for (const APInt &Index : Indices)
1323  NewIndices.push_back(Builder.getInt(Index));
1324  return Ty;
1325 }
1326 
1327 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1328   // If this GEP has only 0 indices, it is the same pointer as
1329  // Src. If Src is not a trivial GEP too, don't combine
1330  // the indices.
1331  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1332  !Src.hasOneUse())
1333  return false;
1334  return true;
1335 }
1336 
1337 /// Return a value X such that Val = X * Scale, or null if none.
1338 /// If the multiplication is known not to overflow, then NoSignedWrap is set.
1339 Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
1340  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
1341  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
1342  Scale.getBitWidth() && "Scale not compatible with value!");
1343 
1344  // If Val is zero or Scale is one then Val = Val * Scale.
1345  if (match(Val, m_Zero()) || Scale == 1) {
1346  NoSignedWrap = true;
1347  return Val;
1348  }
1349 
1350  // If Scale is zero then it does not divide Val.
1351  if (Scale.isMinValue())
1352  return nullptr;
1353 
1354  // Look through chains of multiplications, searching for a constant that is
1355  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
1356  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
1357  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
1358  // down from Val:
1359  //
1360  // Val = M1 * X || Analysis starts here and works down
1361  // M1 = M2 * Y || Doesn't descend into terms with more
1362  // M2 = Z * 4 \/ than one use
1363  //
1364  // Then to modify a term at the bottom:
1365  //
1366  // Val = M1 * X
1367  // M1 = Z * Y || Replaced M2 with Z
1368  //
1369  // Then to work back up correcting nsw flags.
1370 
1371  // Op - the term we are currently analyzing. Starts at Val then drills down.
1372  // Replaced with its descaled value before exiting from the drill down loop.
1373  Value *Op = Val;
1374 
1375  // Parent - initially null, but after drilling down notes where Op came from.
1376  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
1377  // 0'th operand of Val.
1378  std::pair<Instruction *, unsigned> Parent;
1379 
1380  // Set if the transform requires a descaling at deeper levels that doesn't
1381  // overflow.
1382  bool RequireNoSignedWrap = false;
1383 
1384  // Log base 2 of the scale. Negative if not a power of 2.
1385  int32_t logScale = Scale.exactLogBase2();
1386 
1387  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
1388  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
1389  // If Op is a constant divisible by Scale then descale to the quotient.
1390  APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
1391  APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
1392  if (!Remainder.isMinValue())
1393  // Not divisible by Scale.
1394  return nullptr;
1395  // Replace with the quotient in the parent.
1396  Op = ConstantInt::get(CI->getType(), Quotient);
1397  NoSignedWrap = true;
1398  break;
1399  }
1400 
1401  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
1402  if (BO->getOpcode() == Instruction::Mul) {
1403  // Multiplication.
1404  NoSignedWrap = BO->hasNoSignedWrap();
1405  if (RequireNoSignedWrap && !NoSignedWrap)
1406  return nullptr;
1407 
1408  // There are three cases for multiplication: multiplication by exactly
1409  // the scale, multiplication by a constant different to the scale, and
1410  // multiplication by something else.
1411  Value *LHS = BO->getOperand(0);
1412  Value *RHS = BO->getOperand(1);
1413 
1414  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1415  // Multiplication by a constant.
1416  if (CI->getValue() == Scale) {
1417  // Multiplication by exactly the scale, replace the multiplication
1418  // by its left-hand side in the parent.
1419  Op = LHS;
1420  break;
1421  }
1422 
1423  // Otherwise drill down into the constant.
1424  if (!Op->hasOneUse())
1425  return nullptr;
1426 
1427  Parent = std::make_pair(BO, 1);
1428  continue;
1429  }
1430 
1431  // Multiplication by something else. Drill down into the left-hand side
1432  // since that's where the reassociate pass puts the good stuff.
1433  if (!Op->hasOneUse())
1434  return nullptr;
1435 
1436  Parent = std::make_pair(BO, 0);
1437  continue;
1438  }
1439 
1440  if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
1441  isa<ConstantInt>(BO->getOperand(1))) {
1442  // Multiplication by a power of 2.
1443  NoSignedWrap = BO->hasNoSignedWrap();
1444  if (RequireNoSignedWrap && !NoSignedWrap)
1445  return nullptr;
1446 
1447  Value *LHS = BO->getOperand(0);
1448  int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
1449  getLimitedValue(Scale.getBitWidth());
1450  // Op = LHS << Amt.
1451 
1452  if (Amt == logScale) {
1453  // Multiplication by exactly the scale, replace the multiplication
1454  // by its left-hand side in the parent.
1455  Op = LHS;
1456  break;
1457  }
1458  if (Amt < logScale || !Op->hasOneUse())
1459  return nullptr;
1460 
1461  // Multiplication by more than the scale. Reduce the multiplying amount
1462  // by the scale in the parent.
1463  Parent = std::make_pair(BO, 1);
1464  Op = ConstantInt::get(BO->getType(), Amt - logScale);
1465  break;
1466  }
1467  }
1468 
1469  if (!Op->hasOneUse())
1470  return nullptr;
1471 
1472  if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1473  if (Cast->getOpcode() == Instruction::SExt) {
1474  // Op is sign-extended from a smaller type, descale in the smaller type.
1475  unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1476  APInt SmallScale = Scale.trunc(SmallSize);
1477  // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
1478  // descale Op as (sext Y) * Scale. In order to have
1479  // sext (Y * SmallScale) = (sext Y) * Scale
1480  // some conditions need to hold however: SmallScale must sign-extend to
1481  // Scale and the multiplication Y * SmallScale should not overflow.
1482  if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1483  // SmallScale does not sign-extend to Scale.
1484  return nullptr;
1485  assert(SmallScale.exactLogBase2() == logScale);
1486  // Require that Y * SmallScale must not overflow.
1487  RequireNoSignedWrap = true;
1488 
1489  // Drill down through the cast.
1490  Parent = std::make_pair(Cast, 0);
1491  Scale = SmallScale;
1492  continue;
1493  }
1494 
1495  if (Cast->getOpcode() == Instruction::Trunc) {
1496  // Op is truncated from a larger type, descale in the larger type.
1497  // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
1498  // trunc (Y * sext Scale) = (trunc Y) * Scale
1499  // always holds. However (trunc Y) * Scale may overflow even if
1500  // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1501  // from this point up in the expression (see later).
1502  if (RequireNoSignedWrap)
1503  return nullptr;
1504 
1505  // Drill down through the cast.
1506  unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1507  Parent = std::make_pair(Cast, 0);
1508  Scale = Scale.sext(LargeSize);
1509  if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1510  logScale = -1;
1511  assert(Scale.exactLogBase2() == logScale);
1512  continue;
1513  }
1514  }
1515 
1516  // Unsupported expression, bail out.
1517  return nullptr;
1518  }
1519 
1520  // If Op is zero then Val = Op * Scale.
1521  if (match(Op, m_Zero())) {
1522  NoSignedWrap = true;
1523  return Op;
1524  }
1525 
1526  // We know that we can successfully descale, so from here on we can safely
1527  // modify the IR. Op holds the descaled version of the deepest term in the
1528  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
1529  // not to overflow.
1530 
1531  if (!Parent.first)
1532  // The expression only had one term.
1533  return Op;
1534 
1535  // Rewrite the parent using the descaled version of its operand.
1536  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1537  assert(Op != Parent.first->getOperand(Parent.second) &&
1538  "Descaling was a no-op?");
1539  replaceOperand(*Parent.first, Parent.second, Op);
1540  Worklist.push(Parent.first);
1541 
1542  // Now work back up the expression correcting nsw flags. The logic is based
1543  // on the following observation: if X * Y is known not to overflow as a signed
1544  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1545  // then X * Z will not overflow as a signed multiplication either. As we work
1546  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1547  // current level has strictly smaller absolute value than the original.
1548  Instruction *Ancestor = Parent.first;
1549  do {
1550  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1551  // If the multiplication wasn't nsw then we can't say anything about the
1552  // value of the descaled multiplication, and we have to clear nsw flags
1553  // from this point on up.
1554  bool OpNoSignedWrap = BO->hasNoSignedWrap();
1555  NoSignedWrap &= OpNoSignedWrap;
1556  if (NoSignedWrap != OpNoSignedWrap) {
1557  BO->setHasNoSignedWrap(NoSignedWrap);
1558  Worklist.push(Ancestor);
1559  }
1560  } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1561  // The fact that the descaled input to the trunc has smaller absolute
1562  // value than the original input doesn't tell us anything useful about
1563  // the absolute values of the truncations.
1564  NoSignedWrap = false;
1565  }
1566  assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1567  "Failed to keep proper track of nsw flags while drilling down?");
1568 
1569  if (Ancestor == Val)
1570  // Got to the top, all done!
1571  return Val;
1572 
1573  // Move up one level in the expression.
1574  assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1575  Ancestor = Ancestor->user_back();
1576  } while (true);
1577 }
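// For illustration, a hypothetical descale of a left shift: starting from
//   %v = shl nsw i64 %x, 3
// with Scale = 4 (logScale = 2), the shift amount is reduced in place to
//   %v = shl nsw i64 %x, 1
// so that the original value equals the returned value multiplied by 4, and
// NoSignedWrap is reported because the original shift carried nsw.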
1578 
1579 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1580  if (!isa<VectorType>(Inst.getType()))
1581  return nullptr;
1582 
1583  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1584  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1585  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1586  cast<VectorType>(Inst.getType())->getElementCount());
1587  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1588  cast<VectorType>(Inst.getType())->getElementCount());
1589 
1590  // If both operands of the binop are vector concatenations, then perform the
1591  // narrow binop on each pair of the source operands followed by concatenation
1592  // of the results.
1593  Value *L0, *L1, *R0, *R1;
1594  ArrayRef<int> Mask;
1595  if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1596  match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1597  LHS->hasOneUse() && RHS->hasOneUse() &&
1598  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1599  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1600  // This transform does not have the speculative execution constraint as
1601  // below because the shuffle is a concatenation. The new binops are
1602  // operating on exactly the same elements as the existing binop.
1603  // TODO: We could ease the mask requirement to allow different undef lanes,
1604  // but that requires an analysis of the binop-with-undef output value.
1605  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1606  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1607  BO->copyIRFlags(&Inst);
1608  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1609  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1610  BO->copyIRFlags(&Inst);
1611  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1612  }
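 // For illustration, a hypothetical input for the concatenation fold above:
 //   %lhs = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %rhs = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 //   %r   = add <4 x i32> %lhs, %rhs
 // becomes two narrow adds (%a + %c and %b + %d) followed by one concatenating
 // shuffle, assuming both shuffles have no other uses.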
1613 
1614  // It may not be safe to reorder shuffles and things like div, urem, etc.
1615  // because we may trap when executing those ops on unknown vector elements.
1616  // See PR20059.
1617  if (!isSafeToSpeculativelyExecute(&Inst))
1618  return nullptr;
1619 
1620  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1621  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1622  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1623  BO->copyIRFlags(&Inst);
1624  return new ShuffleVectorInst(XY, M);
1625  };
1626 
1627  // If both arguments of the binary operation are shuffles that use the same
1628  // mask and shuffle within a single vector, move the shuffle after the binop.
1629  Value *V1, *V2;
1630  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1631  match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1632  V1->getType() == V2->getType() &&
1633  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1634  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1635  return createBinOpShuffle(V1, V2, Mask);
1636  }
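 // For illustration, a hypothetical instance of the fold above:
 //   %s1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 //   %s2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 //   %r  = add <4 x i32> %s1, %s2
 // becomes add <4 x i32> %v1, %v2 followed by a single shuffle with the same
 // mask, so the shuffle is performed only once.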
1637 
1638  // If both arguments of a commutative binop are select-shuffles that use the
1639  // same mask with commuted operands, the shuffles are unnecessary.
1640  if (Inst.isCommutative() &&
1641  match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1642  match(RHS,
1643  m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1644  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1645  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1646  // TODO: Allow shuffles that contain undefs in the mask?
1647  // That is legal, but it reduces undef knowledge.
1648  // TODO: Allow arbitrary shuffles by shuffling after binop?
1649  // That might be legal, but we have to deal with poison.
1650  if (LShuf->isSelect() &&
1651  !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
1652  RShuf->isSelect() &&
1653  !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
1654  // Example:
1655  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1656  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1657  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1658  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1659  NewBO->copyIRFlags(&Inst);
1660  return NewBO;
1661  }
1662  }
1663 
1664  // If one argument is a shuffle within one vector and the other is a constant,
1665  // try moving the shuffle after the binary operation. This canonicalization
1666  // intends to move shuffles closer to other shuffles and binops closer to
1667  // other binops, so they can be folded. It may also enable demanded elements
1668  // transforms.
1669  Constant *C;
1670  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1671  if (InstVTy &&
1672  match(&Inst,
1673  m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1674  m_ImmConstant(C))) &&
1675  cast<FixedVectorType>(V1->getType())->getNumElements() <=
1676  InstVTy->getNumElements()) {
1677  assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1678  "Shuffle should not change scalar type");
1679 
1680  // Find constant NewC that has property:
1681  // shuffle(NewC, ShMask) = C
1682  // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1683  // reorder is not possible. A 1-to-1 mapping is not required. Example:
1684  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1685  bool ConstOp1 = isa<Constant>(RHS);
1686  ArrayRef<int> ShMask = Mask;
1687  unsigned SrcVecNumElts =
1688  cast<FixedVectorType>(V1->getType())->getNumElements();
1689  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1690  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1691  bool MayChange = true;
1692  unsigned NumElts = InstVTy->getNumElements();
1693  for (unsigned I = 0; I < NumElts; ++I) {
1694  Constant *CElt = C->getAggregateElement(I);
1695  if (ShMask[I] >= 0) {
1696  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1697  Constant *NewCElt = NewVecC[ShMask[I]];
1698  // Bail out if:
1699  // 1. The constant vector contains a constant expression.
1700  // 2. The shuffle needs an element of the constant vector that can't
1701  // be mapped to a new constant vector.
1702  // 3. This is a widening shuffle that copies elements of V1 into the
1703  // extended elements (extending with undef is allowed).
1704  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1705  I >= SrcVecNumElts) {
1706  MayChange = false;
1707  break;
1708  }
1709  NewVecC[ShMask[I]] = CElt;
1710  }
1711  // If this is a widening shuffle, we must be able to extend with undef
1712  // elements. If the original binop does not produce an undef in the high
1713  // lanes, then this transform is not safe.
1714  // Similarly for undef lanes due to the shuffle mask, we can only
1715  // transform binops that preserve undef.
1716  // TODO: We could shuffle those non-undef constant values into the
1717  // result by using a constant vector (rather than an undef vector)
1718  // as operand 1 of the new binop, but that might be too aggressive
1719  // for target-independent shuffle creation.
1720  if (I >= SrcVecNumElts || ShMask[I] < 0) {
1721  Constant *MaybeUndef =
1722  ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
1723  : ConstantExpr::get(Opcode, CElt, UndefScalar);
1724  if (!match(MaybeUndef, m_Undef())) {
1725  MayChange = false;
1726  break;
1727  }
1728  }
1729  }
1730  if (MayChange) {
1731  Constant *NewC = ConstantVector::get(NewVecC);
1732  // It may not be safe to execute a binop on a vector with undef elements
1733  // because the entire instruction can be folded to undef or create poison
1734  // that did not exist in the original code.
1735  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1736  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1737 
1738  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1739  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1740  Value *NewLHS = ConstOp1 ? V1 : NewC;
1741  Value *NewRHS = ConstOp1 ? NewC : V1;
1742  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1743  }
1744  }
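 // For illustration, a hypothetical instance of the constant reordering above:
 //   %s = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
 //   %r = add <2 x i32> %s, <i32 10, i32 20>
 // becomes add <2 x i32> %v, <i32 20, i32 10> followed by the same shuffle,
 // i.e. NewC is the original constant with its lanes permuted to undo the mask.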
1745 
1746  // Try to reassociate to sink a splat shuffle after a binary operation.
1747  if (Inst.isAssociative() && Inst.isCommutative()) {
1748  // Canonicalize shuffle operand as LHS.
1749  if (isa<ShuffleVectorInst>(RHS))
1750  std::swap(LHS, RHS);
1751 
1752  Value *X;
1753  ArrayRef<int> MaskC;
1754  int SplatIndex;
1755  Value *Y, *OtherOp;
1756  if (!match(LHS,
1757  m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1758  !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1759  X->getType() != Inst.getType() ||
1760  !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1761  return nullptr;
1762 
1763  // FIXME: This may not be safe if the analysis allows undef elements. By
1764  // moving 'Y' before the splat shuffle, we are implicitly assuming
1765  // that it is not undef/poison at the splat index.
1766  if (isSplatValue(OtherOp, SplatIndex)) {
1767  std::swap(Y, OtherOp);
1768  } else if (!isSplatValue(Y, SplatIndex)) {
1769  return nullptr;
1770  }
1771 
1772  // X and Y are splatted values, so perform the binary operation on those
1773  // values followed by a splat followed by the 2nd binary operation:
1774  // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1775  Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1776  SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1777  Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1778  Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1779 
1780  // Intersect FMF on both new binops. Other (poison-generating) flags are
1781  // dropped to be safe.
1782  if (isa<FPMathOperator>(R)) {
1783  R->copyFastMathFlags(&Inst);
1784  R->andIRFlags(RHS);
1785  }
1786  if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1787  NewInstBO->copyIRFlags(R);
1788  return R;
1789  }
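 // For illustration, a hypothetical reassociation (with %y itself known to be
 // a splat at index 0):
 //   %xs = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
 //   %b  = add <4 x i32> %y, %other
 //   %r  = add <4 x i32> %xs, %b
 // becomes add %x, %y, a splat of that result, and then an add with %other.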
1790 
1791  return nullptr;
1792 }
1793 
1794 /// Try to narrow the width of a binop if at least 1 operand is an extend of
1795 /// a value. This requires a potentially expensive known bits check to make
1796 /// sure the narrow op does not overflow.
1797 Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1798  // We need at least one extended operand.
1799  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1800 
1801  // If this is a sub, we swap the operands since we always want an extension
1802  // on the RHS. The LHS can be an extension or a constant.
1803  if (BO.getOpcode() == Instruction::Sub)
1804  std::swap(Op0, Op1);
1805 
1806  Value *X;
1807  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1808  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1809  return nullptr;
1810 
1811  // If both operands are the same extension from the same source type and we
1812  // can eliminate at least one (hasOneUse), this might work.
1813  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1814  Value *Y;
1815  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1816  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1817  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1818  // If that did not match, see if we have a suitable constant operand.
1819  // Truncating and extending must produce the same constant.
1820  Constant *WideC;
1821  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1822  return nullptr;
1823  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1824  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1825  return nullptr;
1826  Y = NarrowC;
1827  }
1828 
1829  // Swap back now that we found our operands.
1830  if (BO.getOpcode() == Instruction::Sub)
1831  std::swap(X, Y);
1832 
1833  // Both operands have narrow versions. Last step: the math must not overflow
1834  // in the narrow width.
1835  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1836  return nullptr;
1837 
1838  // bo (ext X), (ext Y) --> ext (bo X, Y)
1839  // bo (ext X), C --> ext (bo X, C')
1840  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1841  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1842  if (IsSext)
1843  NewBinOp->setHasNoSignedWrap();
1844  else
1845  NewBinOp->setHasNoUnsignedWrap();
1846  }
1847  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1848 }
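// For illustration, a hypothetical narrowing when known bits rule out overflow:
//   %zx = zext i8 %x to i32
//   %zy = zext i8 %y to i32
//   %r  = add i32 %zx, %zy
// becomes, when the i8 addition provably cannot wrap:
//   %narrow = add nuw i8 %x, %y
//   %r      = zext i8 %narrow to i32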
1849 
1850 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1851  // At least one GEP must be inbounds.
1852  if (!GEP1.isInBounds() && !GEP2.isInBounds())
1853  return false;
1854 
1855  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1856  (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1857 }
1858 
1859 /// Thread a GEP operation with constant indices through the constant true/false
1860 /// arms of a select.
1861 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1862  InstCombiner::BuilderTy &Builder) {
1863  if (!GEP.hasAllConstantIndices())
1864  return nullptr;
1865 
1866  Instruction *Sel;
1867  Value *Cond;
1868  Constant *TrueC, *FalseC;
1869  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1870  !match(Sel,
1871  m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1872  return nullptr;
1873 
1874  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1875  // Propagate 'inbounds' and metadata from existing instructions.
1876  // Note: using IRBuilder to create the constants for efficiency.
1877  SmallVector<Value *, 4> IndexC(GEP.indices());
1878  bool IsInBounds = GEP.isInBounds();
1879  Type *Ty = GEP.getSourceElementType();
1880  Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, TrueC, IndexC)
1881  : Builder.CreateGEP(Ty, TrueC, IndexC);
1882  Value *NewFalseC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, FalseC, IndexC)
1883  : Builder.CreateGEP(Ty, FalseC, IndexC);
1884  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1885 }
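// For illustration, a hypothetical select threading (with hypothetical
// globals @a and @b):
//   %p = select i1 %c, [4 x i32]* @a, [4 x i32]* @b
//   %g = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 0, i64 2
// becomes a select between the two constant-folded GEPs of @a and @b, so the
// select now chooses between the addresses of the selected elements.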
1886 
1887 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1888  GEPOperator *Src) {
1889  // Combine Indices - If the source pointer to this getelementptr instruction
1890  // is a getelementptr instruction with matching element type, combine the
1891  // indices of the two getelementptr instructions into a single instruction.
1892  if (Src->getResultElementType() != GEP.getSourceElementType())
1893  return nullptr;
1894 
1895  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1896  return nullptr;
1897 
1898  if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
1899  Src->hasOneUse()) {
1900  Value *GO1 = GEP.getOperand(1);
1901  Value *SO1 = Src->getOperand(1);
1902 
1903  if (LI) {
1904  // Try to reassociate loop invariant GEP chains to enable LICM.
1905  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
1906  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
1907  // invariant: this breaks the dependence between GEPs and allows LICM
1908  // to hoist the invariant part out of the loop.
1909  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
1910  // We have to be careful here.
1911  // We have something like:
1912  // %src = getelementptr <ty>, <ty>* %base, <ty> %idx
1913  // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
1914  // If we just swap idx & idx2 then we could inadvertently
1915  // change %src from a vector to a scalar, or vice versa.
1916  // Cases:
1917  // 1) %base a scalar & idx a scalar & idx2 a vector
1918  // => Swapping idx & idx2 turns %src into a vector type.
1919  // 2) %base a scalar & idx a vector & idx2 a scalar
1920  // => Swapping idx & idx2 turns %src into a scalar type
1921  // 3) %base, %idx, and %idx2 are scalars
1922  // => %src & %gep are scalars
1923  // => swapping idx & idx2 is safe
1924  // 4) %base a vector
1925  // => %src is a vector
1926  // => swapping idx & idx2 is safe.
1927  auto *SO0 = Src->getOperand(0);
1928  auto *SO0Ty = SO0->getType();
1929  if (!isa<VectorType>(GEP.getType()) || // case 3
1930  isa<VectorType>(SO0Ty)) { // case 4
1931  Src->setOperand(1, GO1);
1932  GEP.setOperand(1, SO1);
1933  return &GEP;
1934  } else {
1935  // Case 1 or 2
1936  // -- have to recreate %src & %gep
1937  // put NewSrc at same location as %src
1938  Builder.SetInsertPoint(cast<Instruction>(Src));
1939  Value *NewSrc = Builder.CreateGEP(
1940  GEP.getSourceElementType(), SO0, GO1, Src->getName());
1941  // Propagate 'inbounds' if the new source was not constant-folded.
1942  if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
1943  NewSrcGEPI->setIsInBounds(Src->isInBounds());
1944  GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
1945  GEP.getSourceElementType(), NewSrc, {SO1});
1946  NewGEP->setIsInBounds(GEP.isInBounds());
1947  return NewGEP;
1948  }
1949  }
1950  }
1951  }
1952  }
1953 
1954  // Note that if our source is a gep chain itself then we wait for that
1955  // chain to be resolved before we perform this transformation. This
1956  // avoids us creating a TON of code in some cases.
1957  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
1958  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
1959  return nullptr; // Wait until our source is folded to completion.
1960 
1961  SmallVector<Value*, 8> Indices;
1962 
1963  // Find out whether the last index in the source GEP is a sequential idx.
1964  bool EndsWithSequential = false;
1965  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
1966  I != E; ++I)
1967  EndsWithSequential = I.isSequential();
1968 
1969  // Can we combine the two pointer arithmetics offsets?
1970  if (EndsWithSequential) {
1971  // Replace: gep (gep %P, long B), long A, ...
1972  // With: T = long A+B; gep %P, T, ...
1973  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
1974  Value *GO1 = GEP.getOperand(1);
1975 
1976  // If they aren't the same type, then the input hasn't been processed
1977  // by the loop above yet (which canonicalizes sequential index types to
1978  // intptr_t). Just avoid transforming this until the input has been
1979  // normalized.
1980  if (SO1->getType() != GO1->getType())
1981  return nullptr;
1982 
1983  Value *Sum =
1984  SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
1985  // Only do the combine when we are sure the cost after the
1986  // merge is never more than that before the merge.
1987  if (Sum == nullptr)
1988  return nullptr;
1989 
1990  // Update the GEP in place if possible.
1991  if (Src->getNumOperands() == 2) {
1992  GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
1993  replaceOperand(GEP, 0, Src->getOperand(0));
1994  replaceOperand(GEP, 1, Sum);
1995  return &GEP;
1996  }
1997  Indices.append(Src->op_begin()+1, Src->op_end()-1);
1998  Indices.push_back(Sum);
1999  Indices.append(GEP.op_begin()+2, GEP.op_end());
2000  } else if (isa<Constant>(*GEP.idx_begin()) &&
2001  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2002  Src->getNumOperands() != 1) {
2003  // Otherwise we can do the fold if the first index of the GEP is a zero
2004  Indices.append(Src->op_begin()+1, Src->op_end());
2005  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2006  }
2007 
2008  if (!Indices.empty())
2009  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2010  ? GetElementPtrInst::CreateInBounds(
2011  Src->getSourceElementType(), Src->getOperand(0), Indices,
2012  GEP.getName())
2013  : GetElementPtrInst::Create(Src->getSourceElementType(),
2014  Src->getOperand(0), Indices,
2015  GEP.getName());
2016 
2017  return nullptr;
2018 }
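// For illustration, a hypothetical gep-of-gep merge where the inner GEP ends
// in a sequential index and the index sum simplifies:
//   %p = getelementptr i32, i32* %base, i64 1
//   %q = getelementptr i32, i32* %p, i64 2
// is rewritten as a single getelementptr i32, i32* %base, i64 3.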
2019 
2020 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2021  Value *PtrOp = GEP.getOperand(0);
2022  SmallVector<Value *, 8> Indices(GEP.indices());
2023  Type *GEPType = GEP.getType();
2024  Type *GEPEltType = GEP.getSourceElementType();
2025  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2026  if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2027  SQ.getWithInstruction(&GEP)))
2028  return replaceInstUsesWith(GEP, V);
2029 
2030  // For vector geps, use the generic demanded vector support.
2031  // Skip if GEP return type is scalable. The number of elements is unknown at
2032  // compile-time.
2033  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2034  auto VWidth = GEPFVTy->getNumElements();
2035  APInt UndefElts(VWidth, 0);
2036  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2037  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2038  UndefElts)) {
2039  if (V != &GEP)
2040  return replaceInstUsesWith(GEP, V);
2041  return &GEP;
2042  }
2043 
2044  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2045  // possible (decide on canonical form for pointer broadcast), 3) exploit
2046  // undef elements to decrease demanded bits
2047  }
2048 
2049  // Eliminate unneeded casts for indices, and replace indices which displace
2050  // by multiples of a zero size type with zero.
2051  bool MadeChange = false;
2052 
2053  // Index width may not be the same width as pointer width.
2054  // Data layout chooses the right type based on supported integer types.
2055  Type *NewScalarIndexTy =
2056  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2057 
2059  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2060  ++I, ++GTI) {
2061  // Skip indices into struct types.
2062  if (GTI.isStruct())
2063  continue;
2064 
2065  Type *IndexTy = (*I)->getType();
2066  Type *NewIndexType =
2067  IndexTy->isVectorTy()
2068  ? VectorType::get(NewScalarIndexTy,
2069  cast<VectorType>(IndexTy)->getElementCount())
2070  : NewScalarIndexTy;
2071 
2072  // If the element type has zero size then any index over it is equivalent
2073  // to an index of zero, so replace it with zero if it is not zero already.
2074  Type *EltTy = GTI.getIndexedType();
2075  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2076  if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2077  *I = Constant::getNullValue(NewIndexType);
2078  MadeChange = true;
2079  }
2080 
2081  if (IndexTy != NewIndexType) {
2082  // If we are using a wider index than needed for this platform, shrink
2083  // it to what we need. If narrower, sign-extend it to what we need.
2084  // This explicit cast can make subsequent optimizations more obvious.
2085  *I = Builder.CreateIntCast(*I, NewIndexType, true);
2086  MadeChange = true;
2087  }
2088  }
2089  if (MadeChange)
2090  return &GEP;
2091 
2092  // Check to see if the inputs to the PHI node are getelementptr instructions.
2093  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2094  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2095  if (!Op1)
2096  return nullptr;
2097 
2098  // Don't fold a GEP into itself through a PHI node. This can only happen
2099  // through the back-edge of a loop. Folding a GEP into itself means that
2100  // the value of the previous iteration needs to be stored in the meantime,
2101  // thus requiring an additional register variable to be live, but not
2102  // actually achieving anything (the GEP still needs to be executed once per
2103  // loop iteration).
2104  if (Op1 == &GEP)
2105  return nullptr;
2106 
2107  int DI = -1;
2108 
2109  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2110  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2111  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
2112  return nullptr;
2113 
2114  // As for Op1 above, don't try to fold a GEP into itself.
2115  if (Op2 == &GEP)
2116  return nullptr;
2117 
2118  // Keep track of the type as we walk the GEP.
2119  Type *CurTy = nullptr;
2120 
2121  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2122  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2123  return nullptr;
2124 
2125  if (Op1->getOperand(J) != Op2->getOperand(J)) {
2126  if (DI == -1) {
2127  // We have not seen any differences in the GEPs feeding the
2128  // PHI yet, so we record this one if it is allowed to be a
2129  // variable.
2130 
2131  // The first two arguments can vary for any GEP, the rest have to be
2132  // static for struct slots
2133  if (J > 1) {
2134  assert(CurTy && "No current type?");
2135  if (CurTy->isStructTy())
2136  return nullptr;
2137  }
2138 
2139  DI = J;
2140  } else {
2141  // The GEP is different by more than one input. While this could be
2142  // extended to support GEPs that vary by more than one variable it
2143  // doesn't make sense since it greatly increases the complexity and
2144  // would result in an R+R+R addressing mode which no backend
2145  // directly supports and would need to be broken into several
2146  // simpler instructions anyway.
2147  return nullptr;
2148  }
2149  }
2150 
2151  // Sink down a layer of the type for the next iteration.
2152  if (J > 0) {
2153  if (J == 1) {
2154  CurTy = Op1->getSourceElementType();
2155  } else {
2156  CurTy =
2157  GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2158  }
2159  }
2160  }
2161  }
2162 
2163  // If not all GEPs are identical we'll have to create a new PHI node.
2164  // Check that the old PHI node has only one use so that it will get
2165  // removed.
2166  if (DI != -1 && !PN->hasOneUse())
2167  return nullptr;
2168 
2169  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2170  if (DI == -1) {
2171  // All the GEPs feeding the PHI are identical. Clone one down into our
2172  // BB so that it can be merged with the current GEP.
2173  } else {
2174  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2175  // into the current block so it can be merged, and create a new PHI to
2176  // set that index.
2177  PHINode *NewPN;
2178  {
2179  IRBuilderBase::InsertPointGuard Guard(Builder);
2180  Builder.SetInsertPoint(PN);
2181  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2182  PN->getNumOperands());
2183  }
2184 
2185  for (auto &I : PN->operands())
2186  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2187  PN->getIncomingBlock(I));
2188 
2189  NewGEP->setOperand(DI, NewPN);
2190  }
2191 
2192  GEP.getParent()->getInstList().insert(
2193  GEP.getParent()->getFirstInsertionPt(), NewGEP);
2194  replaceOperand(GEP, 0, NewGEP);
2195  PtrOp = NewGEP;
2196  }
2197 
2198  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2199  if (Instruction *I = visitGEPOfGEP(GEP, Src))
2200  return I;
2201 
2202  // Skip if GEP source element type is scalable. The type alloc size is unknown
2203  // at compile-time.
2204  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2205  unsigned AS = GEP.getPointerAddressSpace();
2206  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2207  DL.getIndexSizeInBits(AS)) {
2208  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2209 
2210  bool Matched = false;
2211  uint64_t C;
2212  Value *V = nullptr;
2213  if (TyAllocSize == 1) {
2214  V = GEP.getOperand(1);
2215  Matched = true;
2216  } else if (match(GEP.getOperand(1),
2217  m_AShr(m_Value(V), m_ConstantInt(C)))) {
2218  if (TyAllocSize == 1ULL << C)
2219  Matched = true;
2220  } else if (match(GEP.getOperand(1),
2221  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2222  if (TyAllocSize == C)
2223  Matched = true;
2224  }
2225 
2226  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2227  // only if both point to the same underlying object (otherwise provenance
2228  // is not necessarily retained).
2229  Value *Y;
2230  Value *X = GEP.getOperand(0);
2231  if (Matched &&
2232  match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2233  getUnderlyingObject(X) == getUnderlyingObject(Y))
2234  return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2235  }
2236  }
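 // For illustration, a hypothetical instance of the canonicalization above,
 // where %x and %y are known to share the same underlying object:
 //   %xi = ptrtoint i8* %x to i64
 //   %yi = ptrtoint i8* %y to i64
 //   %d  = sub i64 %yi, %xi
 //   %g  = getelementptr i8, i8* %x, i64 %d
 // the GEP is replaced by %y itself (through a pointer cast if the types
 // differ), since %x plus the pointer difference is %y.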
2237 
2238  // We do not handle pointer-vector geps here.
2239  if (GEPType->isVectorTy())
2240  return nullptr;
2241 
2242  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
2243  Value *StrippedPtr = PtrOp->stripPointerCasts();
2244  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
2245 
2246  // TODO: The basic approach of these folds is not compatible with opaque
2247  // pointers, because we can't use bitcasts as a hint for a desirable GEP
2248  // type. Instead, we should perform canonicalization directly on the GEP
2249  // type. For now, skip these.
2250  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
2251  bool HasZeroPointerIndex = false;
2252  Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
2253 
2254  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
2255  HasZeroPointerIndex = C->isZero();
2256 
2257  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
2258  // into : GEP [10 x i8]* X, i32 0, ...
2259  //
2260  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
2261  // into : GEP i8* X, ...
2262  //
2263  // This occurs when the program declares an array extern like "int X[];"
2264  if (HasZeroPointerIndex) {
2265  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
2266  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
2267  if (CATy->getElementType() == StrippedPtrEltTy) {
2268  // -> GEP i8* X, ...
2269  SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
2270  GetElementPtrInst *Res = GetElementPtrInst::Create(
2271  StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
2272  Res->setIsInBounds(GEP.isInBounds());
2273  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
2274  return Res;
2275  // Insert Res, and create an addrspacecast.
2276  // e.g.,
2277  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
2278  // ->
2279  // %0 = GEP i8 addrspace(1)* X, ...
2280  // addrspacecast i8 addrspace(1)* %0 to i8*
2281  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
2282  }
2283 
2284  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
2285  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
2286  if (CATy->getElementType() == XATy->getElementType()) {
2287  // -> GEP [10 x i8]* X, i32 0, ...
2288  // At this point, we know that the cast source type is a pointer
2289  // to an array of the same type as the destination pointer
2290  // array. Because the array type is never stepped over (there
2291  // is a leading zero) we can fold the cast into this GEP.
2292  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
2293  GEP.setSourceElementType(XATy);
2294  return replaceOperand(GEP, 0, StrippedPtr);
2295  }
2296  // Cannot replace the base pointer directly because StrippedPtr's
2297  // address space is different. Instead, create a new GEP followed by
2298  // an addrspacecast.
2299  // e.g.,
2300  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
2301  // i32 0, ...
2302  // ->
2303  // %0 = GEP [10 x i8] addrspace(1)* X, ...
2304  // addrspacecast i8 addrspace(1)* %0 to i8*
2305  SmallVector<Value *, 8> Idx(GEP.indices());
2306  Value *NewGEP =
2307  GEP.isInBounds()
2308  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
2309  Idx, GEP.getName())
2310  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2311  GEP.getName());
2312  return new AddrSpaceCastInst(NewGEP, GEPType);
2313  }
2314  }
2315  }
2316  } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
2317  // Skip if GEP source element type is scalable. The type alloc size is
2318  // unknown at compile-time.
2319  // Transform things like: %t = getelementptr i32*
2320  // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
2321  // x i32]* %str, i32 0, i32 %V; bitcast
2322  if (StrippedPtrEltTy->isArrayTy() &&
2323  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
2324  DL.getTypeAllocSize(GEPEltType)) {
2325  Type *IdxType = DL.getIndexType(GEPType);
2326  Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
2327  Value *NewGEP =
2328  GEP.isInBounds()
2329  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2330  GEP.getName())
2331  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2332  GEP.getName());
2333 
2334  // V and GEP are both pointer types --> BitCast
2335  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2336  }
2337 
2338  // Transform things like:
2339  // %V = mul i64 %N, 4
2340  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2341  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2342  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2343  // Check that changing the type amounts to dividing the index by a scale
2344  // factor.
2345  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2346  uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
2347  if (ResSize && SrcSize % ResSize == 0) {
2348  Value *Idx = GEP.getOperand(1);
2349  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2350  uint64_t Scale = SrcSize / ResSize;
2351 
2352  // Earlier transforms ensure that the index has the right type
2353  // according to Data Layout, which considerably simplifies the
2354  // logic by eliminating implicit casts.
2355  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2356  "Index type does not match the Data Layout preferences");
2357 
2358  bool NSW;
2359  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2360  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2361  // If the multiplication NewIdx * Scale may overflow then the new
2362  // GEP may not be "inbounds".
2363  Value *NewGEP =
2364  GEP.isInBounds() && NSW
2365  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
2366  NewIdx, GEP.getName())
2367  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2368  GEP.getName());
2369 
2370  // The NewGEP must be pointer typed, so must the old one -> BitCast
2371  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2372  GEPType);
2373  }
2374  }
2375  }
2376 
2377  // Similarly, transform things like:
2378  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2379  // (where tmp = 8*tmp2) into:
2380  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2381  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2382  StrippedPtrEltTy->isArrayTy()) {
2383  // Check that changing to the array element type amounts to dividing the
2384  // index by a scale factor.
2385  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2386  uint64_t ArrayEltSize =
2387  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
2388  .getFixedSize();
2389  if (ResSize && ArrayEltSize % ResSize == 0) {
2390  Value *Idx = GEP.getOperand(1);
2391  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2392  uint64_t Scale = ArrayEltSize / ResSize;
2393 
2394  // Earlier transforms ensure that the index has the right type
2395  // according to the Data Layout, which considerably simplifies
2396  // the logic by eliminating implicit casts.
2397  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2398  "Index type does not match the Data Layout preferences");
2399 
2400  bool NSW;
2401  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2402  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2403  // If the multiplication NewIdx * Scale may overflow then the new
2404  // GEP may not be "inbounds".
2405  Type *IndTy = DL.getIndexType(GEPType);
2406  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2407 
2408  Value *NewGEP =
2409  GEP.isInBounds() && NSW
2410  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
2411  Off, GEP.getName())
2412  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2413  GEP.getName());
2414  // The NewGEP must be pointer typed, so must the old one -> BitCast
2415  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2416  GEPType);
2417  }
2418  }
2419  }
2420  }
2421  }
2422 
2423  // addrspacecast between types is canonicalized as a bitcast, then an
2424  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2425  // through the addrspacecast.
2426  Value *ASCStrippedPtrOp = PtrOp;
2427  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2428  // X = bitcast A addrspace(1)* to B addrspace(1)*
2429  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2430  // Z = gep Y, <...constant indices...>
2431  // Into an addrspacecasted GEP of the struct.
2432  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2433  ASCStrippedPtrOp = BC;
2434  }
2435 
2436  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
2437  Value *SrcOp = BCI->getOperand(0);
2438  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2439  Type *SrcEltType = SrcType->getElementType();
2440 
2441  // GEP directly using the source operand if this GEP is accessing an element
2442  // of a bitcasted pointer to vector or array of the same dimensions:
2443  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2444  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2445  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
2446  const DataLayout &DL) {
2447  auto *VecVTy = cast<FixedVectorType>(VecTy);
2448  return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
2449  ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
2450  DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
2451  };
2452  if (GEP.getNumOperands() == 3 &&
2453  ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
2454  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
2455  (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
2456  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
2457 
2458  // Create a new GEP here, as using `setOperand()` followed by
2459  // `setSourceElementType()` won't actually update the type of the
2460  // existing GEP Value. Causing issues if this Value is accessed when
2461  // constructing an AddrSpaceCastInst
2462  Value *NGEP = GEP.isInBounds()
2463  ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
2464  : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
2465  NGEP->takeName(&GEP);
2466 
2467  // Preserve GEP address space to satisfy users
2468  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2469  return new AddrSpaceCastInst(NGEP, GEPType);
2470 
2471  return replaceInstUsesWith(GEP, NGEP);
2472  }
2473 
2474  // See if we can simplify:
2475  // X = bitcast A* to B*
2476  // Y = gep X, <...constant indices...>
2477  // into a gep of the original struct. This is important for SROA and alias
2478  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2479  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
2480  APInt Offset(OffsetBits, 0);
2481 
2482  // If the bitcast argument is an allocation, the bitcast is a conversion to
2483  // the actual type of the allocation. Removing such bitcasts results in GEPs
2484  // with an i8* base and pure byte offsets, i.e. the GEP is not aware of the
2485  // struct or array hierarchy.
2486  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
2487  // a better chance to succeed.
2488  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
2489  !isAllocationFn(SrcOp, &TLI)) {
2490  // If this GEP instruction doesn't move the pointer, just replace the GEP
2491  // with a bitcast of the real input to the dest type.
2492  if (!Offset) {
2493  // If the bitcast is of an allocation, and the allocation will be
2494  // converted to match the type of the cast, don't touch this.
2495  if (isa<AllocaInst>(SrcOp)) {
2496  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2497  if (Instruction *I = visitBitCast(*BCI)) {
2498  if (I != BCI) {
2499  I->takeName(BCI);
2500  BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
2501  replaceInstUsesWith(*BCI, I);
2502  }
2503  return &GEP;
2504  }
2505  }
2506 
2507  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2508  return new AddrSpaceCastInst(SrcOp, GEPType);
2509  return new BitCastInst(SrcOp, GEPType);
2510  }
2511 
2512  // Otherwise, if the offset is non-zero, we need to find out if there is a
2513  // field at Offset in 'A's type. If so, we can pull the cast through the
2514  // GEP.
2515  SmallVector<Value*, 8> NewIndices;
2516  if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
2517  Value *NGEP =
2518  GEP.isInBounds()
2519  ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
2520  : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
2521 
2522  if (NGEP->getType() == GEPType)
2523  return replaceInstUsesWith(GEP, NGEP);
2524  NGEP->takeName(&GEP);
2525 
2526  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2527  return new AddrSpaceCastInst(NGEP, GEPType);
2528  return new BitCastInst(NGEP, GEPType);
2529  }
2530  }
2531  }
2532 
2533  if (!GEP.isInBounds()) {
2534  unsigned IdxWidth =
2535  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2536  APInt BasePtrOffset(IdxWidth, 0);
2537  Value *UnderlyingPtrOp =
2538  PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2539  BasePtrOffset);
2540  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2541  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2542  BasePtrOffset.isNonNegative()) {
2543  APInt AllocSize(
2544  IdxWidth,
2545  DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
2546  if (BasePtrOffset.ule(AllocSize)) {
2547  return GetElementPtrInst::CreateInBounds(
2548  GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2549  }
2550  }
2551  }
2552  }
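 // For illustration, a hypothetical case where the constant offset is provably
 // inside the allocation:
 //   %a = alloca [16 x i8]
 //   %g = getelementptr [16 x i8], [16 x i8]* %a, i64 0, i64 4
 // the non-inbounds GEP is recreated as an inbounds GEP, because the offset (4)
 // is non-negative and does not exceed the 16-byte alloca size.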
2553 
2554  if (Instruction *R = foldSelectGEP(GEP, Builder))
2555  return R;
2556 
2557  return nullptr;
2558 }
2559 
2560 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2561  Instruction *AI) {
2562  if (isa<ConstantPointerNull>(V))
2563  return true;
2564  if (auto *LI = dyn_cast<LoadInst>(V))
2565  return isa<GlobalVariable>(LI->getPointerOperand());
2566  // Two distinct allocations will never be equal.
2567  return isAllocLikeFn(V, &TLI) && V != AI;
2568 }
2569 
2570 /// Given a call CB which uses an address UsedV, return true if we can prove the
2571 /// call's only possible effect is storing to UsedV.
2572 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2573  const TargetLibraryInfo &TLI) {
2574  if (!CB.use_empty())
2575  // TODO: add recursion if returned attribute is present
2576  return false;
2577 
2578  if (CB.isTerminator())
2579  // TODO: remove implementation restriction
2580  return false;
2581 
2582  if (!CB.willReturn() || !CB.doesNotThrow())
2583  return false;
2584 
2585  // If the only possible side effect of the call is writing to the alloca,
2586  // and the result isn't used, we can safely remove any reads implied by the
2587  // call including those which might read the alloca itself.
2588  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2589  return Dest && Dest->Ptr == UsedV;
2590 }
2591 
2592 static bool isAllocSiteRemovable(Instruction *AI,
2593  SmallVectorImpl<WeakTrackingVH> &Users,
2594  const TargetLibraryInfo &TLI) {
2595  SmallVector<Instruction*, 4> Worklist;
2596  Worklist.push_back(AI);
2597 
2598  do {
2599  Instruction *PI = Worklist.pop_back_val();
2600  for (User *U : PI->users()) {
2601  Instruction *I = cast<Instruction>(U);
2602  switch (I->getOpcode()) {
2603  default:
2604  // Give up the moment we see something we can't handle.
2605  return false;
2606 
2607  case Instruction::AddrSpaceCast:
2608  case Instruction::BitCast:
2609  case Instruction::GetElementPtr:
2610  Users.emplace_back(I);
2611  Worklist.push_back(I);
2612  continue;
2613 
2614  case Instruction::ICmp: {
2615  ICmpInst *ICI = cast<ICmpInst>(I);
2616  // We can fold eq/ne comparisons with null to false/true, respectively.
2617  // We also fold comparisons in some conditions provided the alloc has
2618  // not escaped (see isNeverEqualToUnescapedAlloc).
2619  if (!ICI->isEquality())
2620  return false;
2621  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2622  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2623  return false;
2624  Users.emplace_back(I);
2625  continue;
2626  }
2627 
2628  case Instruction::Call:
2629  // Ignore no-op and store intrinsics.
2630  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2631  switch (II->getIntrinsicID()) {
2632  default:
2633  return false;
2634 
2635  case Intrinsic::memmove:
2636  case Intrinsic::memcpy:
2637  case Intrinsic::memset: {
2638  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2639  if (MI->isVolatile() || MI->getRawDest() != PI)
2640  return false;
2641  LLVM_FALLTHROUGH;
2642  }
2643  case Intrinsic::assume:
2644  case Intrinsic::invariant_start:
2645  case Intrinsic::invariant_end:
2646  case Intrinsic::lifetime_start:
2647  case Intrinsic::lifetime_end:
2648  case Intrinsic::objectsize:
2649  Users.emplace_back(I);
2650  continue;
2651  case Intrinsic::launder_invariant_group:
2652  case Intrinsic::strip_invariant_group:
2653  Users.emplace_back(I);
2654  Worklist.push_back(I);
2655  continue;
2656  }
2657  }
2658 
2659  if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2660  Users.emplace_back(I);
2661  continue;
2662  }
2663 
2664  if (isFreeCall(I, &TLI)) {
2665  Users.emplace_back(I);
2666  continue;
2667  }
2668 
2669  if (isReallocLikeFn(I, &TLI)) {
2670  Users.emplace_back(I);
2671  Worklist.push_back(I);
2672  continue;
2673  }
2674 
2675  return false;
2676 
2677  case Instruction::Store: {
2678  StoreInst *SI = cast<StoreInst>(I);
2679  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2680  return false;
2681  Users.emplace_back(I);
2682  continue;
2683  }
2684  }
2685  llvm_unreachable("missing a return?");
2686  }
2687  } while (!Worklist.empty());
2688  return true;
2689 }
2690 
2691 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2692  assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI));
2693 
2694  // If we have a malloc call which is only used in any amount of comparisons to
2695  // null and free calls, delete the calls and replace the comparisons with true
2696  // or false as appropriate.
2697 
2698  // This is based on the principle that we can substitute our own allocation
2699  // function (which will never return null) rather than knowledge of the
2700  // specific function being called. In some sense this can change the permitted
2701  // outputs of a program (when we convert a malloc to an alloca, the fact that
2702  // the allocation is now on the stack is potentially visible, for example),
2703  // but we believe in a permissible manner.
2704  SmallVector<WeakTrackingVH, 64> Users;
2705 
2706  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2707  // before each store.
2708  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2709  std::unique_ptr<DIBuilder> DIB;
2710  if (isa<AllocaInst>(MI)) {
2711  findDbgUsers(DVIs, &MI);
2712  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2713  }
2714 
2715  if (isAllocSiteRemovable(&MI, Users, TLI)) {
2716  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2717  // Lowering all @llvm.objectsize calls first because they may
2718  // use a bitcast/GEP of the alloca we are removing.
2719  if (!Users[i])
2720  continue;
2721 
2722  Instruction *I = cast<Instruction>(&*Users[i]);
2723 
2724  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2725  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2726  Value *Result =
2727  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
2728  replaceInstUsesWith(*I, Result);
2729  eraseInstFromFunction(*I);
2730  Users[i] = nullptr; // Skip examining in the next loop.
2731  }
2732  }
2733  }
2734  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2735  if (!Users[i])
2736  continue;
2737 
2738  Instruction *I = cast<Instruction>(&*Users[i]);
2739 
2740  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2741  replaceInstUsesWith(*C,
2742  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2743  C->isFalseWhenEqual()));
2744  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2745  for (auto *DVI : DVIs)
2746  if (DVI->isAddressOfVariable())
2747  ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2748  } else {
2749  // Casts, GEP, or anything else: we're about to delete this instruction,
2750  // so it can not have any valid uses.
2751  replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2752  }
2753  eraseInstFromFunction(*I);
2754  }
2755 
2756  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2757  // Replace invoke with a NOP intrinsic to maintain the original CFG
2758  Module *M = II->getModule();
2759  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2760  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2761  None, "", II->getParent());
2762  }
2763 
2764  // Remove debug intrinsics which describe the value contained within the
2765  // alloca. In addition to removing dbg.{declare,addr} which simply point to
2766  // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2767  //
2768  // ```
2769  // define void @foo(i32 %0) {
2770  // %a = alloca i32 ; Deleted.
2771  // store i32 %0, i32* %a
2772  // dbg.value(i32 %0, "arg0") ; Not deleted.
2773  // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2774  // call void @trivially_inlinable_no_op(i32* %a)
2775  // ret void
2776  // }
2777  // ```
2778  //
2779  // This may not be required if we stop describing the contents of allocas
2780  // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2781  // the LowerDbgDeclare utility.
2782  //
2783  // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2784  // "arg0" dbg.value may be stale after the call. However, failing to remove
2785  // the DW_OP_deref dbg.value causes large gaps in location coverage.
2786  for (auto *DVI : DVIs)
2787  if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2788  DVI->eraseFromParent();
2789 
2790  return eraseInstFromFunction(MI);
2791  }
2792  return nullptr;
2793 }
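// For illustration, a hypothetical removable allocation site (assuming the
// usual libc malloc/free declarations):
//   %m = call i8* @malloc(i64 16)
//   %c = icmp eq i8* %m, null
//   store i8 0, i8* %m
//   call void @free(i8* %m)
// everything is erased and %c is replaced by 'false', since the only uses of
// %m are a null comparison, a dead store, and the matching free.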
2794 
2795 /// Move the call to free before a NULL test.
2796 ///
2797 /// Check if this free is accessed after its argument has been tested
2798 /// against NULL (property 0).
2799 /// If yes, it is legal to move this call into its predecessor block.
2800 ///
2801 /// The move is performed only if the block containing the call to free
2802 /// will be removed, i.e.:
2803 /// 1. it has only one predecessor P, and P has two successors
2804 /// 2. it contains the call, noops, and an unconditional branch
2805 /// 3. its successor is the same as its predecessor's successor
2806 ///
2807 /// Profitability is not a concern here; this function should be called only
2808 /// if the caller already knows this transformation would be
2809 /// profitable (e.g., for code size).
2810 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2811  const DataLayout &DL) {
2812  Value *Op = FI.getArgOperand(0);
2813  BasicBlock *FreeInstrBB = FI.getParent();
2814  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2815 
2816  // Validate part of constraint #1: Only one predecessor
2817  // FIXME: We can extend the number of predecessors, but in that case, we
2818  // would duplicate the call to free in each predecessor and it may
2819  // not be profitable even for code size.
2820  if (!PredBB)
2821  return nullptr;
2822 
2823  // Validate constraint #2: Does this block contain only the call to
2824  // free, noops, and an unconditional branch?
2825  BasicBlock *SuccBB;
2826  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2827  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2828  return nullptr;
2829 
2830  // If there are only 2 instructions in the block at this point,
2831  // they are the call to free and the unconditional branch.
2832  // If there are more than 2 instructions, check that they are noops
2833  // i.e., they won't hurt the performance of the generated code.
2834  if (FreeInstrBB->size() != 2) {
2835  for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2836  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2837  continue;
2838  auto *Cast = dyn_cast<CastInst>(&Inst);
2839  if (!Cast || !Cast->isNoopCast(DL))
2840  return nullptr;
2841  }
2842  }
2843  // Validate the rest of constraint #1 by matching on the pred branch.
2844  Instruction *TI = PredBB->getTerminator();
2845  BasicBlock *TrueBB, *FalseBB;
2846  ICmpInst::Predicate Pred;
2847  if (!match(TI, m_Br(m_ICmp(Pred,
2848  m_CombineOr(m_Specific(Op),
2849  m_Specific(Op->stripPointerCasts())),
2850  m_Zero()),
2851  TrueBB, FalseBB)))
2852  return nullptr;
2853  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2854  return nullptr;
2855 
2856  // Validate constraint #3: Ensure the null case just falls through.
2857  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2858  return nullptr;
2859  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2860  "Broken CFG: missing edge from predecessor to successor");
2861 
2862  // At this point, we know that everything in FreeInstrBB can be moved
2863  // before TI.
2864  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
2865  if (&Instr == FreeInstrBBTerminator)
2866  break;
2867  Instr.moveBefore(TI);
2868  }
2869  assert(FreeInstrBB->size() == 1 &&
2870  "Only the branch instruction should remain");
2871 
2872  // Now that we've moved the call to free before the NULL check, we have to
2873  // remove any attributes on its parameter that imply it's non-null, because
2874  // those attributes might have only been valid because of the NULL check, and
2875  // we can get miscompiles if we keep them. This is conservative if non-null is
2876  // also implied by something other than the NULL check, but it's guaranteed to
2877  // be correct, and the conservativeness won't matter in practice, since the
2878  // attributes are irrelevant for the call to free itself and the pointer
2879  // shouldn't be used after the call.
2880  AttributeList Attrs = FI.getAttributes();
2881  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
2882  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
2883  if (Dereferenceable.isValid()) {
2884  uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
2885  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
2886  Attribute::Dereferenceable);
2887  Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
2888  }
2889  FI.setAttributes(Attrs);
2890 
2891  return &FI;
2892 }
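// For illustration, the CFG shape this targets (hypothetical labels):
//   entry:   %c = icmp eq i8* %p, null
//            br i1 %c, label %exit, label %do_free
//   do_free: call void @free(i8* %p)
//            br label %exit
// The call to free is hoisted into 'entry', after which SimplifyCFG can delete
// the now-empty 'do_free' block.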
2893 
2894 Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
2895  Value *Op = FI.getArgOperand(0);
2896 
2897  // free undef -> unreachable.
2898  if (isa<UndefValue>(Op)) {
2899  // Leave a marker since we can't modify the CFG here.
2900  CreateNonTerminatorUnreachable(&FI);
2901  return eraseInstFromFunction(FI);
2902  }
2903 
2904  // If we have 'free null' delete the instruction. This can happen in stl code
2905  // when lots of inlining happens.
2906  if (isa<ConstantPointerNull>(Op))
2907  return eraseInstFromFunction(FI);
2908 
2909  // If we had free(realloc(...)) with no intervening uses, then eliminate the
2910  // realloc() entirely.
2911  if (CallInst *CI = dyn_cast<CallInst>(Op)) {
2912  if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) {
2913  return eraseInstFromFunction(
2914  *replaceInstUsesWith(*CI, CI->getOperand(0)));
2915  }
2916  }
2917 
2918  // If we optimize for code size, try to move the call to free before the null
2919  // test so that simplify cfg can remove the empty block and dead code
2920  // elimination can remove the branch. I.e., this helps to turn something like:
2921  // if (foo) free(foo);
2922  // into
2923  // free(foo);
2924  //
2925  // Note that we can only do this for 'free' and not for any flavor of
2926  // 'operator delete'; there is no 'operator delete' symbol for which we are
2927  // permitted to invent a call, even if we're passing in a null pointer.
2928  if (MinimizeSize) {
2929  LibFunc Func;
2930  if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
2931  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
2932  return I;
2933  }
2934 
2935  return nullptr;
2936 }
2937 
2938 static bool isMustTailCall(Value *V) {
2939  if (auto *CI = dyn_cast<CallInst>(V))
2940  return CI->isMustTailCall();
2941  return false;
2942 }
2943 
2944 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
2945  if (RI.getNumOperands() == 0) // ret void
2946  return nullptr;
2947 
2948  Value *ResultOp = RI.getOperand(0);
2949  Type *VTy = ResultOp->getType();
2950  if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
2951  return nullptr;
2952 
2953  // Don't replace result of musttail calls.
2954  if (isMustTailCall(ResultOp))
2955  return nullptr;
2956 
2957  // There might be assume intrinsics dominating this return that completely
2958  // determine the value. If so, constant fold it.
2959  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
2960  if (Known.isConstant())
2961  return replaceOperand(RI, 0,
2962  Constant::getIntegerValue(VTy, Known.getConstant()));
2963 
2964  return nullptr;
2965 }
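// For illustration, a hypothetical return value pinned by an assumption:
//   %eq = icmp eq i32 %x, 42
//   call void @llvm.assume(i1 %eq)
//   ret i32 %x
// computeKnownBits proves %x is exactly 42 at the return, so the return
// operand is replaced by the constant 42.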
2966 
2967 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
2968 Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
2969  // Try to remove the previous instruction if it must lead to unreachable.
2970  // This includes instructions like stores and "llvm.assume" that may not get
2971  // removed by simple dead code elimination.
2972  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
2973  // While we theoretically can erase EH, that would result in a block that
2974  // used to start with an EH no longer starting with EH, which is invalid.
2975  // To make it valid, we'd need to fixup predecessors to no longer refer to
2976  // this block, but that changes CFG, which is not allowed in InstCombine.
2977  if (Prev->isEHPad())
2978  return nullptr; // Can not drop any more instructions. We're done here.
2979 
2980  if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
2981  return nullptr; // Can not drop any more instructions. We're done here.
2982  // Otherwise, this instruction can be freely erased,
2983  // even if it is not side-effect free.
2984 
2985  // A value may still have uses before we process it here (for example, in
2986  // another unreachable block), so convert those to poison.
2987  replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
2988  eraseInstFromFunction(*Prev);
2989  }
2990  assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
2991  // FIXME: recurse into unconditional predecessors?
2992  return nullptr;
2993 }
2994 
2995 Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
2996  assert(BI.isUnconditional() && "Only for unconditional branches.");
2997 
2998  // If this store is the second-to-last instruction in the basic block
2999  // (excluding debug info and bitcasts of pointers) and if the block ends with
3000  // an unconditional branch, try to move the store to the successor block.
3001 
3002  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3003  auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3004  return BBI->isDebugOrPseudoInst() ||
3005  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3006  };
3007 
3008  BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3009  do {
3010  if (BBI != FirstInstr)
3011  --BBI;
3012  } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3013 
3014  return dyn_cast<StoreInst>(BBI);
3015  };
3016 
3017  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3018  if (mergeStoreIntoSuccessor(*SI))
3019  return &BI;
3020 
3021  return nullptr;
3022 }
3023 
3024 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3025  if (BI.isUnconditional())
3026  return visitUnconditionalBranchInst(BI);
3027 
3028  // Change br (not X), label True, label False to: br X, label False, True
3029  Value *X = nullptr;
3030  if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
3031  !isa<Constant>(X)) {
3032  // Swap Destinations and condition...
3033  BI.swapSuccessors();
3034  return replaceOperand(BI, 0, X);
3035  }
3036 
3037  // If the condition is irrelevant, remove the use so that other
3038  // transforms on the condition become more effective.
3039  if (!isa<ConstantInt>(BI.getCondition()) &&
3040  BI.getSuccessor(0) == BI.getSuccessor(1))
3041  return replaceOperand(
3042  BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
3043 
3044  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3045  CmpInst::Predicate Pred;
3046  if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
3047  m_BasicBlock(), m_BasicBlock())) &&
3048  !isCanonicalPredicate(Pred)) {
3049  // Swap destinations and condition.
3050  CmpInst *Cond = cast<CmpInst>(BI.getCondition());
3051  Cond->setPredicate(CmpInst::getInversePredicate(Pred));
3052  BI.swapSuccessors();
3053  Worklist.push(Cond);
3054  return &BI;
3055  }
3056 
3057  return nullptr;
3058 }
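// Illustrative example of the branch canonicalization above (not part of the
// original source):
//   %n = xor i1 %x, true
//   br i1 %n, label %T, label %F
// becomes
//   br i1 %x, label %F, label %T
// after swapping the successors; the 'not' usually becomes dead and is later
// removed by the worklist DCE.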
3059 
3060 Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3061  Value *Cond = SI.getCondition();
3062  Value *Op0;
3063  ConstantInt *AddRHS;
3064  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3065  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3066  for (auto Case : SI.cases()) {
3067  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3068  assert(isa<ConstantInt>(NewCase) &&
3069  "Result of expression should be constant");
3070  Case.setValue(cast<ConstantInt>(NewCase));
3071  }
3072  return replaceOperand(SI, 0, Op0);
3073  }
3074 
3075  KnownBits Known = computeKnownBits(Cond, 0, &SI);
3076  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3077  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3078 
3079  // Compute the number of leading bits we can ignore.
3080  // TODO: A better way to determine this would use ComputeNumSignBits().
3081  for (auto &C : SI.cases()) {
3082  LeadingKnownZeros = std::min(
3083  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
3084  LeadingKnownOnes = std::min(
3085  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
3086  }
3087 
3088  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3089 
3090  // Shrink the condition operand if the new type is smaller than the old type.
3091  // But do not shrink to a non-standard type, because backend can't generate
3092  // good code for that yet.
3093  // TODO: We can make it aggressive again after fixing PR39569.
3094  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3095  shouldChangeType(Known.getBitWidth(), NewWidth)) {
3096  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3097  Builder.SetInsertPoint(&SI);
3098  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3099 
3100  for (auto Case : SI.cases()) {
3101  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3102  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3103  }
3104  return replaceOperand(SI, 0, NewCond);
3105  }
3106 
3107  return nullptr;
3108 }
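// Sketch of the width-shrinking transform above (assumed example, not from
// the source): if the upper 32 bits of the condition are known to be zero,
//   switch i64 %x, label %d [ i64 1, label %a
//                             i64 2, label %b ]
// is rewritten, when shouldChangeType() allows the narrower type, as
//   %t = trunc i64 %x to i32
//   switch i32 %t, label %d [ i32 1, label %a
//                             i32 2, label %b ]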
3109 
3110 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3111  Value *Agg = EV.getAggregateOperand();
3112 
3113  if (!EV.hasIndices())
3114  return replaceInstUsesWith(EV, Agg);
3115 
3116  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
3117  SQ.getWithInstruction(&EV)))
3118  return replaceInstUsesWith(EV, V);
3119 
3120  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3121  // We're extracting from an insertvalue instruction, compare the indices
3122  const unsigned *exti, *exte, *insi, *inse;
3123  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3124  exte = EV.idx_end(), inse = IV->idx_end();
3125  exti != exte && insi != inse;
3126  ++exti, ++insi) {
3127  if (*insi != *exti)
3128  // The insert and extract both reference distinctly different elements.
3129  // This means the extract is not influenced by the insert, and we can
3130  // replace the aggregate operand of the extract with the aggregate
3131  // operand of the insert. i.e., replace
3132  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3133  // %E = extractvalue { i32, { i32 } } %I, 0
3134  // with
3135  // %E = extractvalue { i32, { i32 } } %A, 0
3136  return ExtractValueInst::Create(IV->getAggregateOperand(),
3137  EV.getIndices());
3138  }
3139  if (exti == exte && insi == inse)
3140  // Both iterators are at the end: Index lists are identical. Replace
3141  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3142  // %C = extractvalue { i32, { i32 } } %B, 1, 0
3143  // with "i32 42"
3144  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3145  if (exti == exte) {
3146  // The extract list is a prefix of the insert list. i.e. replace
3147  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3148  // %E = extractvalue { i32, { i32 } } %I, 1
3149  // with
3150  // %X = extractvalue { i32, { i32 } } %A, 1
3151  // %E = insertvalue { i32 } %X, i32 42, 0
3152  // by switching the order of the insert and extract (though the
3153  // insertvalue should be left in, since it may have other uses).
3154  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3155  EV.getIndices());
3156  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3157  makeArrayRef(insi, inse));
3158  }
3159  if (insi == inse)
3160  // The insert list is a prefix of the extract list
3161  // We can simply remove the common indices from the extract and make it
3162  // operate on the inserted value instead of the insertvalue result.
3163  // i.e., replace
3164  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3165  // %E = extractvalue { i32, { i32 } } %I, 1, 0
3166  // with
3167  // %E = extractvalue { i32 } { i32 42 }, 0
3168  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3169  makeArrayRef(exti, exte));
3170  }
3171  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
3172  // We're extracting from an overflow intrinsic, see if we're the only user,
3173  // which allows us to simplify multiple result intrinsics to simpler
3174  // things that just get one value.
3175  if (WO->hasOneUse()) {
3176  // Check if we're grabbing only the result of a 'with overflow' intrinsic
3177  // and replace it with a traditional binary instruction.
3178  if (*EV.idx_begin() == 0) {
3179  Instruction::BinaryOps BinOp = WO->getBinaryOp();
3180  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3181  // Replace the old instruction's uses with poison.
3182  replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3183  eraseInstFromFunction(*WO);
3184  return BinaryOperator::Create(BinOp, LHS, RHS);
3185  }
3186 
3187  assert(*EV.idx_begin() == 1 &&
3188  "unexpected extract index for overflow inst");
3189 
3190  // If only the overflow result is used, and the right hand side is a
3191  // constant (or constant splat), we can remove the intrinsic by directly
3192  // checking for overflow.
3193  const APInt *C;
3194  if (match(WO->getRHS(), m_APInt(C))) {
3195  // Compute the no-wrap range for LHS given RHS=C, then construct an
3196  // equivalent icmp, potentially using an offset.
3197  ConstantRange NWR =
3198  ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
3199  WO->getNoWrapKind());
3200 
3201  CmpInst::Predicate Pred;
3202  APInt NewRHSC, Offset;
3203  NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3204  auto *OpTy = WO->getRHS()->getType();
3205  auto *NewLHS = WO->getLHS();
3206  if (Offset != 0)
3207  NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3208  return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3209  ConstantInt::get(OpTy, NewRHSC));
3210  }
3211  }
3212  }
3213  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
3214  // If the (non-volatile) load only has one use, we can rewrite this to a
3215  // load from a GEP. This reduces the size of the load. If a load is used
3216  // only by extractvalue instructions then this either must have been
3217  // optimized before, or it is a struct with padding, in which case we
3218  // don't want to do the transformation as it loses padding knowledge.
3219  if (L->isSimple() && L->hasOneUse()) {
3220  // extractvalue has integer indices, getelementptr has Value*s. Convert.
3221  SmallVector<Value*, 4> Indices;
3222  // Prefix an i32 0 since we need the first element.
3223  Indices.push_back(Builder.getInt32(0));
3224  for (unsigned Idx : EV.indices())
3225  Indices.push_back(Builder.getInt32(Idx));
3226 
3227  // We need to insert these at the location of the old load, not at that of
3228  // the extractvalue.
3229  Builder.SetInsertPoint(L);
3230  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3231  L->getPointerOperand(), Indices);
3232  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3233  // Whatever aliasing information we had for the original load must also
3234  // hold for the smaller load, so propagate the annotations.
3235  NL->setAAMetadata(L->getAAMetadata());
3236  // Returning the load directly will cause the main loop to insert it in
3237  // the wrong spot, so use replaceInstUsesWith().
3238  return replaceInstUsesWith(EV, NL);
3239  }
3240  // We could simplify extracts from other values. Note that nested extracts may
3241  // already be simplified implicitly by the above: extract (extract (insert) )
3242  // will be translated into extract ( insert ( extract ) ) first and then just
3243  // the value inserted, if appropriate. Similarly for extracts from single-use
3244  // loads: extract (extract (load)) will be translated to extract (load (gep))
3245  // and if again single-use then via load (gep (gep)) to load (gep).
3246  // However, double extracts from e.g. function arguments or return values
3247  // aren't handled yet.
3248  return nullptr;
3249 }
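// Illustrative instance of the overflow-intrinsic fold above (hypothetical
// values, not from the source): when only the overflow bit of
//   %s  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1)
//   %ov = extractvalue { i32, i1 } %s, 1
// is used, the no-wrap region for RHS == 1 yields the equivalent compare
//   %ov = icmp eq i32 %x, 2147483647
// (the inverse of the "no overflow" predicate returned by getEquivalentICmp).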
3250 
3251 /// Return 'true' if the given typeinfo will match anything.
3252 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3253  switch (Personality) {
3254  case EHPersonality::GNU_C:
3255  case EHPersonality::GNU_C_SjLj:
3256  case EHPersonality::Rust:
3257  // The GCC C EH and Rust personality only exists to support cleanups, so
3258  // it's not clear what the semantics of catch clauses are.
3259  return false;
3260  case EHPersonality::Unknown:
3261  return false;
3262  case EHPersonality::GNU_Ada:
3263  // While __gnat_all_others_value will match any Ada exception, it doesn't
3264  // match foreign exceptions (or didn't, before gcc-4.7).
3265  return false;
3266  case EHPersonality::GNU_CXX:
3267  case EHPersonality::GNU_CXX_SjLj:
3268  case EHPersonality::GNU_ObjC:
3269  case EHPersonality::MSVC_X86SEH:
3270  case EHPersonality::MSVC_TableSEH:
3271  case EHPersonality::MSVC_CXX:
3272  case EHPersonality::CoreCLR:
3273  case EHPersonality::Wasm_CXX:
3274  case EHPersonality::XL_CXX:
3275  return TypeInfo->isNullValue();
3276  }
3277  llvm_unreachable("invalid enum");
3278 }
3279 
3280 static bool shorter_filter(const Value *LHS, const Value *RHS) {
3281  return
3282  cast<ArrayType>(LHS->getType())->getNumElements()
3283  <
3284  cast<ArrayType>(RHS->getType())->getNumElements();
3285 }
3286 
3287 Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3288  // The logic here should be correct for any real-world personality function.
3289  // However if that turns out not to be true, the offending logic can always
3290  // be conditioned on the personality function, like the catch-all logic is.
3291  EHPersonality Personality =
3292  classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3293 
3294  // Simplify the list of clauses, eg by removing repeated catch clauses
3295  // (these are often created by inlining).
3296  bool MakeNewInstruction = false; // If true, recreate using the following:
3297  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3298  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3299 
3300  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3301  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3302  bool isLastClause = i + 1 == e;
3303  if (LI.isCatch(i)) {
3304  // A catch clause.
3305  Constant *CatchClause = LI.getClause(i);
3306  Constant *TypeInfo = CatchClause->stripPointerCasts();
3307 
3308  // If we already saw this clause, there is no point in having a second
3309  // copy of it.
3310  if (AlreadyCaught.insert(TypeInfo).second) {
3311  // This catch clause was not already seen.
3312  NewClauses.push_back(CatchClause);
3313  } else {
3314  // Repeated catch clause - drop the redundant copy.
3315  MakeNewInstruction = true;
3316  }
3317 
3318  // If this is a catch-all then there is no point in keeping any following
3319  // clauses or marking the landingpad as having a cleanup.
3320  if (isCatchAll(Personality, TypeInfo)) {
3321  if (!isLastClause)
3322  MakeNewInstruction = true;
3323  CleanupFlag = false;
3324  break;
3325  }
3326  } else {
3327  // A filter clause. If any of the filter elements were already caught
3328  // then they can be dropped from the filter. It is tempting to try to
3329  // exploit the filter further by saying that any typeinfo that does not
3330  // occur in the filter can't be caught later (and thus can be dropped).
3331  // However this would be wrong, since typeinfos can match without being
3332  // equal (for example if one represents a C++ class, and the other some
3333  // class derived from it).
3334  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3335  Constant *FilterClause = LI.getClause(i);
3336  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3337  unsigned NumTypeInfos = FilterType->getNumElements();
3338 
3339  // An empty filter catches everything, so there is no point in keeping any
3340  // following clauses or marking the landingpad as having a cleanup. By
3341  // dealing with this case here the following code is made a bit simpler.
3342  if (!NumTypeInfos) {
3343  NewClauses.push_back(FilterClause);
3344  if (!isLastClause)
3345  MakeNewInstruction = true;
3346  CleanupFlag = false;
3347  break;
3348  }
3349 
3350  bool MakeNewFilter = false; // If true, make a new filter.
3351  SmallVector<Constant *, 16> NewFilterElts; // New elements.
3352  if (isa<ConstantAggregateZero>(FilterClause)) {
3353  // Not an empty filter - it contains at least one null typeinfo.
3354  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3355  Constant *TypeInfo =
3356  Constant::getNullValue(FilterType->getElementType());
3357  // If this typeinfo is a catch-all then the filter can never match.
3358  if (isCatchAll(Personality, TypeInfo)) {
3359  // Throw the filter away.
3360  MakeNewInstruction = true;
3361  continue;
3362  }
3363 
3364  // There is no point in having multiple copies of this typeinfo, so
3365  // discard all but the first copy if there is more than one.
3366  NewFilterElts.push_back(TypeInfo);
3367  if (NumTypeInfos > 1)
3368  MakeNewFilter = true;
3369  } else {
3370  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3371  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3372  NewFilterElts.reserve(NumTypeInfos);
3373 
3374  // Remove any filter elements that were already caught or that already
3375  // occurred in the filter. While there, see if any of the elements are
3376  // catch-alls. If so, the filter can be discarded.
3377  bool SawCatchAll = false;
3378  for (unsigned j = 0; j != NumTypeInfos; ++j) {
3379  Constant *Elt = Filter->getOperand(j);
3380  Constant *TypeInfo = Elt->stripPointerCasts();
3381  if (isCatchAll(Personality, TypeInfo)) {
3382  // This element is a catch-all. Bail out, noting this fact.
3383  SawCatchAll = true;
3384  break;
3385  }
3386 
3387  // Even if we've seen a type in a catch clause, we don't want to
3388  // remove it from the filter. An unexpected type handler may be
3389  // set up for a call site which throws an exception of the same
3390  // type caught. In order for the exception thrown by the unexpected
3391  // handler to propagate correctly, the filter must be correctly
3392  // described for the call site.
3393  //
3394  // Example:
3395  //
3396  // void unexpected() { throw 1;}
3397  // void foo() throw (int) {
3398  // std::set_unexpected(unexpected);
3399  // try {
3400  // throw 2.0;
3401  // } catch (int i) {}
3402  // }
3403 
3404  // There is no point in having multiple copies of the same typeinfo in
3405  // a filter, so only add it if we didn't already.
3406  if (SeenInFilter.insert(TypeInfo).second)
3407  NewFilterElts.push_back(cast<Constant>(Elt));
3408  }
3409  // A filter containing a catch-all cannot match anything by definition.
3410  if (SawCatchAll) {
3411  // Throw the filter away.
3412  MakeNewInstruction = true;
3413  continue;
3414  }
3415 
3416  // If we dropped something from the filter, make a new one.
3417  if (NewFilterElts.size() < NumTypeInfos)
3418  MakeNewFilter = true;
3419  }
3420  if (MakeNewFilter) {
3421  FilterType = ArrayType::get(FilterType->getElementType(),
3422  NewFilterElts.size());
3423  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3424  MakeNewInstruction = true;
3425  }
3426 
3427  NewClauses.push_back(FilterClause);
3428 
3429  // If the new filter is empty then it will catch everything so there is
3430  // no point in keeping any following clauses or marking the landingpad
3431  // as having a cleanup. The case of the original filter being empty was
3432  // already handled above.
3433  if (MakeNewFilter && !NewFilterElts.size()) {
3434  assert(MakeNewInstruction && "New filter but not a new instruction!");
3435  CleanupFlag = false;
3436  break;
3437  }
3438  }
3439  }
3440 
3441  // If several filters occur in a row then reorder them so that the shortest
3442  // filters come first (those with the smallest number of elements). This is
3443  // advantageous because shorter filters are more likely to match, speeding up
3444  // unwinding, but mostly because it increases the effectiveness of the other
3445  // filter optimizations below.
3446  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3447  unsigned j;
3448  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3449  for (j = i; j != e; ++j)
3450  if (!isa<ArrayType>(NewClauses[j]->getType()))
3451  break;
3452 
3453  // Check whether the filters are already sorted by length. We need to know
3454  // if sorting them is actually going to do anything so that we only make a
3455  // new landingpad instruction if it does.
3456  for (unsigned k = i; k + 1 < j; ++k)
3457  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3458  // Not sorted, so sort the filters now. Doing an unstable sort would be
3459  // correct too but reordering filters pointlessly might confuse users.
3460  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3461  shorter_filter);
3462  MakeNewInstruction = true;
3463  break;
3464  }
3465 
3466  // Look for the next batch of filters.
3467  i = j + 1;
3468  }
3469 
3470  // If typeinfos matched if and only if equal, then the elements of a filter L
3471  // that occurs later than a filter F could be replaced by the intersection of
3472  // the elements of F and L. In reality two typeinfos can match without being
3473  // equal (for example if one represents a C++ class, and the other some class
3474  // derived from it) so it would be wrong to perform this transform in general.
3475  // However the transform is correct and useful if F is a subset of L. In that
3476  // case L can be replaced by F, and thus removed altogether since repeating a
3477  // filter is pointless. So here we look at all pairs of filters F and L where
3478  // L follows F in the list of clauses, and remove L if every element of F is
3479  // an element of L. This can occur when inlining C++ functions with exception
3480  // specifications.
3481  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3482  // Examine each filter in turn.
3483  Value *Filter = NewClauses[i];
3484  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3485  if (!FTy)
3486  // Not a filter - skip it.
3487  continue;
3488  unsigned FElts = FTy->getNumElements();
3489  // Examine each filter following this one. Doing this backwards means that
3490  // we don't have to worry about filters disappearing under us when removed.
3491  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3492  Value *LFilter = NewClauses[j];
3493  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3494  if (!LTy)
3495  // Not a filter - skip it.
3496  continue;
3497  // If Filter is a subset of LFilter, i.e. every element of Filter is also
3498  // an element of LFilter, then discard LFilter.
3499  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3500  // If Filter is empty then it is a subset of LFilter.
3501  if (!FElts) {
3502  // Discard LFilter.
3503  NewClauses.erase(J);
3504  MakeNewInstruction = true;
3505  // Move on to the next filter.
3506  continue;
3507  }
3508  unsigned LElts = LTy->getNumElements();
3509  // If Filter is longer than LFilter then it cannot be a subset of it.
3510  if (FElts > LElts)
3511  // Move on to the next filter.
3512  continue;
3513  // At this point we know that LFilter has at least one element.
3514  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3515  // Filter is a subset of LFilter iff Filter contains only zeros (as we
3516  // already know that Filter is not longer than LFilter).
3517  if (isa<ConstantAggregateZero>(Filter)) {
3518  assert(FElts <= LElts && "Should have handled this case earlier!");
3519  // Discard LFilter.
3520  NewClauses.erase(J);
3521  MakeNewInstruction = true;
3522  }
3523  // Move on to the next filter.
3524  continue;
3525  }
3526  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3527  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3528  // Since Filter is non-empty and contains only zeros, it is a subset of
3529  // LFilter iff LFilter contains a zero.
3530  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3531  for (unsigned l = 0; l != LElts; ++l)
3532  if (LArray->getOperand(l)->isNullValue()) {
3533  // LFilter contains a zero - discard it.
3534  NewClauses.erase(J);
3535  MakeNewInstruction = true;
3536  break;
3537  }
3538  // Move on to the next filter.
3539  continue;
3540  }
3541  // At this point we know that both filters are ConstantArrays. Loop over
3542  // operands to see whether every element of Filter is also an element of
3543  // LFilter. Since filters tend to be short this is probably faster than
3544  // using a method that scales nicely.
3545  ConstantArray *FArray = cast<ConstantArray>(Filter);
3546  bool AllFound = true;
3547  for (unsigned f = 0; f != FElts; ++f) {
3548  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3549  AllFound = false;
3550  for (unsigned l = 0; l != LElts; ++l) {
3551  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3552  if (LTypeInfo == FTypeInfo) {
3553  AllFound = true;
3554  break;
3555  }
3556  }
3557  if (!AllFound)
3558  break;
3559  }
3560  if (AllFound) {
3561  // Discard LFilter.
3562  NewClauses.erase(J);
3563  MakeNewInstruction = true;
3564  }
3565  // Move on to the next filter.
3566  }
3567  }
3568 
3569  // If we changed any of the clauses, replace the old landingpad instruction
3570  // with a new one.
3571  if (MakeNewInstruction) {
3572  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3573  NewClauses.size());
3574  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3575  NLI->addClause(NewClauses[i]);
3576  // A landing pad with no clauses must have the cleanup flag set. It is
3577  // theoretically possible, though highly unlikely, that we eliminated all
3578  // clauses. If so, force the cleanup flag to true.
3579  if (NewClauses.empty())
3580  CleanupFlag = true;
3581  NLI->setCleanup(CleanupFlag);
3582  return NLI;
3583  }
3584 
3585  // Even if none of the clauses changed, we may nonetheless have understood
3586  // that the cleanup flag is pointless. Clear it if so.
3587  if (LI.isCleanup() != CleanupFlag) {
3588  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3589  LI.setCleanup(CleanupFlag);
3590  return &LI;
3591  }
3592 
3593  return nullptr;
3594 }
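// Illustrative example (hypothetical typeinfo symbol, not from the source):
// inlining frequently produces repeated catch clauses such as
//   %lp = landingpad { i8*, i32 } catch i8* @ti_int catch i8* @ti_int
// which the duplicate-clause logic above rebuilds as
//   %lp = landingpad { i8*, i32 } catch i8* @ti_int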
3595 
3596 Value *
3597 InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3598  // Try to push freeze through instructions that propagate but don't produce
3599  // poison as far as possible. If an operand of freeze follows three
3600  // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
3601  // guaranteed-non-poison operands then push the freeze through to the one
3602  // operand that is not guaranteed non-poison. The actual transform is as
3603  // follows.
3604  // Op1 = ...                        ; Op1 can be poison
3605  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and exactly one
3606  //                                  ; operand that may be poison (Op1)
3607  // ... = Freeze(Op0)
3608  // =>
3609  // Op1 = ...
3610  // Op1.fr = Freeze(Op1)
3611  // ... = Inst(Op1.fr, NonPoisonOps...)
3612  auto *OrigOp = OrigFI.getOperand(0);
3613  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3614 
3615  // While we could change the other users of OrigOp to use freeze(OrigOp), that
3616  // potentially reduces their optimization potential, so let's only do this iff
3617  // the OrigOp is only used by the freeze.
3618  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3619  return nullptr;
3620 
3621  // We can't push the freeze through an instruction which can itself create
3622  // poison. If the only source of new poison is flags, we can simply
3623  // strip them (since we know the only use is the freeze and nothing can
3624  // benefit from them.)
3625  if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
3626  return nullptr;
3627 
3628  // If operand is guaranteed not to be poison, there is no need to add freeze
3629  // to the operand. So we first find the operand that is not guaranteed to be
3630  // poison.
3631  Use *MaybePoisonOperand = nullptr;
3632  for (Use &U : OrigOpInst->operands()) {
3633  if (isGuaranteedNotToBeUndefOrPoison(U.get()))
3634  continue;
3635  if (!MaybePoisonOperand)
3636  MaybePoisonOperand = &U;
3637  else
3638  return nullptr;
3639  }
3640 
3641  OrigOpInst->dropPoisonGeneratingFlags();
3642 
3643  // If all operands are guaranteed to be non-poison, we can drop freeze.
3644  if (!MaybePoisonOperand)
3645  return OrigOp;
3646 
3647  auto *FrozenMaybePoisonOperand = new FreezeInst(
3648  MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3649 
3650  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3651  FrozenMaybePoisonOperand->insertBefore(OrigOpInst);
3652  return OrigOp;
3653 }
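// Sketch of the push-through transform above (assumed example, not from the
// source): with %x possibly poison and %a used only by the freeze,
//   %a = add nuw i32 %x, 1
//   %f = freeze i32 %a
// becomes (after dropping the poison-generating 'nuw' flag)
//   %x.fr = freeze i32 %x
//   %a    = add i32 %x.fr, 1
// and all uses of %f are replaced by %a.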
3654 
3655 bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
3656  Value *Op = FI.getOperand(0);
3657 
3658  if (isa<Constant>(Op))
3659  return false;
3660 
3661  bool Changed = false;
3662  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3663  bool Dominates = DT.dominates(&FI, U);
3664  Changed |= Dominates;
3665  return Dominates;
3666  });
3667 
3668  return Changed;
3669 }
3670 
3671 Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
3672  Value *Op0 = I.getOperand(0);
3673 
3674  if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3675  return replaceInstUsesWith(I, V);
3676 
3677  // freeze (phi const, x) --> phi const, (freeze x)
3678  if (auto *PN = dyn_cast<PHINode>(Op0)) {
3679  if (Instruction *NV = foldOpIntoPhi(I, PN))
3680  return NV;
3681  }
3682 
3683  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3684  return replaceInstUsesWith(I, NI);
3685 
3686  if (match(Op0, m_Undef())) {
3687  // If I is freeze(undef), see its uses and fold it to the best constant.
3688  // - or: pick -1
3689  // - select's condition: pick the value that leads to choosing a constant
3690  // - other ops: pick 0
3691  Constant *BestValue = nullptr;
3692  Constant *NullValue = Constant::getNullValue(I.getType());
3693  for (const auto *U : I.users()) {
3694  Constant *C = NullValue;
3695 
3696  if (match(U, m_Or(m_Value(), m_Value())))
3697  C = Constant::getAllOnesValue(I.getType());
3698  else if (const auto *SI = dyn_cast<SelectInst>(U)) {
3699  if (SI->getCondition() == &I) {
3700  APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
3701  C = Constant::getIntegerValue(I.getType(), CondVal);
3702  }
3703  }
3704 
3705  if (!BestValue)
3706  BestValue = C;
3707  else if (BestValue != C)
3708  BestValue = NullValue;
3709  }
3710 
3711  return replaceInstUsesWith(I, BestValue);
3712  }
3713 
3714  // Replace all dominated uses of Op to freeze(Op).
3715  if (freezeDominatedUses(I))
3716  return &I;
3717 
3718  return nullptr;
3719 }
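// Illustrative example of the freeze(undef) fold above (not from the source):
// if every user is an 'or', BestValue is all-ones, so
//   %f = freeze i32 undef
//   %r = or i32 %f, %y
// is simplified by replacing %f with 'i32 -1'; with mixed or unknown users
// the fold falls back to the null value of the type.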
3720 
3721 /// Check for case where the call writes to an otherwise dead alloca. This
3722 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
3723 /// helper *only* analyzes the write; doesn't check any other legality aspect.
3724 static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
3725  auto *CB = dyn_cast<CallBase>(I);
3726  if (!CB)
3727  // TODO: handle e.g. store to alloca here - only worth doing if we extend
3728  // to allow reload along used path as described below. Otherwise, this
3729  // is simply a store to a dead allocation which will be removed.
3730  return false;
3731  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
3732  if (!Dest)
3733  return false;
3734  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
3735  if (!AI)
3736  // TODO: allow malloc?
3737  return false;
3738  // TODO: allow memory access dominated by move point? Note that since AI
3739  // could have a reference to itself captured by the call, we would need to
3740  // account for cycles in doing so.
3741  SmallVector<const User *> AllocaUsers;
3742  SmallPtrSet<const User *, 4> Visited;
3743  auto pushUsers = [&](const Instruction &I) {
3744  for (const User *U : I.users()) {
3745  if (Visited.insert(U).second)
3746  AllocaUsers.push_back(U);
3747  }
3748  };
3749  pushUsers(*AI);
3750  while (!AllocaUsers.empty()) {
3751  auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
3752  if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
3753  isa<AddrSpaceCastInst>(UserI)) {
3754  pushUsers(*UserI);
3755  continue;
3756  }
3757  if (UserI == CB)
3758  continue;
3759  // TODO: support lifetime.start/end here
3760  return false;
3761  }
3762  return true;
3763 }
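// Motivating shape for the check above (assumed C++ example with a
// hypothetical helper, not from the source):
//   int Unused;                    // otherwise-dead local; an alloca in IR
//   computePair(&Result, &Unused); // the call's only memory write hits &Unused
// The write into Unused is never observable elsewhere, so the call's memory
// effects do not block sinking it.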
3764 
3765 /// Try to move the specified instruction from its current block into the
3766 /// beginning of DestBlock, which can only happen if it's safe to move the
3767 /// instruction past all of the instructions between it and the end of its
3768 /// block.
3769 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
3770  TargetLibraryInfo &TLI) {
3771  assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
3772  BasicBlock *SrcBlock = I->getParent();
3773 
3774  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
3775  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
3776  I->isTerminator())
3777  return false;
3778 
3779  // Do not sink static or dynamic alloca instructions. Static allocas must
3780  // remain in the entry block, and dynamic allocas must not be sunk in between
3781  // a stacksave / stackrestore pair, which would incorrectly shorten its
3782  // lifetime.
3783  if (isa<AllocaInst>(I))
3784  return false;
3785 
3786  // Do not sink into catchswitch blocks.
3787  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3788  return false;
3789 
3790  // Do not sink convergent call instructions.
3791  if (auto *CI = dyn_cast<CallInst>(I)) {
3792  if (CI->isConvergent())
3793  return false;
3794  }
3795 
3796  // Unless we can prove that the memory write isn't visible except on the
3797  // path we're sinking to, we must bail.
3798  if (I->mayWriteToMemory()) {
3799  if (!SoleWriteToDeadLocal(I, TLI))
3800  return false;
3801  }
3802 
3803  // We can only sink load instructions if there is nothing between the load and
3804  // the end of block that could change the value.
3805  if (I->mayReadFromMemory()) {
3806  // We don't want to do any sophisticated alias analysis, so we only check
3807  // the instructions after I in I's parent block if we try to sink to its
3808  // successor block.
3809  if (DestBlock->getUniquePredecessor() != I->getParent())
3810  return false;
3811  for (BasicBlock::iterator Scan = std::next(I->getIterator()),
3812  E = I->getParent()->end();
3813  Scan != E; ++Scan)
3814  if (Scan->mayWriteToMemory())
3815  return false;
3816  }
3817 
3818  I->dropDroppableUses([DestBlock](const Use *U) {
3819  if (auto *I = dyn_cast<Instruction>(U->getUser()))
3820  return I->getParent() != DestBlock;
3821  return true;
3822  });
3823  /// FIXME: We could remove droppable uses that are not dominated by
3824  /// the new position.
3825 
3826  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
3827  I->moveBefore(&*InsertPos);
3828  ++NumSunkInst;
3829 
3830  // Also sink all related debug uses from the source basic block. Otherwise we
3831  // get debug use before the def. Attempt to salvage debug uses first, to
3832  // maximise the range variables have location for. If we cannot salvage, then
3833  // mark the location undef: we know it was supposed to receive a new location
3834  // here, but that computation has been sunk.
3835  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
3836  findDbgUsers(DbgUsers, I);
3837  // Process the sinking DbgUsers in reverse order, as we only want to clone the
3838  // last appearing debug intrinsic for each given variable.
3839  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
3840  for (DbgVariableIntrinsic *DVI : DbgUsers)
3841  if (DVI->getParent() == SrcBlock)
3842  DbgUsersToSink.push_back(DVI);
3843  llvm::sort(DbgUsersToSink,
3844  [](auto *A, auto *B) { return B->comesBefore(A); });
3845 
3846  SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
3847  SmallSet<DebugVariable, 4> SunkVariables;
3848  for (auto User : DbgUsersToSink) {
3849  // A dbg.declare instruction should not be cloned, since there can only be
3850  // one per variable fragment. It should be left in the original place
3851  // because the sunk instruction is not an alloca (otherwise we could not be
3852  // here).
3853  if (isa<DbgDeclareInst>(User))
3854  continue;
3855 
3856  DebugVariable DbgUserVariable =
3857  DebugVariable(User->getVariable(), User->getExpression(),
3858  User->getDebugLoc()->getInlinedAt());
3859 
3860  if (!SunkVariables.insert(DbgUserVariable).second)
3861  continue;
3862 
3863  DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
3864  if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
3865  DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
3866  LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
3867  }
3868 
3869  // Perform salvaging without the clones, then sink the clones.
3870  if (!DIIClones.empty()) {
3871  salvageDebugInfoForDbgValues(*I, DbgUsers);
3872  // The clones are in reverse order of original appearance, reverse again to
3873  // maintain the original order.
3874  for (auto &DIIClone : llvm::reverse(DIIClones)) {
3875  DIIClone->insertBefore(&*InsertPos);
3876  LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
3877  }
3878  }
3879 
3880  return true;
3881 }
3882 
3883 bool InstCombinerImpl::run() {
3884  while (!Worklist.isEmpty()) {
3885  // Walk deferred instructions in reverse order, and push them to the
3886  // worklist, which means they'll end up popped from the worklist in-order.
3887  while (Instruction *I = Worklist.popDeferred()) {
3888  // Check to see if we can DCE the instruction. We do this already here to
3889  // reduce the number of uses and thus allow other folds to trigger.
3890  // Note that eraseInstFromFunction() may push additional instructions on
3891  // the deferred worklist, so this will DCE whole instruction chains.
3892  if (isInstructionTriviallyDead(I, &TLI)) {
3893  eraseInstFromFunction(*I);
3894  ++NumDeadInst;
3895  continue;
3896  }
3897 
3898  Worklist.push(I);
3899  }
3900 
3901  Instruction *I = Worklist.removeOne();
3902  if (I == nullptr) continue; // skip null values.
3903 
3904  // Check to see if we can DCE the instruction.
3905  if (isInstructionTriviallyDead(I, &TLI)) {
3906  eraseInstFromFunction(*I);
3907  ++NumDeadInst;
3908  continue;
3909  }
3910 
3911  if (!DebugCounter::shouldExecute(VisitCounter))
3912  continue;
3913 
3914  // Instruction isn't dead, see if we can constant propagate it.
3915  if (!I->use_empty() &&
3916  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
3917  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
3918  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
3919  << '\n');
3920 
3921  // Add operands to the worklist.
3922  replaceInstUsesWith(*I, C);
3923  ++NumConstProp;
3924  if (isInstructionTriviallyDead(I, &TLI))
3925  eraseInstFromFunction(*I);
3926  MadeIRChange = true;
3927  continue;
3928  }
3929  }
3930 
3931  // See if we can trivially sink this instruction to its user if we can
3932  // prove that the successor is not executed more frequently than our block.
3933  // Return the UserBlock if successful.
3934  auto getOptionalSinkBlockForInst =
3935  [this](Instruction *I) -> Optional<BasicBlock *> {
3936  if (!EnableCodeSinking)
3937  return None;
3938  auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
3939  if (!UserInst)
3940  return None;
3941 
3942  BasicBlock *BB = I->getParent();
3943  BasicBlock *UserParent = nullptr;
3944 
3945  // Special handling for Phi nodes - get the block the use occurs in.
3946  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
3947  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
3948  if (PN->getIncomingValue(i) == I) {
3949  // Bail out if we have uses in different blocks. We don't do any
3950  // sophisticated analysis (i.e., finding NearestCommonDominator of these
3951  // use blocks).
3952  if (UserParent && UserParent != PN->getIncomingBlock(i))
3953  return None;
3954  UserParent = PN->getIncomingBlock(i);
3955  }
3956  }
3957  assert(UserParent && "expected to find user block!");
3958  } else
3959  UserParent = UserInst->getParent();
3960 
3961  // Try sinking to another block. If that block is unreachable, then do
3962  // not bother. SimplifyCFG should handle it.
3963  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
3964  return None;
3965 
3966  auto *Term = UserParent->getTerminator();
3967  // See if the user is one of our successors that has only one
3968  // predecessor, so that we don't have to split the critical edge.
3969  // Another option where we can sink is a block that ends with a
3970  // terminator that does not pass control to other block (such as
3971  // return or unreachable). In this case:
3972  // - I dominates the User (by SSA form);
3973  // - the User will be executed at most once.
3974  // So sinking I down to User is always profitable or neutral.
3975  if (UserParent->getUniquePredecessor() == BB ||
3976  (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
3977  assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
3978  return UserParent;
3979  }
3980  return None;
3981  };
3982 
3983  auto OptBB = getOptionalSinkBlockForInst(I);
3984  if (OptBB) {
3985  auto *UserParent = *OptBB;
3986  // Okay, the CFG is simple enough, try to sink this instruction.
3987  if (TryToSinkInstruction(I, UserParent, TLI)) {
3988  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
3989  MadeIRChange = true;
3990  // We'll add uses of the sunk instruction below, but since
3991  // sinking can expose opportunities for its *operands*, add
3992  // them to the worklist.
3993  for (Use &U : I->operands())
3994  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
3995  Worklist.push(OpI);
3996  }
3997  }
3998 
3999  // Now that we have an instruction, try combining it to simplify it.
4000  Builder.SetInsertPoint(I);
4001  Builder.CollectMetadataToCopy(
4002  I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4003 
4004 #ifndef NDEBUG
4005  std::string OrigI;
4006 #endif
4007  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4008  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4009 
4010  if (Instruction *Result = visit(*I)) {
4011  ++NumCombined;
4012  // Should we replace the old instruction with a new one?
4013  if (Result != I) {
4014  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4015  << " New = " << *Result << '\n');
4016 
4017  Result->copyMetadata(*I,
4018  {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4019  // Everything uses the new instruction now.
4020  I->replaceAllUsesWith(Result);
4021 
4022  // Move the name to the new instruction first.
4023  Result->takeName(I);
4024 
4025  // Insert the new instruction into the basic block...
4026  BasicBlock *InstParent = I->getParent();
4027  BasicBlock::iterator InsertPos = I->getIterator();
4028 
4029  // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4030  if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4031  // We need to fix up the insertion point.
4032  if (isa<PHINode>(I)) // PHI -> Non-PHI
4033  InsertPos = InstParent->getFirstInsertionPt();
4034  else // Non-PHI -> PHI
4035  InsertPos = InstParent->getFirstNonPHI()->getIterator();
4036  }
4037 
4038  InstParent->getInstList().insert(InsertPos, Result);
4039 
4040  // Push the new instruction and any users onto the worklist.
4041  Worklist.pushUsersToWorkList(*Result);
4042  Worklist.push(Result);
4043 
4044  eraseInstFromFunction(*I);
4045  } else {
4046  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4047  << " New = " << *I << '\n');
4048 
4049  // If the instruction was modified, it's possible that it is now dead.
4050  // if so, remove it.
4051  if (isInstructionTriviallyDead(I, &TLI)) {
4052  eraseInstFromFunction(*I);
4053  } else {
4054  Worklist.pushUsersToWorkList(*I);
4055  Worklist.push(I);
4056  }
4057  }
4058  MadeIRChange = true;
4059  }
4060  }
4061 
4062  Worklist.zap();
4063  return MadeIRChange;
4064 }
4065 
4066 // Track the scopes used by !alias.scope and !noalias. In a function, a
4067 // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4068 // by both sets. If not, the declaration of the scope can be safely omitted.
4069 // The MDNode of the scope can be omitted as well for the instructions that are
4070 // part of this function. We do not do that at this point, as this might become
4071 // too time consuming to do.
4072 class AliasScopeTracker {
4073  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4074  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4075 
4076 public:
4077  void analyse(Instruction *I) {
4078  // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4079  if (!I->hasMetadataOtherThanDebugLoc())
4080  return;
4081 
4082  auto Track = [](Metadata *ScopeList, auto &Container) {
4083  const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4084  if (!MDScopeList || !Container.insert(MDScopeList).second)
4085  return;
4086  for (auto &MDOperand : MDScopeList->operands())
4087  if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4088  Container.insert(MDScope);
4089  };
4090 
4091  Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4092  Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4093  }
4094 
4095  bool isNoAliasScopeDeclDead(Instruction *Inst) {
4096  NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4097  if (!Decl)
4098  return false;
4099 
4100  assert(Decl->use_empty() &&
4101  "llvm.experimental.noalias.scope.decl in use ?");
4102  const MDNode *MDSL = Decl->getScopeList();
4103  assert(MDSL->getNumOperands() == 1 &&
4104  "llvm.experimental.noalias.scope should refer to a single scope");
4105  auto &MDOperand = MDSL->getOperand(0);
4106  if (auto *MD = dyn_cast<MDNode>(MDOperand))
4107  return !UsedAliasScopesAndLists.contains(MD) ||
4108  !UsedNoAliasScopesAndLists.contains(MD);
4109 
4110  // Not an MDNode ? throw away.
4111  return true;
4112  }
4113 };
4114 
4115 /// Populate the IC worklist from a function, by walking it in depth-first
4116 /// order and adding all reachable code to the worklist.
4117 ///
4118 /// This has a couple of tricks to make the code faster and more powerful. In
4119 /// particular, we constant fold and DCE instructions as we go, to avoid adding
4120 /// them to the worklist (this significantly speeds up instcombine on code where
4121 /// many instructions are dead or constant). Additionally, if we find a branch
4122 /// whose condition is a known constant, we only visit the reachable successors.
4123 static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
4124  const TargetLibraryInfo *TLI,
4125  InstructionWorklist &ICWorklist) {
4126  bool MadeIRChange = false;
4127  SmallPtrSet<BasicBlock *, 32> Visited;
4128  SmallVector<BasicBlock *, 256> Worklist;
4129  Worklist.push_back(&F.front());
4130 
4131  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4132  DenseMap<Constant *, Constant *> FoldedConstants;
4133  AliasScopeTracker SeenAliasScopes;
4134 
4135  do {
4136  BasicBlock *BB = Worklist.pop_back_val();
4137 
4138  // We have now visited this block! If we've already been here, ignore it.
4139  if (!Visited.insert(BB).second)
4140  continue;
4141 
4142  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4143  // ConstantProp instruction if trivially constant.
4144  if (!Inst.use_empty() &&
4145  (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4146  if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
4147  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4148  << '\n');
4149  Inst.replaceAllUsesWith(C);
4150  ++NumConstProp;
4151  if (isInstructionTriviallyDead(&Inst, TLI))
4152  Inst.eraseFromParent();
4153  MadeIRChange = true;
4154  continue;
4155  }
4156 
4157  // See if we can constant fold its operands.
4158  for (Use &U : Inst.operands()) {
4159  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4160  continue;
4161 
4162  auto *C = cast<Constant>(U);
4163  Constant *&FoldRes = FoldedConstants[C];
4164  if (!FoldRes)
4165  FoldRes = ConstantFoldConstant(C, DL, TLI);
4166 
4167  if (FoldRes != C) {
4168  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4169  << "\n Old = " << *C
4170  << "\n New = " << *FoldRes << '\n');
4171  U = FoldRes;
4172  MadeIRChange = true;
4173  }
4174  }
4175 
4176  // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4177  // these call instructions consumes non-trivial amount of time and
4178  // provides no value for the optimization.
4179  if (!Inst.isDebugOrPseudoInst()) {
4180  InstrsForInstructionWorklist.push_back(&Inst);
4181  SeenAliasScopes.analyse(&Inst);
4182  }
4183  }
4184 
4185  // Recursively visit successors. If this is a branch or switch on a
4186  // constant, only visit the reachable successor.
4187  Instruction *TI = BB->getTerminator();
4188  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
4189  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
4190  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
4191  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
4192  Worklist.push_back(ReachableBB);
4193  continue;
4194  }
4195  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4196  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4197  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
4198  continue;
4199  }
4200  }
4201 
4202  append_range(Worklist, successors(TI));
4203  } while (!Worklist.empty());
4204 
4205  // Remove instructions inside unreachable blocks. This prevents the
4206  // instcombine code from having to deal with some bad special cases, and
4207  // reduces use counts of instructions.
4208  for (BasicBlock &BB : F) {
4209  if (Visited.count(&BB))
4210  continue;
4211 
4212  unsigned NumDeadInstInBB;
4213  unsigned NumDeadDbgInstInBB;
4214  std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4215  removeAllNonTerminatorAndEHPadInstructions(&BB);
4216 
4217  MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4218  NumDeadInst += NumDeadInstInBB;
4219  }
4220 
4221  // Once we've found all of the instructions to add to instcombine's worklist,
4222  // add them in reverse order. This way instcombine will visit from the top
4223  // of the function down. This jives well with the way that it adds all uses
4224  // of instructions to the worklist after doing a transformation, thus avoiding
4225  // some N^2 behavior in pathological cases.
4226  ICWorklist.reserve(InstrsForInstructionWorklist.size());
4227  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4228  // DCE instruction if trivially dead. As we iterate in reverse program
4229  // order here, we will clean up whole chains of dead instructions.
4230  if (isInstructionTriviallyDead(Inst, TLI) ||
4231  SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4232  ++NumDeadInst;
4233  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4234  salvageDebugInfo(*Inst);
4235  Inst->eraseFromParent();
4236  MadeIRChange = true;
4237  continue;
4238  }
4239 
4240  ICWorklist.push(Inst);
4241  }
4242 
4243  return MadeIRChange;
4244 }
4245 
4246 static bool combineInstructionsOverFunction(
4247  Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4248  AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4249  DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4250  ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4251  auto &DL = F.getParent()->getDataLayout();
4252  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
4253 
4254  /// Builder - This is an IRBuilder that automatically inserts new
4255  /// instructions into the worklist when they are created.
4256  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
4257  F.getContext(), TargetFolder(DL),
4258  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4259  Worklist.add(I);
4260  if (auto *Assume = dyn_cast<AssumeInst>(I))
4261  AC.registerAssumption(Assume);
4262  }));
4263 
4264  // Lower dbg.declare intrinsics otherwise their value may be clobbered
4265  // by instcombiner.
4266  bool MadeIRChange = false;
4267  if (ShouldLowerDbgDeclare)
4268  MadeIRChange = LowerDbgDeclare(F);
4269 
4270  // Iterate while there is work to do.
4271  unsigned Iteration = 0;
4272  while (true) {
4273  ++NumWorklistIterations;
4274  ++Iteration;
4275 
4276  if (Iteration > InfiniteLoopDetectionThreshold) {
4277  report_fatal_error(
4278  "Instruction Combining seems stuck in an infinite loop after " +
4279  Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4280  }
4281 
4282  if (Iteration > MaxIterations) {
4283  LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4284  << " on " << F.getName()
4285  << " reached; stopping before reaching a fixpoint\n");
4286  break;
4287  }
4288 
4289  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4290  << F.getName() << "\n");
4291 
4292  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4293 
4294  InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4295  ORE, BFI, PSI, DL, LI);
4296  IC.MaxArraySizeForCombine = MaxArraySize;
4297 
4298  if (!IC.run())
4299  break;
4300 
4301  MadeIRChange = true;
4302  }
4303 
4304  return MadeIRChange;
4305 }
4306 
4307 InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
4308 
4309 InstCombinePass::InstCombinePass(unsigned MaxIterations)
4310  : MaxIterations(MaxIterations) {}
4311 
4312 PreservedAnalyses InstCombinePass::run(Function &F,
4313  FunctionAnalysisManager &AM) {
4314  auto &AC = AM.getResult<AssumptionAnalysis>(F);
4315  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4316  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4317  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4318  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4319 
4320  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4321 
4322  auto *AA = &AM.getResult<AAManager>(F);
4323  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4324  ProfileSummaryInfo *PSI =
4325  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4326  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4327  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4328 
4329  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4330  BFI, PSI, MaxIterations, LI))
4331  // No changes, all analyses are preserved.
4332  return PreservedAnalyses::all();
4333 
4334  // Mark all the analyses that instcombine updates as preserved.
4335  PreservedAnalyses PA;
4336  PA.preserveSet<CFGAnalyses>();
4337  return PA;
4338 }
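// Minimal new-pass-manager usage sketch (illustrative only; assumes an
// already configured FunctionAnalysisManager FAM):
//   FunctionPassManager FPM;
//   FPM.addPass(InstCombinePass());
//   PreservedAnalyses PA = FPM.run(F, FAM);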
4339 
4340 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
4341  AU.setPreservesCFG();
4342  AU.addRequired<AAResultsWrapperPass>();
4343  AU.addRequired<AssumptionCacheTracker>();
4344  AU.addRequired<TargetLibraryInfoWrapperPass>();
4345  AU.addRequired<TargetTransformInfoWrapperPass>();
4346  AU.addRequired<DominatorTreeWrapperPass>();
4347  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
4348  AU.addPreserved<DominatorTreeWrapperPass>();
4349  AU.addPreserved<AAResultsWrapperPass>();
4350  AU.addPreserved<BasicAAWrapperPass>();
4351  AU.addPreserved<GlobalsAAWrapperPass>();
4352  AU.addRequired<ProfileSummaryInfoWrapperPass>();
4353  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
4354 }
4355 
4356 bool InstructionCombiningPass::runOnFunction(Function &F) {
4357  if (skipFunction(F))
4358  return false;
4359 
4360  // Required analyses.
4361  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4362  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4363  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4364  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4365  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4366  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4367 
4368  // Optional analyses.
4369  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4370  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4371  ProfileSummaryInfo *PSI =
4372  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4373  BlockFrequencyInfo *BFI =
4374  (PSI && PSI->hasProfileSummary()) ?
4375  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4376  nullptr;
4377 
4378  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4379  BFI, PSI, MaxIterations, LI);
4380 }
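// Legacy-pass-manager usage sketch (illustrative only; M is assumed to be the
// enclosing Module):
//   legacy::FunctionPassManager FPM(&M);
//   FPM.add(createInstructionCombiningPass());
//   bool Changed = FPM.run(F);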
4381 
4382 char InstructionCombiningPass::ID = 0;
4383 
4384 InstructionCombiningPass::InstructionCombiningPass()
4385  : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
4386  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4387 }
4388 
4389 InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
4390  : FunctionPass(ID), MaxIterations(MaxIterations) {
4391  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4392 }
4393 
4394 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
4395  "Combine redundant instructions", false, false)
4396 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4397 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
4398 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4399 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4400 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
4401 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
4402 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
4403 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
4404 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
4405 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
4406  "Combine redundant instructions", false, false)
4407 
4408 // Initialization Routines
4409 void llvm::initializeInstCombine(PassRegistry &Registry) {
4410  initializeInstructionCombiningPassPass(Registry);
4411 }
4412 
4413 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
4414  initializeInstructionCombiningPassPass(*unwrap(R));
4415 }
4416 
4417 FunctionPass *llvm::createInstructionCombiningPass() {
4418  return new InstructionCombiningPass();
4419 }
4420 
4421 FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4422  return new InstructionCombiningPass(MaxIterations);
4423 }
4424 
4425 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
4426  unwrap(PM)->add(createInstructionCombiningPass());
4427 }
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:321
llvm::AssumptionCache::registerAssumption
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition: AssumptionCache.cpp:226
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3010
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::EHPersonality::GNU_C_SjLj
@ GNU_C_SjLj
leftDistributesOverRight
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
Definition: InstructionCombining.cpp:563
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Instruction::isBitwiseLogicOp
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:215
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InstCombinerImpl::visitBranchInst
Instruction * visitBranchInst(BranchInst &BI)
Definition: InstructionCombining.cpp:3024
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1399
Metadata.h
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4632
llvm::InstructionCombiningPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: InstructionCombining.cpp:4340
llvm::InstCombinerImpl::run
bool run()
Run the combiner over the entire worklist until it is empty.
Definition: InstructionCombining.cpp:3883
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::EHPersonality::GNU_Ada
@ GNU_Ada
IntrinsicInst.h
llvm::SimplifyQuery
Definition: InstructionSimplify.h:94
llvm::DIBuilder
Definition: DIBuilder.h:41
LLVMPassRegistryRef
struct LLVMOpaquePassRegistry * LLVMPassRegistryRef
Definition: Types.h:130
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::InstCombiner::targetInstCombineIntrinsic
Optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
Definition: InstructionCombining.cpp:168
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
llvm::InstCombinerImpl::foldBinOpIntoSelectOrPhi
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
Definition: InstructionCombining.cpp:1292
llvm::PointerType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:673
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::Attribute
Definition: Attributes.h:53
llvm::APInt::ule
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1083
TargetFolder.h
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:631
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:104
llvm::PatternMatch::m_SplatOrUndefMask
Definition: PatternMatch.h:1545
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5218
llvm::InstCombinerImpl::visitExtractValueInst
Instruction * visitExtractValueInst(ExtractValueInst &EV)
Definition: InstructionCombining.cpp:3110
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:68
GetElementPtrTypeIterator.h
llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:5274
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:308
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
Statistic.h
llvm::PatternMatch::m_SpecificMask
Definition: PatternMatch.h:1539
llvm::LandingPadInst
The landingpad instruction holds all of the information necessary to generate correct exception handl...
Definition: Instructions.h:2909
llvm::InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic
Optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: InstructionCombining.cpp:187
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:988
llvm::FunctionPass::skipFunction
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:163
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
LazyBlockFrequencyInfo.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::InsertValueInst::Create
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:2554
getBinOpsForFactorization
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS)
This function predicates factorization using distributive laws.
Definition: InstructionCombining.cpp:612
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3152
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:687
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:835
llvm::findDbgUsers
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic * > &DbgInsts, Value *V)
Finds the debug info intrinsics describing a value.
Definition: DebugInfo.cpp:102
llvm::ConstantRange::getEquivalentICmp
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
Definition: ConstantRange.cpp:216
ValueTracking.h
Local.h
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
GlobalsModRef.h
llvm::InstructionWorklist::reserve
void reserve(size_t Size)
Definition: InstructionWorklist.h:79
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::NoAliasScopeDeclInst::getScopeList
MDNode * getScopeList() const
Definition: IntrinsicInst.h:1289
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1741
llvm::InstCombiner::MaxArraySizeForCombine
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:53
APInt.h
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
llvm::InstructionCombiningPass::InstructionCombiningPass
InstructionCombiningPass()
Definition: InstructionCombining.cpp:4384
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
NL
#define NL
Definition: DetailedRecordsBackend.cpp:30
DenseMap.h
llvm::KnownBits::getConstant
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1412
llvm::InstCombinerImpl::FoldOpIntoSelect
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI)
Given an instruction with a select as one operand and a constant as the other operand,...
Definition: InstructionCombining.cpp:1039
llvm::InstCombinerImpl::visitAllocSite
Instruction * visitAllocSite(Instruction &FI)
Definition: InstructionCombining.cpp:2691
MemoryBuiltins.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:414
llvm::AttributeList
Definition: Attributes.h:404
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1473
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:167
EHPersonalities.h
InfiniteLoopDetectionThreshold
static cl::opt< unsigned > InfiniteLoopDetectionThreshold("instcombine-infinite-loop-threshold", cl::desc("Number of instruction combining iterations considered an " "infinite loop"), cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden)
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::ExtractValueInst::indices
iterator_range< idx_iterator > indices() const
Definition: Instructions.h:2455
llvm::SimplifyFreezeInst
Value * SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
Definition: InstructionSimplify.cpp:6169
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:926
llvm::Optional
Definition: APInt.h:33
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::SmallPtrSet< Value *, 16 >
CBindingWrapping.h
llvm::SimplifyGEPInst
Value * SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, bool InBounds, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4576
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::PatternMatch::m_AShr
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1133
isMustTailCall
static bool isMustTailCall(Value *V)
Definition: InstructionCombining.cpp:2938
Operator.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:31
llvm::InstCombinerImpl::Descale
Value * Descale(Value *Val, APInt Scale, bool &NoSignedWrap)
Returns a value X such that Val = X * Scale, or null if none.
Definition: InstructionCombining.cpp:1339
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:644
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
LLVMInitializeInstCombine
void LLVMInitializeInstCombine(LLVMPassRegistryRef R)
Definition: InstructionCombining.cpp:4413
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::PatternMatch::m_Not
BinaryOp_match< ValTy, cst_pred_ty< is_all_ones >, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Definition: PatternMatch.h:2287
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:165
isRemovableWrite
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
Definition: InstructionCombining.cpp:2572
BasicAliasAnalysis.h
LegacyPassManager.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
ConstantFolding.h
llvm::EmitGEPOffset
Value * EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.h:29
Use.h
llvm::EHPersonality::GNU_ObjC
@ GNU_ObjC
llvm::APInt::isMinValue
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:402
llvm::InstCombinePass::InstCombinePass
InstCombinePass()
Definition: InstructionCombining.cpp:4307
llvm::InstructionWorklist::add
void add(Instruction *I)
Add instruction to the worklist.
Definition: InstructionWorklist.h:44
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BranchInst::swapSuccessors
void swapSuccessors()
Swap the successors of this branch instruction.
Definition: Instructions.cpp:1307
llvm::InstCombinerImpl::SimplifySelectsFeedingBinaryOp
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
Definition: InstructionCombining.cpp:855
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::initializeInstCombine
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
Definition: InstructionCombining.cpp:4409
KnownBits.h
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:99
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
combineInstructionsOverFunction
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI)
Definition: InstructionCombining.cpp:4246
llvm::gep_type_end
gep_type_iterator gep_type_end(const User *GEP)
Definition: GetElementPtrTypeIterator.h:146
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1143
llvm::InstCombinerImpl::visitUnreachableInst
Instruction * visitUnreachableInst(UnreachableInst &I)
Definition: InstructionCombining.cpp:2968
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:2000
AliasAnalysis.h
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2750
llvm::isAllocationFn
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (eith...
Definition: MemoryBuiltins.cpp:229
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
llvm::InstCombinerImpl::visitFree
Instruction * visitFree(CallInst &FI)
Definition: InstructionCombining.cpp:2894
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:270
CommandLine.h
llvm::PatternMatch::m_c_BinOp
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
Definition: PatternMatch.h:2213
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:214
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1355
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:317
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::Instruction::willReturn
bool willReturn() const
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
Definition: Instruction.cpp:702
AliasScopeTracker
Definition: InstructionCombining.cpp:4072
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:381
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1772
hasNoUnsignedWrap
static bool hasNoUnsignedWrap(BinaryOperator &I)
Definition: InstructionCombining.cpp:295
llvm::AddrSpaceCastInst
This class represents a conversion between pointers from one address space to another.
Definition: Instructions.h:5258
InstCombineInternal.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::ArrayType::getNumElements
uint64_t getNumElements() const
Definition: DerivedTypes.h:369
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1658
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2753
llvm::AAResults
Definition: AliasAnalysis.h:507
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_END(InstructionCombiningPass
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:657
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:128
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::InstCombinerImpl::foldOpIntoPhi
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Definition: InstructionCombining.cpp:1135
llvm::LandingPadInst::getNumClauses
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
Definition: Instructions.h:2981
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
llvm::CallBase::setAttributes
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1477
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
InstructionWorklist.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InstCombinerImpl::visitFreeze
Instruction * visitFreeze(FreezeInst &I)
Definition: InstructionCombining.cpp:3671
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::EHPersonality::GNU_C
@ GNU_C
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
DEBUG_COUNTER
DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited")
llvm::canConstantFoldCallTo
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Definition: ConstantFolding.cpp:1387
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::InstCombinerImpl::visitUnconditionalBranchInst
Instruction * visitUnconditionalBranchInst(BranchInst &BI)
Definition: InstructionCombining.cpp:2995
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:726
l
This requires reassociating to forms of expressions that are already something that reassoc doesn t think about yet These two functions should generate the same code on big endian int * l
Definition: README.txt:100
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
false
Definition: StackSlotColoring.cpp:142
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:627
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1639
llvm::isPotentiallyReachable
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Definition: CFG.cpp:236
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::removeAllNonTerminatorAndEHPadInstructions
std::pair< unsigned, unsigned > removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition: Local.cpp:2104
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:803
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:409
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:405
llvm::PatternMatch::m_SDiv
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1073
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:394
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::ExtractValueInst::idx_end
idx_iterator idx_end() const
Definition: Instructions.h:2454
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::unwrap
Attribute unwrap(LLVMAttributeRef Attr)
Definition: Attributes.h:261
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
Definition: TargetTransformInfo.cpp:315
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::LandingPadInst::addClause
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
Definition: Instructions.cpp:243
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1804
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:61
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4399
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
SmallPtrSet.h
llvm::InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
Definition: InstructionCombining.cpp:3597
LLVMAddInstructionCombiningPass
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM)
See llvm::createInstructionCombiningPass function.
Definition: InstructionCombining.cpp:4425
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:216
willNotOverflow
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
Definition: CorrelatedValuePropagation.cpp:435
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
rightDistributesOverLeft
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
Definition: InstructionCombining.cpp:584
llvm::PatternMatch::m_APIntAllowUndef
apint_match m_APIntAllowUndef(const APInt *&Res)
Match APInt while allowing undefs in splat vector constants.
Definition: PatternMatch.h:276
llvm::PointerType::isOpaque
bool isOpaque() const
Definition: DerivedTypes.h:678
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition: Instruction.h:166
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2749
llvm::None
const NoneType None
Definition: None.h:23
llvm::EHPersonality::Wasm_CXX
@ Wasm_CXX
ClearSubclassDataAfterReassociation
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
Definition: InstructionCombining.cpp:308
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
Type.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
SoleWriteToDeadLocal
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
Definition: InstructionCombining.cpp:3724
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::InstCombinerImpl::SimplifyAssociativeOrCommutative
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Definition: InstructionCombining.cpp:403
simplifyAssocCastAssoc
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
Definition: InstructionCombining.cpp:324
llvm::Instruction::isCommutative
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
Definition: Instruction.cpp:768
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3173
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3769
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:711
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1137
foldOperationIntoPhiValue
static Value * foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, InstCombiner::BuilderTy &Builder)
Definition: InstructionCombining.cpp:1113
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::IRBuilderBase::FastMathFlagGuard
Definition: IRBuilder.h:389
llvm::Type::getArrayElementType
Type * getArrayElementType() const
Definition: Type.h:364
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1527
VectorUtils.h
llvm::ConstantExpr::getTrunc
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2144
llvm::GEPOperator::hasAllZeroIndices
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:548
InstCombineDefaultInfiniteLoopThreshold
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold
Definition: InstructionCombining.cpp:133
BasicBlock.h
llvm::cl::opt< bool >
llvm::ClrHandlerType::Filter
@ Filter
llvm::DbgVariableIntrinsic
This is the common base class for debug info intrinsics for variables.
Definition: IntrinsicInst.h:148
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2447
llvm::EHPersonality::GNU_CXX_SjLj
@ GNU_CXX_SjLj
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::PatternMatch::m_UnconditionalBr
br_match m_UnconditionalBr(BasicBlock *&Succ)
Definition: PatternMatch.h:1720
llvm::PatternMatch::m_ImmConstant
match_combine_and< class_match< Constant >, match_unless< class_match< ConstantExpr > > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:759
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1190
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2861
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
EnableCodeSinking
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
llvm::KnownBits::countMinLeadingOnes
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:241
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
llvm::ExtractValueInst::Create
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:2430
uint64_t
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
ProfileSummaryInfo.h
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2474
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
MaxArraySize
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
llvm::InstructionWorklist::push
void push(Instruction *I)
Push the instruction onto the worklist stack.
Definition: InstructionWorklist.h:58
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::APInt::sdivrem
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1882
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:175
llvm::SimplifyBinOp
Value * SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
Definition: InstructionSimplify.cpp:5449
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2807
llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1109
AliasScopeTracker::analyse
void analyse(Instruction *I)
Definition: InstructionCombining.cpp:4077
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ConstantExpr::get
static Constant * get(unsigned Opcode, Constant *C1, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a unary operator constant expression, folding if possible.
Definition: Constants.cpp:2316
llvm::ExtractValueInst::idx_begin
idx_iterator idx_begin() const
Definition: Instructions.h:2453
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:752
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1384
llvm::Function::addDereferenceableOrNullParamAttr
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes)
adds the dereferenceable_or_null attribute to the list of attributes for the given arg.
Definition: