InstructionCombining.cpp
1 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // InstructionCombining - Combine instructions to form fewer, simple
10 // instructions. This pass does not modify the CFG. This pass is where
11 // algebraic simplification happens.
12 //
13 // This pass combines things like:
14 // %Y = add i32 %X, 1
15 // %Z = add i32 %Y, 1
16 // into:
17 // %Z = add i32 %X, 2
18 //
19 // This is a simple worklist driven algorithm.
20 //
21 // This pass guarantees that the following canonicalizations are performed on
22 // the program:
23 // 1. If a binary operator has a constant operand, it is moved to the RHS
24 // 2. Bitwise operators with constant operands are always grouped so that
25 // shifts are performed first, then or's, then and's, then xor's.
26 // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27 // 4. All cmp instructions on boolean values are replaced with logical ops
28 // 5. add X, X is represented as (X*2) => (X << 1)
29 // 6. Multiplies with a power-of-two constant argument are transformed into
30 // shifts.
31 // ... etc.
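//
// For instance, canonicalizations 5 and 6 above turn (illustrative IR only):
//    %a = add i32 %X, %X      into   %a = shl i32 %X, 1
//    %m = mul i32 %X, 8       into   %m = shl i32 %X, 3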
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "InstCombineInternal.h"
36 #include "llvm-c/Initialization.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/DenseMap.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
49 #include "llvm/Analysis/CFG.h"
55 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/IR/BasicBlock.h"
66 #include "llvm/IR/CFG.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugInfo.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Function.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InstrTypes.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/IR/Operator.h"
85 #include "llvm/IR/PassManager.h"
86 #include "llvm/IR/PatternMatch.h"
87 #include "llvm/IR/Type.h"
88 #include "llvm/IR/Use.h"
89 #include "llvm/IR/User.h"
90 #include "llvm/IR/Value.h"
91 #include "llvm/IR/ValueHandle.h"
92 #include "llvm/InitializePasses.h"
93 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
103 #include <algorithm>
104 #include <cassert>
105 #include <cstdint>
106 #include <memory>
107 #include <string>
108 #include <utility>
109 
110 #define DEBUG_TYPE "instcombine"
112 
113 using namespace llvm;
114 using namespace llvm::PatternMatch;
115 
116 STATISTIC(NumWorklistIterations,
117  "Number of instruction combining iterations performed");
118 
119 STATISTIC(NumCombined , "Number of insts combined");
120 STATISTIC(NumConstProp, "Number of constant folds");
121 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
122 STATISTIC(NumSunkInst , "Number of instructions sunk");
123 STATISTIC(NumExpand, "Number of expansions");
124 STATISTIC(NumFactor , "Number of factorizations");
125 STATISTIC(NumReassoc , "Number of reassociations");
126 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
127  "Controls which instructions are visited");
128 
129 // FIXME: these limits eventually should be as low as 2.
130 static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
131 #ifndef NDEBUG
132 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
133 #else
134 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
135 #endif
136 
137 static cl::opt<bool>
138 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
139  cl::init(true));
 140 
 141 static cl::opt<unsigned> MaxSinkNumUsers(
 142     "instcombine-max-sink-users", cl::init(32),
 143     cl::desc("Maximum number of undroppable users for instruction sinking"));
 144 
 145 static cl::opt<unsigned> LimitMaxIterations(
 146     "instcombine-max-iterations",
 147     cl::desc("Limit the maximum number of instruction combining iterations"),
 148     cl::init(InstCombineDefaultMaxIterations));
 149 
 150 static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
 151     "instcombine-infinite-loop-threshold",
 152     cl::desc("Number of instruction combining iterations considered an "
 153              "infinite loop"),
 154     cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
 155 
156 static cl::opt<unsigned>
157 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
158  cl::desc("Maximum array size considered when doing a combine"));
159 
160 // FIXME: Remove this flag when it is no longer necessary to convert
161 // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
162 // increases variable availability at the cost of accuracy. Variables that
163 // cannot be promoted by mem2reg or SROA will be described as living in memory
164 // for their entire lifetime. However, passes like DSE and instcombine can
165 // delete stores to the alloca, leading to misleading and inaccurate debug
166 // information. This flag can be removed when those passes are fixed.
167 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
168  cl::Hidden, cl::init(true));
 169 
 170 Optional<Instruction *>
 171 InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
 172   // Handle target specific intrinsics
173  if (II.getCalledFunction()->isTargetIntrinsic()) {
174  return TTI.instCombineIntrinsic(*this, II);
175  }
176  return None;
177 }
 178 
 179 Optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
 180     IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
181  bool &KnownBitsComputed) {
182  // Handle target specific intrinsics
183  if (II.getCalledFunction()->isTargetIntrinsic()) {
184  return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
185  KnownBitsComputed);
186  }
187  return None;
188 }
 189 
 190 Optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
 191     IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
192  APInt &UndefElts3,
193  std::function<void(Instruction *, unsigned, APInt, APInt &)>
194  SimplifyAndSetOp) {
195  // Handle target specific intrinsics
 196   if (II.getCalledFunction()->isTargetIntrinsic()) {
 197     return TTI.simplifyDemandedVectorEltsIntrinsic(
 198         *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
199  SimplifyAndSetOp);
200  }
201  return None;
202 }
203 
204 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
205  return llvm::EmitGEPOffset(&Builder, DL, GEP);
206 }
207 
208 /// Legal integers and common types are considered desirable. This is used to
 209 /// avoid creating instructions with types that may not be supported well by
 210 /// the backend.
211 /// NOTE: This treats i8, i16 and i32 specially because they are common
212 /// types in frontend languages.
213 bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
214  switch (BitWidth) {
215  case 8:
216  case 16:
217  case 32:
218  return true;
219  default:
220  return DL.isLegalInteger(BitWidth);
221  }
222 }
223 
224 /// Return true if it is desirable to convert an integer computation from a
225 /// given bit width to a new bit width.
226 /// We don't want to convert from a legal to an illegal type or from a smaller
227 /// to a larger illegal type. A width of '1' is always treated as a desirable
228 /// type because i1 is a fundamental type in IR, and there are many specialized
229 /// optimizations for i1 types. Common/desirable widths are equally treated as
230 /// legal to convert to, in order to open up more combining opportunities.
231 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
232  unsigned ToWidth) const {
233  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
234  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
235 
236  // Convert to desirable widths even if they are not legal types.
237  // Only shrink types, to prevent infinite loops.
238  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
239  return true;
240 
241  // If this is a legal integer from type, and the result would be an illegal
242  // type, don't do the transformation.
243  if (FromLegal && !ToLegal)
244  return false;
245 
246  // Otherwise, if both are illegal, do not increase the size of the result. We
247  // do allow things like i160 -> i64, but not i64 -> i160.
248  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
249  return false;
250 
251  return true;
252 }
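// For example, assuming a datalayout where i8/i16/i32/i64 are the legal
// integer widths: shrinking i64 -> i32 or i128 -> i64 is allowed, while
// widening i64 -> i128 (legal to illegal) or i96 -> i128 (to a larger
// illegal type) is rejected.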
253 
254 /// Return true if it is desirable to convert a computation from 'From' to 'To'.
255 /// We don't want to convert from a legal to an illegal type or from a smaller
256 /// to a larger illegal type. i1 is always treated as a legal type because it is
257 /// a fundamental type in IR, and there are many specialized optimizations for
258 /// i1 types.
259 bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
260  // TODO: This could be extended to allow vectors. Datalayout changes might be
261  // needed to properly support that.
262  if (!From->isIntegerTy() || !To->isIntegerTy())
263  return false;
264 
265  unsigned FromWidth = From->getPrimitiveSizeInBits();
266  unsigned ToWidth = To->getPrimitiveSizeInBits();
267  return shouldChangeType(FromWidth, ToWidth);
268 }
269 
 270 // Return true if No Signed Wrap should be maintained for I.
 271 // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
 272 // where both B and C should be ConstantInts, results in a constant that does
 273 // not overflow. This function only handles the Add and Sub opcodes. For
 274 // all other opcodes, the function conservatively returns false.
 275 static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
 276   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
277  if (!OBO || !OBO->hasNoSignedWrap())
278  return false;
279 
280  // We reason about Add and Sub Only.
281  Instruction::BinaryOps Opcode = I.getOpcode();
282  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
283  return false;
284 
285  const APInt *BVal, *CVal;
286  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
287  return false;
288 
289  bool Overflow = false;
290  if (Opcode == Instruction::Add)
291  (void)BVal->sadd_ov(*CVal, Overflow);
292  else
293  (void)BVal->ssub_ov(*CVal, Overflow);
294 
295  return !Overflow;
296 }
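// For example (illustrative i8 constants): reassociating "(X +nsw 100) +nsw 20"
// may keep nsw because 100 + 20 = 120 fits in i8, whereas with +nsw 30 the
// folded constant 130 would overflow and nsw must be dropped.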
297 
 298 static bool hasNoUnsignedWrap(BinaryOperator &I) {
 299   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
300  return OBO && OBO->hasNoUnsignedWrap();
301 }
302 
 303 static bool hasNoSignedWrap(BinaryOperator &I) {
 304   auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
305  return OBO && OBO->hasNoSignedWrap();
306 }
307 
 308 /// Conservatively clears subclassOptionalData after a reassociation or
 309 /// commutation. We preserve fast-math flags when applicable because they
 310 /// remain valid across these transforms.
 311 static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
 312   FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
313  if (!FPMO) {
314  I.clearSubclassOptionalData();
315  return;
316  }
317 
318  FastMathFlags FMF = I.getFastMathFlags();
319  I.clearSubclassOptionalData();
320  I.setFastMathFlags(FMF);
321 }
322 
323 /// Combine constant operands of associative operations either before or after a
324 /// cast to eliminate one of the associative operations:
325 /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
326 /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
 327 static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
 328                                    InstCombinerImpl &IC) {
329  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
330  if (!Cast || !Cast->hasOneUse())
331  return false;
332 
333  // TODO: Enhance logic for other casts and remove this check.
334  auto CastOpcode = Cast->getOpcode();
335  if (CastOpcode != Instruction::ZExt)
336  return false;
337 
338  // TODO: Enhance logic for other BinOps and remove this check.
339  if (!BinOp1->isBitwiseLogicOp())
340  return false;
341 
342  auto AssocOpcode = BinOp1->getOpcode();
343  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
344  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
345  return false;
346 
347  Constant *C1, *C2;
348  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
349  !match(BinOp2->getOperand(1), m_Constant(C2)))
350  return false;
351 
352  // TODO: This assumes a zext cast.
353  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
354  // to the destination type might lose bits.
355 
356  // Fold the constants together in the destination type:
357  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
358  Type *DestTy = C1->getType();
359  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
360  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
361  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
362  IC.replaceOperand(*BinOp1, 1, FoldedC);
363  return true;
364 }
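// A concrete instance of the fold above (hypothetical constants):
//   %t = and i8 %X, 3
//   %z = zext i8 %t to i32
//   %r = and i32 %z, 5
// becomes
//   %z = zext i8 %X to i32
//   %r = and i32 %z, 1        ; 5 & zext(3) == 1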
365 
366 // Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
367 // inttoptr ( ptrtoint (x) ) --> x
368 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
369  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
370  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
371  DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
372  auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
373  Type *CastTy = IntToPtr->getDestTy();
374  if (PtrToInt &&
375  CastTy->getPointerAddressSpace() ==
376  PtrToInt->getSrcTy()->getPointerAddressSpace() &&
377  DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
378  DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
379  return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
380  "", PtrToInt);
381  }
382  }
383  return nullptr;
384 }
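// e.g. (assuming 64-bit pointers and matching address spaces):
//   %i = ptrtoint i8* %x to i64
//   %p = inttoptr i64 %i to i32*
// is replaced with a bit-or-pointer cast of %x to i32*.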
385 
386 /// This performs a few simplifications for operators that are associative or
387 /// commutative:
388 ///
389 /// Commutative operators:
390 ///
391 /// 1. Order operands such that they are listed from right (least complex) to
392 /// left (most complex). This puts constants before unary operators before
393 /// binary operators.
394 ///
395 /// Associative operators:
396 ///
397 /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
398 /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
399 ///
400 /// Associative and commutative operators:
401 ///
402 /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
403 /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
404 /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
405 /// if C1 and C2 are constants.
 406 bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
 407   Instruction::BinaryOps Opcode = I.getOpcode();
408  bool Changed = false;
409 
410  do {
411  // Order operands such that they are listed from right (least complex) to
412  // left (most complex). This puts constants before unary operators before
413  // binary operators.
414  if (I.isCommutative() && getComplexity(I.getOperand(0)) <
415  getComplexity(I.getOperand(1)))
416  Changed = !I.swapOperands();
417 
418  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
419  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
420 
421  if (I.isAssociative()) {
422  // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
423  if (Op0 && Op0->getOpcode() == Opcode) {
424  Value *A = Op0->getOperand(0);
425  Value *B = Op0->getOperand(1);
426  Value *C = I.getOperand(1);
427 
428  // Does "B op C" simplify?
429  if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
430  // It simplifies to V. Form "A op V".
431  replaceOperand(I, 0, A);
432  replaceOperand(I, 1, V);
433  bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
434  bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
435 
436  // Conservatively clear all optional flags since they may not be
437  // preserved by the reassociation. Reset nsw/nuw based on the above
438  // analysis.
 439         ClearSubclassDataAfterReassociation(I);
 440 
441  // Note: this is only valid because SimplifyBinOp doesn't look at
442  // the operands to Op0.
443  if (IsNUW)
444  I.setHasNoUnsignedWrap(true);
445 
446  if (IsNSW)
447  I.setHasNoSignedWrap(true);
448 
449  Changed = true;
450  ++NumReassoc;
451  continue;
452  }
453  }
454 
455  // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
456  if (Op1 && Op1->getOpcode() == Opcode) {
457  Value *A = I.getOperand(0);
458  Value *B = Op1->getOperand(0);
459  Value *C = Op1->getOperand(1);
460 
461  // Does "A op B" simplify?
462  if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
463  // It simplifies to V. Form "V op C".
464  replaceOperand(I, 0, V);
465  replaceOperand(I, 1, C);
466  // Conservatively clear the optional flags, since they may not be
467  // preserved by the reassociation.
 468         ClearSubclassDataAfterReassociation(I);
 469         Changed = true;
470  ++NumReassoc;
471  continue;
472  }
473  }
474  }
475 
476  if (I.isAssociative() && I.isCommutative()) {
477  if (simplifyAssocCastAssoc(&I, *this)) {
478  Changed = true;
479  ++NumReassoc;
480  continue;
481  }
482 
483  // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
484  if (Op0 && Op0->getOpcode() == Opcode) {
485  Value *A = Op0->getOperand(0);
486  Value *B = Op0->getOperand(1);
487  Value *C = I.getOperand(1);
488 
489  // Does "C op A" simplify?
490  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
491  // It simplifies to V. Form "V op B".
492  replaceOperand(I, 0, V);
493  replaceOperand(I, 1, B);
494  // Conservatively clear the optional flags, since they may not be
495  // preserved by the reassociation.
 496         ClearSubclassDataAfterReassociation(I);
 497         Changed = true;
498  ++NumReassoc;
499  continue;
500  }
501  }
502 
503  // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
504  if (Op1 && Op1->getOpcode() == Opcode) {
505  Value *A = I.getOperand(0);
506  Value *B = Op1->getOperand(0);
507  Value *C = Op1->getOperand(1);
508 
509  // Does "C op A" simplify?
510  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
511  // It simplifies to V. Form "B op V".
512  replaceOperand(I, 0, B);
513  replaceOperand(I, 1, V);
514  // Conservatively clear the optional flags, since they may not be
515  // preserved by the reassociation.
 516         ClearSubclassDataAfterReassociation(I);
 517         Changed = true;
518  ++NumReassoc;
519  continue;
520  }
521  }
522 
523  // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
524  // if C1 and C2 are constants.
525  Value *A, *B;
526  Constant *C1, *C2, *CRes;
527  if (Op0 && Op1 &&
528  Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
529  match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
530  match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
531  (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
532  bool IsNUW = hasNoUnsignedWrap(I) &&
533  hasNoUnsignedWrap(*Op0) &&
534  hasNoUnsignedWrap(*Op1);
535  BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
536  BinaryOperator::CreateNUW(Opcode, A, B) :
537  BinaryOperator::Create(Opcode, A, B);
538 
539  if (isa<FPMathOperator>(NewBO)) {
540  FastMathFlags Flags = I.getFastMathFlags();
541  Flags &= Op0->getFastMathFlags();
542  Flags &= Op1->getFastMathFlags();
543  NewBO->setFastMathFlags(Flags);
544  }
545  InsertNewInstWith(NewBO, I);
546  NewBO->takeName(Op1);
547  replaceOperand(I, 0, NewBO);
548  replaceOperand(I, 1, CRes);
549  // Conservatively clear the optional flags, since they may not be
550  // preserved by the reassociation.
 551       ClearSubclassDataAfterReassociation(I);
 552       if (IsNUW)
553  I.setHasNoUnsignedWrap(true);
554 
555  Changed = true;
556  continue;
557  }
558  }
559 
560  // No further simplifications.
561  return Changed;
562  } while (true);
563 }
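// As a small example of transform 2 above, "(A + 7) + 3" reassociates because
// "7 + 3" simplifies to 10, leaving "A + 10" (illustrative constants).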
564 
565 /// Return whether "X LOp (Y ROp Z)" is always equal to
566 /// "(X LOp Y) ROp (X LOp Z)".
 567 static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
 568                                      Instruction::BinaryOps ROp) {
 569   // X & (Y | Z) <--> (X & Y) | (X & Z)
570  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
571  if (LOp == Instruction::And)
572  return ROp == Instruction::Or || ROp == Instruction::Xor;
573 
574  // X | (Y & Z) <--> (X | Y) & (X | Z)
575  if (LOp == Instruction::Or)
576  return ROp == Instruction::And;
577 
578  // X * (Y + Z) <--> (X * Y) + (X * Z)
579  // X * (Y - Z) <--> (X * Y) - (X * Z)
580  if (LOp == Instruction::Mul)
581  return ROp == Instruction::Add || ROp == Instruction::Sub;
582 
583  return false;
584 }
585 
586 /// Return whether "(X LOp Y) ROp Z" is always equal to
587 /// "(X ROp Z) LOp (Y ROp Z)".
 588 static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
 589                                      Instruction::BinaryOps ROp) {
 590   if (Instruction::isCommutative(ROp))
 591     return leftDistributesOverRight(ROp, LOp);
 592 
 593   // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
 594   return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
 595 
596  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
597  // but this requires knowing that the addition does not overflow and other
598  // such subtleties.
599 }
600 
601 /// This function returns identity value for given opcode, which can be used to
602 /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
 603 static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
 604   if (isa<Constant>(V))
605  return nullptr;
606 
607  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
608 }
609 
610 /// This function predicates factorization using distributive laws. By default,
611 /// it just returns the 'Op' inputs. But for special-cases like
612 /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
613 /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
614 /// allow more factorization opportunities.
 615 static Instruction::BinaryOps
 616 getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
 617                           Value *&LHS, Value *&RHS) {
618  assert(Op && "Expected a binary operator");
619  LHS = Op->getOperand(0);
620  RHS = Op->getOperand(1);
621  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
622  Constant *C;
623  if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
624  // X << C --> X * (1 << C)
625  RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
626  return Instruction::Mul;
627  }
628  // TODO: We can add other conversions e.g. shr => div etc.
629  }
630  return Op->getOpcode();
631 }
632 
633 /// This tries to simplify binary operations by factorizing out common terms
 634 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
 635 Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
 636                                           Instruction::BinaryOps InnerOpcode,
637  Value *A, Value *B, Value *C,
638  Value *D) {
639  assert(A && B && C && D && "All values must be provided");
640 
641  Value *V = nullptr;
642  Value *RetVal = nullptr;
643  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
644  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
645 
646  // Does "X op' Y" always equal "Y op' X"?
647  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
648 
649  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
650  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
651  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
652  // commutative case, "(A op' B) op (C op' A)"?
653  if (A == C || (InnerCommutative && A == D)) {
654  if (A != C)
655  std::swap(C, D);
656  // Consider forming "A op' (B op D)".
657  // If "B op D" simplifies then it can be formed with no cost.
658  V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
659 
660  // If "B op D" doesn't simplify then only go on if one of the existing
661  // operations "A op' B" and "C op' D" will be zapped as no longer used.
662  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
663  V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
664  if (V)
665  RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
666  }
667  }
668 
669  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
670  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
671  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
672  // commutative case, "(A op' B) op (B op' D)"?
673  if (B == D || (InnerCommutative && B == C)) {
674  if (B != D)
675  std::swap(C, D);
676  // Consider forming "(A op C) op' B".
677  // If "A op C" simplifies then it can be formed with no cost.
678  V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
679 
680  // If "A op C" doesn't simplify then only go on if one of the existing
681  // operations "A op' B" and "C op' D" will be zapped as no longer used.
682  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
683  V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
684  if (V)
685  RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
686  }
687  }
688 
689  if (!RetVal)
690  return nullptr;
691 
692  ++NumFactor;
693  RetVal->takeName(&I);
694 
695  // Try to add no-overflow flags to the final value.
696  if (isa<OverflowingBinaryOperator>(RetVal)) {
697  bool HasNSW = false;
698  bool HasNUW = false;
699  if (isa<OverflowingBinaryOperator>(&I)) {
700  HasNSW = I.hasNoSignedWrap();
701  HasNUW = I.hasNoUnsignedWrap();
702  }
703  if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
704  HasNSW &= LOBO->hasNoSignedWrap();
705  HasNUW &= LOBO->hasNoUnsignedWrap();
706  }
707 
708  if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
709  HasNSW &= ROBO->hasNoSignedWrap();
710  HasNUW &= ROBO->hasNoUnsignedWrap();
711  }
712 
713  if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
714  // We can propagate 'nsw' if we know that
715  // %Y = mul nsw i16 %X, C
716  // %Z = add nsw i16 %Y, %X
717  // =>
718  // %Z = mul nsw i16 %X, C+1
719  //
720  // iff C+1 isn't INT_MIN
721  const APInt *CInt;
722  if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
723  cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
724 
725  // nuw can be propagated with any constant or nuw value.
726  cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
727  }
728  }
729  return RetVal;
730 }
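// Sketch of the common-term case handled above (illustrative IR):
//   %t0 = mul i32 %A, %B
//   %t1 = mul i32 %A, %C
//   %r  = add i32 %t0, %t1
// factors, when at least one of %t0/%t1 has no other use, into:
//   %s = add i32 %B, %C
//   %r = mul i32 %A, %s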
731 
732 /// This tries to simplify binary operations which some other binary operation
733 /// distributes over either by factorizing out common terms
 734 /// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
 735 /// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
 736 /// Returns the simplified value, or null if it didn't simplify.
 737 Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
 738   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
739  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
740  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
741  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
742 
743  {
744  // Factorization.
745  Value *A, *B, *C, *D;
746  Instruction::BinaryOps LHSOpcode, RHSOpcode;
747  if (Op0)
748  LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
749  if (Op1)
750  RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
751 
752  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
753  // a common term.
754  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
755  if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
756  return V;
757 
758  // The instruction has the form "(A op' B) op (C)". Try to factorize common
759  // term.
760  if (Op0)
761  if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
762  if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
763  return V;
764 
765  // The instruction has the form "(B) op (C op' D)". Try to factorize common
766  // term.
767  if (Op1)
768  if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
769  if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
770  return V;
771  }
772 
773  // Expansion.
774  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
775  // The instruction has the form "(A op' B) op C". See if expanding it out
776  // to "(A op C) op' (B op C)" results in simplifications.
777  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
778  Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
779 
780  // Disable the use of undef because it's not safe to distribute undef.
781  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
782  Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
783  Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
784 
785  // Do "A op C" and "B op C" both simplify?
786  if (L && R) {
787  // They do! Return "L op' R".
788  ++NumExpand;
789  C = Builder.CreateBinOp(InnerOpcode, L, R);
790  C->takeName(&I);
791  return C;
792  }
793 
794  // Does "A op C" simplify to the identity value for the inner opcode?
795  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
796  // They do! Return "B op C".
797  ++NumExpand;
798  C = Builder.CreateBinOp(TopLevelOpcode, B, C);
799  C->takeName(&I);
800  return C;
801  }
802 
803  // Does "B op C" simplify to the identity value for the inner opcode?
804  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
805  // They do! Return "A op C".
806  ++NumExpand;
807  C = Builder.CreateBinOp(TopLevelOpcode, A, C);
808  C->takeName(&I);
809  return C;
810  }
811  }
812 
813  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
814  // The instruction has the form "A op (B op' C)". See if expanding it out
815  // to "(A op B) op' (A op C)" results in simplifications.
816  Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
817  Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
818 
819  // Disable the use of undef because it's not safe to distribute undef.
820  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
821  Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
822  Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
823 
824  // Do "A op B" and "A op C" both simplify?
825  if (L && R) {
826  // They do! Return "L op' R".
827  ++NumExpand;
828  A = Builder.CreateBinOp(InnerOpcode, L, R);
829  A->takeName(&I);
830  return A;
831  }
832 
833  // Does "A op B" simplify to the identity value for the inner opcode?
834  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
835  // They do! Return "A op C".
836  ++NumExpand;
837  A = Builder.CreateBinOp(TopLevelOpcode, A, C);
838  A->takeName(&I);
839  return A;
840  }
841 
842  // Does "A op C" simplify to the identity value for the inner opcode?
843  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
844  // They do! Return "A op B".
845  ++NumExpand;
846  A = Builder.CreateBinOp(TopLevelOpcode, A, B);
847  A->takeName(&I);
848  return A;
849  }
850  }
851 
852  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
853 }
854 
 855 Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
 856                                                         Value *LHS,
857  Value *RHS) {
858  Value *A, *B, *C, *D, *E, *F;
859  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
860  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
861  if (!LHSIsSelect && !RHSIsSelect)
862  return nullptr;
863 
864  FastMathFlags FMF;
 865   BuilderTy::FastMathFlagGuard Guard(Builder);
 866   if (isa<FPMathOperator>(&I)) {
867  FMF = I.getFastMathFlags();
868  Builder.setFastMathFlags(FMF);
869  }
870 
871  Instruction::BinaryOps Opcode = I.getOpcode();
 872   SimplifyQuery Q = SQ.getWithInstruction(&I);
 873 
874  Value *Cond, *True = nullptr, *False = nullptr;
875 
876  // Special-case for add/negate combination. Replace the zero in the negation
877  // with the trailing add operand:
878  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
879  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
880  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
881  // We need an 'add' and exactly 1 arm of the select to have been simplified.
882  if (Opcode != Instruction::Add || (!True && !False) || (True && False))
883  return nullptr;
884 
885  Value *N;
886  if (True && match(FVal, m_Neg(m_Value(N)))) {
887  Value *Sub = Builder.CreateSub(Z, N);
888  return Builder.CreateSelect(Cond, True, Sub, I.getName());
889  }
890  if (False && match(TVal, m_Neg(m_Value(N)))) {
891  Value *Sub = Builder.CreateSub(Z, N);
892  return Builder.CreateSelect(Cond, Sub, False, I.getName());
893  }
894  return nullptr;
895  };
896 
897  if (LHSIsSelect && RHSIsSelect && A == D) {
898  // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
899  Cond = A;
900  True = simplifyBinOp(Opcode, B, E, FMF, Q);
901  False = simplifyBinOp(Opcode, C, F, FMF, Q);
902 
903  if (LHS->hasOneUse() && RHS->hasOneUse()) {
904  if (False && !True)
905  True = Builder.CreateBinOp(Opcode, B, E);
906  else if (True && !False)
907  False = Builder.CreateBinOp(Opcode, C, F);
908  }
909  } else if (LHSIsSelect && LHS->hasOneUse()) {
910  // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
911  Cond = A;
912  True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
913  False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
914  if (Value *NewSel = foldAddNegate(B, C, RHS))
915  return NewSel;
916  } else if (RHSIsSelect && RHS->hasOneUse()) {
917  // X op (D ? E : F) -> D ? (X op E) : (X op F)
918  Cond = D;
919  True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
920  False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
921  if (Value *NewSel = foldAddNegate(E, F, LHS))
922  return NewSel;
923  }
924 
925  if (!True || !False)
926  return nullptr;
927 
928  Value *SI = Builder.CreateSelect(Cond, True, False);
929  SI->takeName(&I);
930  return SI;
931 }
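// e.g. (illustrative): (C ? X : 0) + (C ? Y : 0) --> C ? (X + Y) : 0. The
// false arm folds to 0, and the true arm "X + Y" is materialized only when
// both selects are single-use.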
932 
933 /// Freely adapt every user of V as-if V was changed to !V.
934 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
935 void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
936  for (User *U : I->users()) {
937  switch (cast<Instruction>(U)->getOpcode()) {
938  case Instruction::Select: {
939  auto *SI = cast<SelectInst>(U);
940  SI->swapValues();
941  SI->swapProfMetadata();
942  break;
943  }
944  case Instruction::Br:
945  cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
946  break;
947  case Instruction::Xor:
948  replaceInstUsesWith(cast<Instruction>(*U), I);
949  break;
950  default:
951  llvm_unreachable("Got unexpected user - out of sync with "
952  "canFreelyInvertAllUsersOf() ?");
953  }
954  }
955 }
956 
957 /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
958 /// constant zero (which is the 'negate' form).
959 Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
960  Value *NegV;
961  if (match(V, m_Neg(m_Value(NegV))))
962  return NegV;
963 
964  // Constants can be considered to be negated values if they can be folded.
965  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
966  return ConstantExpr::getNeg(C);
967 
968  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
969  if (C->getType()->getElementType()->isIntegerTy())
970  return ConstantExpr::getNeg(C);
971 
972  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
973  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
974  Constant *Elt = CV->getAggregateElement(i);
975  if (!Elt)
976  return nullptr;
977 
978  if (isa<UndefValue>(Elt))
979  continue;
980 
981  if (!isa<ConstantInt>(Elt))
982  return nullptr;
983  }
984  return ConstantExpr::getNeg(CV);
985  }
986 
987  // Negate integer vector splats.
988  if (auto *CV = dyn_cast<Constant>(V))
989  if (CV->getType()->isVectorTy() &&
990  CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
991  return ConstantExpr::getNeg(CV);
992 
993  return nullptr;
994 }
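// e.g. dyn_castNegVal returns %X for "sub i32 0, %X" and returns -5 for the
// constant 5 (illustrative values).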
995 
996 /// A binop with a constant operand and a sign-extended boolean operand may be
997 /// converted into a select of constants by applying the binary operation to
998 /// the constant with the two possible values of the extended boolean (0 or -1).
999 Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1000  // TODO: Handle non-commutative binop (constant is operand 0).
1001  // TODO: Handle zext.
1002  // TODO: Peek through 'not' of cast.
1003  Value *BO0 = BO.getOperand(0);
1004  Value *BO1 = BO.getOperand(1);
1005  Value *X;
1006  Constant *C;
1007  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1008  !X->getType()->isIntOrIntVectorTy(1))
1009  return nullptr;
1010 
1011  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
 1012   Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
 1013   Constant *Zero = ConstantInt::getNullValue(BO.getType());
 1014   Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1015  Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1016  return SelectInst::Create(X, TVal, FVal);
1017 }
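// e.g. (illustrative):
//   %s = sext i1 %X to i32
//   %r = and i32 %s, 42
// becomes
//   %r = select i1 %X, i32 42, i32 0    ; and(-1, 42) and and(0, 42)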
1018 
 1019 static Constant *constantFoldOperationIntoSelectOperand(
 1020     Instruction &I, SelectInst *SI, Value *SO) {
1021  auto *ConstSO = dyn_cast<Constant>(SO);
1022  if (!ConstSO)
1023  return nullptr;
1024 
1025  SmallVector<Constant *> ConstOps;
1026  for (Value *Op : I.operands()) {
1027  if (Op == SI)
1028  ConstOps.push_back(ConstSO);
1029  else if (auto *C = dyn_cast<Constant>(Op))
1030  ConstOps.push_back(C);
1031  else
1032  llvm_unreachable("Operands should be select or constant");
1033  }
1034  return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1035 }
1036 
 1037 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
 1038                                              InstCombiner::BuilderTy &Builder) {
 1039   if (auto *Cast = dyn_cast<CastInst>(&I))
1040  return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
1041 
1042  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1043  assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
1044  "Expected constant-foldable intrinsic");
1045  Intrinsic::ID IID = II->getIntrinsicID();
1046  if (II->arg_size() == 1)
1047  return Builder.CreateUnaryIntrinsic(IID, SO);
1048 
1049  // This works for real binary ops like min/max (where we always expect the
1050  // constant operand to be canonicalized as op1) and unary ops with a bonus
1051  // constant argument like ctlz/cttz.
1052  // TODO: Handle non-commutative binary intrinsics as below for binops.
1053  assert(II->arg_size() == 2 && "Expected binary intrinsic");
1054  assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
1055  return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
1056  }
1057 
1058  assert(I.isBinaryOp() && "Unexpected opcode for select folding");
1059 
1060  // Figure out if the constant is the left or the right argument.
1061  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
1062  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
1063 
1064  Value *Op0 = SO, *Op1 = ConstOperand;
1065  if (!ConstIsRHS)
1066  std::swap(Op0, Op1);
1067 
1068  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
1069  Op1, SO->getName() + ".op");
1070  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
1071  NewBOI->copyIRFlags(&I);
1072  return NewBO;
1073 }
1074 
 1075 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
 1076                                                 bool FoldWithMultiUse) {
 1077   // Don't modify shared select instructions unless FoldWithMultiUse is set.
1078  if (!SI->hasOneUse() && !FoldWithMultiUse)
1079  return nullptr;
1080 
1081  Value *TV = SI->getTrueValue();
1082  Value *FV = SI->getFalseValue();
1083  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1084  return nullptr;
1085 
1086  // Bool selects with constant operands can be folded to logical ops.
1087  if (SI->getType()->isIntOrIntVectorTy(1))
1088  return nullptr;
1089 
1090  // If it's a bitcast involving vectors, make sure it has the same number of
1091  // elements on both sides.
1092  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1093  VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1094  VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1095 
1096  // Verify that either both or neither are vectors.
1097  if ((SrcTy == nullptr) != (DestTy == nullptr))
1098  return nullptr;
1099 
1100  // If vectors, verify that they have the same number of elements.
1101  if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1102  return nullptr;
1103  }
1104 
1105  // Test if a CmpInst instruction is used exclusively by a select as
1106  // part of a minimum or maximum operation. If so, refrain from doing
1107  // any other folding. This helps out other analyses which understand
1108  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
1109  // and CodeGen. And in this case, at least one of the comparison
1110  // operands has at least one user besides the compare (the select),
1111  // which would often largely negate the benefit of folding anyway.
1112  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
1113  if (CI->hasOneUse()) {
1114  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1115 
1116  // FIXME: This is a hack to avoid infinite looping with min/max patterns.
1117  // We have to ensure that vector constants that only differ with
1118  // undef elements are treated as equivalent.
1119  auto areLooselyEqual = [](Value *A, Value *B) {
1120  if (A == B)
1121  return true;
1122 
1123  // Test for vector constants.
1124  Constant *ConstA, *ConstB;
1125  if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
1126  return false;
1127 
1128  // TODO: Deal with FP constants?
1129  if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
1130  return false;
1131 
1132  // Compare for equality including undefs as equal.
1133  auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
1134  const APInt *C;
1135  return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
1136  };
1137 
1138  if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
1139  (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
1140  return nullptr;
1141  }
1142  }
1143 
 1144   // Make sure that one of the select arms constant folds successfully.
 1145   Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
 1146   Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
 1147   if (!NewTV && !NewFV)
 1148     return nullptr;
 1149 
 1150   // Create an instruction for the arm that did not fold.
 1151   if (!NewTV)
 1152     NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
 1153   if (!NewFV)
 1154     NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
 1155   return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1156 }
1157 
 1158 static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
 1159                                         InstCombiner::BuilderTy &Builder) {
 1160   bool ConstIsRHS = isa<Constant>(I->getOperand(1));
1161  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
1162 
1163  Value *Op0 = InV, *Op1 = C;
1164  if (!ConstIsRHS)
1165  std::swap(Op0, Op1);
1166 
1167  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
1168  auto *FPInst = dyn_cast<Instruction>(RI);
1169  if (FPInst && isa<FPMathOperator>(FPInst))
1170  FPInst->copyFastMathFlags(I);
1171  return RI;
1172 }
1173 
 1174 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
 1175   unsigned NumPHIValues = PN->getNumIncomingValues();
1176  if (NumPHIValues == 0)
1177  return nullptr;
1178 
1179  // We normally only transform phis with a single use. However, if a PHI has
1180  // multiple uses and they are all the same operation, we can fold *all* of the
1181  // uses into the PHI.
1182  if (!PN->hasOneUse()) {
1183  // Walk the use list for the instruction, comparing them to I.
1184  for (User *U : PN->users()) {
1185  Instruction *UI = cast<Instruction>(U);
1186  if (UI != &I && !I.isIdenticalTo(UI))
1187  return nullptr;
1188  }
1189  // Otherwise, we can replace *all* users with the new PHI we form.
1190  }
1191 
1192  // Check to see if all of the operands of the PHI are simple constants
1193  // (constantint/constantfp/undef). If there is one non-constant value,
1194  // remember the BB it is in. If there is more than one or if *it* is a PHI,
1195  // bail out. We don't do arbitrary constant expressions here because moving
1196  // their computation can be expensive without a cost model.
1197  BasicBlock *NonConstBB = nullptr;
1198  for (unsigned i = 0; i != NumPHIValues; ++i) {
1199  Value *InVal = PN->getIncomingValue(i);
1200  // For non-freeze, require constant operand
1201  // For freeze, require non-undef, non-poison operand
1202  if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
1203  continue;
1204  if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
1205  continue;
1206 
1207  if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1208  if (NonConstBB) return nullptr; // More than one non-const value.
1209 
1210  NonConstBB = PN->getIncomingBlock(i);
1211 
1212  // If the InVal is an invoke at the end of the pred block, then we can't
1213  // insert a computation after it without breaking the edge.
1214  if (isa<InvokeInst>(InVal))
1215  if (cast<Instruction>(InVal)->getParent() == NonConstBB)
1216  return nullptr;
1217 
1218  // If the incoming non-constant value is reachable from the phis block,
1219  // we'll push the operation across a loop backedge. This could result in
1220  // an infinite combine loop, and is generally non-profitable (especially
1221  // if the operation was originally outside the loop).
1222  if (isPotentiallyReachable(PN->getParent(), NonConstBB, nullptr, &DT, LI))
1223  return nullptr;
1224  }
1225 
1226  // If there is exactly one non-constant value, we can insert a copy of the
1227  // operation in that block. However, if this is a critical edge, we would be
1228  // inserting the computation on some other paths (e.g. inside a loop). Only
1229  // do this if the pred block is unconditionally branching into the phi block.
1230  // Also, make sure that the pred block is not dead code.
1231  if (NonConstBB != nullptr) {
1232  BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
1233  if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
1234  return nullptr;
1235  }
1236 
1237  // Okay, we can do the transformation: create the new PHI node.
1238  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1239  InsertNewInstBefore(NewPN, *PN);
1240  NewPN->takeName(PN);
1241 
1242  // If we are going to have to insert a new computation, do so right before the
1243  // predecessor's terminator.
1244  if (NonConstBB)
1245  Builder.SetInsertPoint(NonConstBB->getTerminator());
1246 
1247  // Next, add all of the operands to the PHI.
1248  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
1249  // We only currently try to fold the condition of a select when it is a phi,
1250  // not the true/false values.
1251  Value *TrueV = SI->getTrueValue();
1252  Value *FalseV = SI->getFalseValue();
1253  BasicBlock *PhiTransBB = PN->getParent();
1254  for (unsigned i = 0; i != NumPHIValues; ++i) {
1255  BasicBlock *ThisBB = PN->getIncomingBlock(i);
1256  Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
1257  Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
1258  Value *InV = nullptr;
1259  // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
1260  // even if currently isNullValue gives false.
1261  Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
1262  // For vector constants, we cannot use isNullValue to fold into
1263  // FalseVInPred versus TrueVInPred. When we have individual nonzero
1264  // elements in the vector, we will incorrectly fold InC to
1265  // `TrueVInPred`.
1266  if (InC && isa<ConstantInt>(InC))
1267  InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
1268  else {
1269  // Generate the select in the same block as PN's current incoming block.
1270  // Note: ThisBB need not be the NonConstBB because vector constants
1271  // which are constants by definition are handled here.
1272  // FIXME: This can lead to an increase in IR generation because we might
1273  // generate selects for vector constant phi operand, that could not be
1274  // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
1275  // non-vector phis, this transformation was always profitable because
1276  // the select would be generated exactly once in the NonConstBB.
1277  Builder.SetInsertPoint(ThisBB->getTerminator());
1278  InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
1279  FalseVInPred, "phi.sel");
1280  }
1281  NewPN->addIncoming(InV, ThisBB);
1282  }
1283  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
1284  Constant *C = cast<Constant>(I.getOperand(1));
1285  for (unsigned i = 0; i != NumPHIValues; ++i) {
1286  Value *InV = nullptr;
1287  if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1288  InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
1289  else
1290  InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
1291  C, "phi.cmp");
1292  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1293  }
1294  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
1295  for (unsigned i = 0; i != NumPHIValues; ++i) {
 1296       Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
 1297                                              Builder);
1298  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1299  }
1300  } else if (isa<FreezeInst>(&I)) {
1301  for (unsigned i = 0; i != NumPHIValues; ++i) {
1302  Value *InV;
1303  if (NonConstBB == PN->getIncomingBlock(i))
1304  InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
1305  else
1306  InV = PN->getIncomingValue(i);
1307  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1308  }
1309  } else if (auto *EV = dyn_cast<ExtractValueInst>(&I)) {
1310  for (unsigned i = 0; i != NumPHIValues; ++i)
1311  NewPN->addIncoming(Builder.CreateExtractValue(PN->getIncomingValue(i),
1312  EV->getIndices(), "phi.ev"),
1313  PN->getIncomingBlock(i));
1314  } else {
1315  CastInst *CI = cast<CastInst>(&I);
1316  Type *RetTy = CI->getType();
1317  for (unsigned i = 0; i != NumPHIValues; ++i) {
1318  Value *InV;
1319  if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
1320  InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
1321  else
1322  InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
1323  I.getType(), "phi.cast");
1324  NewPN->addIncoming(InV, PN->getIncomingBlock(i));
1325  }
1326  }
1327 
1328  for (User *U : make_early_inc_range(PN->users())) {
1329  Instruction *User = cast<Instruction>(U);
1330  if (User == &I) continue;
1331  replaceInstUsesWith(*User, NewPN);
1332  eraseInstFromFunction(*User);
1333  }
1334  return replaceInstUsesWith(I, NewPN);
1335 }
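// Sketch of the transform above (illustrative, assuming %bb2 ends in an
// unconditional branch to the phi's block):
//   %p = phi i32 [ 3, %bb1 ], [ %x, %bb2 ]
//   %r = add i32 %p, 1
// becomes a phi of the folded values, with the add cloned only into %bb2:
//   %r = phi i32 [ 4, %bb1 ], [ %phi.bo, %bb2 ]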
1336 
 1337 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
 1338   // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1339  // we are guarding against replicating the binop in >1 predecessor.
1340  // This could miss matching a phi with 2 constant incoming values.
1341  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1342  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1343  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1344  Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1345  return nullptr;
1346 
1347  // TODO: Remove the restriction for binop being in the same block as the phis.
1348  if (BO.getParent() != Phi0->getParent() ||
1349  BO.getParent() != Phi1->getParent())
1350  return nullptr;
1351 
1352  // Match a pair of incoming constants for one of the predecessor blocks.
1353  BasicBlock *ConstBB, *OtherBB;
1354  Constant *C0, *C1;
1355  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1356  ConstBB = Phi0->getIncomingBlock(0);
1357  OtherBB = Phi0->getIncomingBlock(1);
1358  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1359  ConstBB = Phi0->getIncomingBlock(1);
1360  OtherBB = Phi0->getIncomingBlock(0);
1361  } else {
1362  return nullptr;
1363  }
1364  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1365  return nullptr;
1366 
1367  // The block that we are hoisting to must reach here unconditionally.
1368  // Otherwise, we could be speculatively executing an expensive or
1369  // non-speculative op.
1370  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1371  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1372  !DT.isReachableFromEntry(OtherBB))
1373  return nullptr;
1374 
1375  // TODO: This check could be tightened to only apply to binops (div/rem) that
1376  // are not safe to speculatively execute. But that could allow hoisting
1377  // potentially expensive instructions (fdiv for example).
1378  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
 1379     if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
 1380       return nullptr;
1381 
1382  // Fold constants for the predecessor block with constant incoming values.
1383  Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1384  if (!NewC)
1385  return nullptr;
1386 
1387  // Make a new binop in the predecessor block with the non-constant incoming
1388  // values.
1389  Builder.SetInsertPoint(PredBlockBranch);
1390  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1391  Phi0->getIncomingValueForBlock(OtherBB),
1392  Phi1->getIncomingValueForBlock(OtherBB));
1393  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1394  NotFoldedNewBO->copyIRFlags(&BO);
1395 
1396  // Replace the binop with a phi of the new values. The old phis are dead.
1397  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1398  NewPhi->addIncoming(NewBO, OtherBB);
1399  NewPhi->addIncoming(NewC, ConstBB);
1400  return NewPhi;
1401 }
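// Sketch (hypothetical block and value names):
//   %p0 = phi i32 [ 7, %constbb ], [ %x, %otherbb ]
//   %p1 = phi i32 [ 5, %constbb ], [ %y, %otherbb ]
//   %r  = add i32 %p0, %p1
// becomes an add of %x and %y at the end of %otherbb, plus
//   %r = phi i32 [ %new.add, %otherbb ], [ 12, %constbb ]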
1402 
 1403 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
 1404   if (!isa<Constant>(I.getOperand(1)))
1405  return nullptr;
1406 
1407  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1408  if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1409  return NewSel;
1410  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1411  if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1412  return NewPhi;
1413  }
1414  return nullptr;
1415 }
1416 
1417 /// Given a pointer type and a constant offset, determine whether or not there
1418 /// is a sequence of GEP indices into the pointed type that will land us at the
1419 /// specified offset. If so, fill them into NewIndices and return the resultant
1420 /// element type, otherwise return null.
1421 static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
1422  SmallVectorImpl<Value *> &NewIndices,
1423  const DataLayout &DL) {
1424  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
1425  Type *Ty = PtrTy->getNonOpaquePointerElementType();
1426  if (!Ty->isSized())
1427  return nullptr;
1428 
1429  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
1430  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
1431  if (!Offset.isZero())
1432  return nullptr;
1433 
1434  for (const APInt &Index : Indices)
1435  NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
1436  return Ty;
1437 }
1438 
 1439 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
 1440   // If this GEP has only 0 indices, it is the same pointer as
1441  // Src. If Src is not a trivial GEP too, don't combine
1442  // the indices.
1443  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1444  !Src.hasOneUse())
1445  return false;
1446  return true;
1447 }
1448 
1449 /// Return a value X such that Val = X * Scale, or null if none.
1450 /// If the multiplication is known not to overflow, then NoSignedWrap is set.
1451 Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
1452  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
1453  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
1454  Scale.getBitWidth() && "Scale not compatible with value!");
1455 
1456  // If Val is zero or Scale is one then Val = Val * Scale.
1457  if (match(Val, m_Zero()) || Scale == 1) {
1458  NoSignedWrap = true;
1459  return Val;
1460  }
1461 
1462  // If Scale is zero then it does not divide Val.
1463  if (Scale.isMinValue())
1464  return nullptr;
1465 
1466  // Look through chains of multiplications, searching for a constant that is
1467  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
1468  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
1469  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
1470  // down from Val:
1471  //
1472  // Val = M1 * X || Analysis starts here and works down
1473  // M1 = M2 * Y || Doesn't descend into terms with more
1474  // M2 = Z * 4 \/ than one use
1475  //
1476  // Then to modify a term at the bottom:
1477  //
1478  // Val = M1 * X
1479  // M1 = Z * Y || Replaced M2 with Z
1480  //
1481  // Then to work back up correcting nsw flags.
1482 
1483  // Op - the term we are currently analyzing. Starts at Val then drills down.
1484  // Replaced with its descaled value before exiting from the drill down loop.
1485  Value *Op = Val;
1486 
1487  // Parent - initially null, but after drilling down notes where Op came from.
1488  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
1489  // 0'th operand of Val.
1490  std::pair<Instruction *, unsigned> Parent;
1491 
1492  // Set if the transform requires a descaling at deeper levels that doesn't
1493  // overflow.
1494  bool RequireNoSignedWrap = false;
1495 
1496  // Log base 2 of the scale. Negative if not a power of 2.
1497  int32_t logScale = Scale.exactLogBase2();
1498 
1499  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
1500  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
1501  // If Op is a constant divisible by Scale then descale to the quotient.
1502  APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
1503  APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
1504  if (!Remainder.isMinValue())
1505  // Not divisible by Scale.
1506  return nullptr;
1507  // Replace with the quotient in the parent.
1508  Op = ConstantInt::get(CI->getType(), Quotient);
1509  NoSignedWrap = true;
1510  break;
1511  }
1512 
1513  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
1514  if (BO->getOpcode() == Instruction::Mul) {
1515  // Multiplication.
1516  NoSignedWrap = BO->hasNoSignedWrap();
1517  if (RequireNoSignedWrap && !NoSignedWrap)
1518  return nullptr;
1519 
1520  // There are three cases for multiplication: multiplication by exactly
1521  // the scale, multiplication by a constant different to the scale, and
1522  // multiplication by something else.
1523  Value *LHS = BO->getOperand(0);
1524  Value *RHS = BO->getOperand(1);
1525 
1526  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1527  // Multiplication by a constant.
1528  if (CI->getValue() == Scale) {
1529  // Multiplication by exactly the scale, replace the multiplication
1530  // by its left-hand side in the parent.
1531  Op = LHS;
1532  break;
1533  }
1534 
1535  // Otherwise drill down into the constant.
1536  if (!Op->hasOneUse())
1537  return nullptr;
1538 
1539  Parent = std::make_pair(BO, 1);
1540  continue;
1541  }
1542 
1543  // Multiplication by something else. Drill down into the left-hand side
1544  // since that's where the reassociate pass puts the good stuff.
1545  if (!Op->hasOneUse())
1546  return nullptr;
1547 
1548  Parent = std::make_pair(BO, 0);
1549  continue;
1550  }
1551 
1552  if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
1553  isa<ConstantInt>(BO->getOperand(1))) {
1554  // Multiplication by a power of 2.
1555  NoSignedWrap = BO->hasNoSignedWrap();
1556  if (RequireNoSignedWrap && !NoSignedWrap)
1557  return nullptr;
1558 
1559  Value *LHS = BO->getOperand(0);
1560  int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
1561  getLimitedValue(Scale.getBitWidth());
1562  // Op = LHS << Amt.
1563 
1564  if (Amt == logScale) {
1565  // Multiplication by exactly the scale, replace the multiplication
1566  // by its left-hand side in the parent.
1567  Op = LHS;
1568  break;
1569  }
1570  if (Amt < logScale || !Op->hasOneUse())
1571  return nullptr;
1572 
1573  // Multiplication by more than the scale. Reduce the multiplying amount
1574  // by the scale in the parent.
1575  Parent = std::make_pair(BO, 1);
1576  Op = ConstantInt::get(BO->getType(), Amt - logScale);
1577  break;
1578  }
1579  }
1580 
1581  if (!Op->hasOneUse())
1582  return nullptr;
1583 
1584  if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1585  if (Cast->getOpcode() == Instruction::SExt) {
1586  // Op is sign-extended from a smaller type, descale in the smaller type.
1587  unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1588  APInt SmallScale = Scale.trunc(SmallSize);
1589  // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
1590  // descale Op as (sext Y) * Scale. In order to have
1591  // sext (Y * SmallScale) = (sext Y) * Scale
1592  // some conditions need to hold however: SmallScale must sign-extend to
1593  // Scale and the multiplication Y * SmallScale should not overflow.
1594  if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1595  // SmallScale does not sign-extend to Scale.
1596  return nullptr;
1597  assert(SmallScale.exactLogBase2() == logScale);
1598  // Require that Y * SmallScale must not overflow.
1599  RequireNoSignedWrap = true;
1600 
1601  // Drill down through the cast.
1602  Parent = std::make_pair(Cast, 0);
1603  Scale = SmallScale;
1604  continue;
1605  }
1606 
1607  if (Cast->getOpcode() == Instruction::Trunc) {
1608  // Op is truncated from a larger type, descale in the larger type.
1609  // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
1610  // trunc (Y * sext Scale) = (trunc Y) * Scale
1611  // always holds. However (trunc Y) * Scale may overflow even if
1612  // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1613  // from this point up in the expression (see later).
1614  if (RequireNoSignedWrap)
1615  return nullptr;
1616 
1617  // Drill down through the cast.
1618  unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1619  Parent = std::make_pair(Cast, 0);
1620  Scale = Scale.sext(LargeSize);
1621  if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1622  logScale = -1;
1623  assert(Scale.exactLogBase2() == logScale);
1624  continue;
1625  }
1626  }
1627 
1628  // Unsupported expression, bail out.
1629  return nullptr;
1630  }
1631 
1632  // If Op is zero then Val = Op * Scale.
1633  if (match(Op, m_Zero())) {
1634  NoSignedWrap = true;
1635  return Op;
1636  }
1637 
1638  // We know that we can successfully descale, so from here on we can safely
1639  // modify the IR. Op holds the descaled version of the deepest term in the
1640  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
1641  // not to overflow.
1642 
1643  if (!Parent.first)
1644  // The expression only had one term.
1645  return Op;
1646 
1647  // Rewrite the parent using the descaled version of its operand.
1648  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1649  assert(Op != Parent.first->getOperand(Parent.second) &&
1650  "Descaling was a no-op?");
1651  replaceOperand(*Parent.first, Parent.second, Op);
1652  Worklist.push(Parent.first);
1653 
1654  // Now work back up the expression correcting nsw flags. The logic is based
1655  // on the following observation: if X * Y is known not to overflow as a signed
1656  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1657  // then X * Z will not overflow as a signed multiplication either. As we work
1658  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1659  // current level has strictly smaller absolute value than the original.
1660  Instruction *Ancestor = Parent.first;
1661  do {
1662  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1663  // If the multiplication wasn't nsw then we can't say anything about the
1664  // value of the descaled multiplication, and we have to clear nsw flags
1665  // from this point on up.
1666  bool OpNoSignedWrap = BO->hasNoSignedWrap();
1667  NoSignedWrap &= OpNoSignedWrap;
1668  if (NoSignedWrap != OpNoSignedWrap) {
1669  BO->setHasNoSignedWrap(NoSignedWrap);
1670  Worklist.push(Ancestor);
1671  }
1672  } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1673  // The fact that the descaled input to the trunc has smaller absolute
1674  // value than the original input doesn't tell us anything useful about
1675  // the absolute values of the truncations.
1676  NoSignedWrap = false;
1677  }
1678  assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1679  "Failed to keep proper track of nsw flags while drilling down?");
1680 
1681  if (Ancestor == Val)
1682  // Got to the top, all done!
1683  return Val;
1684 
1685  // Move up one level in the expression.
1686  assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1687  Ancestor = Ancestor->user_back();
1688  } while (true);
1689 }
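// Worked example (illustrative only; not part of the upstream source, and the
// value names are hypothetical). Descaling the index of a scaled GEP:
//   %idx = mul nsw i64 %n, 8        ; Val, with Scale = 4
// The drill-down finds the constant 8, which 4 divides, so the term is
// rewritten and the nsw flag is preserved on the way back up:
//   %idx = mul nsw i64 %n, 2        ; returned with NoSignedWrap = true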
1690 
1691 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1692  if (!isa<VectorType>(Inst.getType()))
1693  return nullptr;
1694 
1695  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1696  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1697  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1698  cast<VectorType>(Inst.getType())->getElementCount());
1699  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1700  cast<VectorType>(Inst.getType())->getElementCount());
1701 
1702  // If both operands of the binop are vector concatenations, then perform the
1703  // narrow binop on each pair of the source operands followed by concatenation
1704  // of the results.
1705  Value *L0, *L1, *R0, *R1;
1706  ArrayRef<int> Mask;
1707  if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1708  match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1709  LHS->hasOneUse() && RHS->hasOneUse() &&
1710  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1711  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1712  // This transform does not have the speculative execution constraint as
1713  // below because the shuffle is a concatenation. The new binops are
1714  // operating on exactly the same elements as the existing binop.
1715  // TODO: We could ease the mask requirement to allow different undef lanes,
1716  // but that requires an analysis of the binop-with-undef output value.
1717  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1718  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1719  BO->copyIRFlags(&Inst);
1720  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1721  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1722  BO->copyIRFlags(&Inst);
1723  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1724  }
1725 
1726  // It may not be safe to reorder shuffles and things like div, urem, etc.
1727  // because we may trap when executing those ops on unknown vector elements.
1728  // See PR20059.
1729  if (!isSafeToSpeculativelyExecute(&Inst))
1730  return nullptr;
1731 
1732  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1733  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1734  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1735  BO->copyIRFlags(&Inst);
1736  return new ShuffleVectorInst(XY, M);
1737  };
1738 
1739  // If both arguments of the binary operation are shuffles that use the same
1740  // mask and shuffle within a single vector, move the shuffle after the binop.
1741  Value *V1, *V2;
1742  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1743  match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1744  V1->getType() == V2->getType() &&
1745  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1746  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1747  return createBinOpShuffle(V1, V2, Mask);
1748  }
1749 
1750  // If both arguments of a commutative binop are select-shuffles that use the
1751  // same mask with commuted operands, the shuffles are unnecessary.
1752  if (Inst.isCommutative() &&
1753  match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1754  match(RHS,
1755  m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1756  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1757  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1758  // TODO: Allow shuffles that contain undefs in the mask?
1759  // That is legal, but it reduces undef knowledge.
1760  // TODO: Allow arbitrary shuffles by shuffling after binop?
1761  // That might be legal, but we have to deal with poison.
1762  if (LShuf->isSelect() &&
1763  !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
1764  RShuf->isSelect() &&
1765  !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
1766  // Example:
1767  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1768  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1769  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1770  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1771  NewBO->copyIRFlags(&Inst);
1772  return NewBO;
1773  }
1774  }
1775 
1776  // If one argument is a shuffle within one vector and the other is a constant,
1777  // try moving the shuffle after the binary operation. This canonicalization
1778  // intends to move shuffles closer to other shuffles and binops closer to
1779  // other binops, so they can be folded. It may also enable demanded elements
1780  // transforms.
1781  Constant *C;
1782  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1783  if (InstVTy &&
1784  match(&Inst,
1785  m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1786  m_ImmConstant(C))) &&
1787  cast<FixedVectorType>(V1->getType())->getNumElements() <=
1788  InstVTy->getNumElements()) {
1789  assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1790  "Shuffle should not change scalar type");
1791 
1792  // Find constant NewC that has property:
1793  // shuffle(NewC, ShMask) = C
1794  // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1795  // reorder is not possible. A 1-to-1 mapping is not required. Example:
1796  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1797  bool ConstOp1 = isa<Constant>(RHS);
1798  ArrayRef<int> ShMask = Mask;
1799  unsigned SrcVecNumElts =
1800  cast<FixedVectorType>(V1->getType())->getNumElements();
1801  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1802  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1803  bool MayChange = true;
1804  unsigned NumElts = InstVTy->getNumElements();
1805  for (unsigned I = 0; I < NumElts; ++I) {
1806  Constant *CElt = C->getAggregateElement(I);
1807  if (ShMask[I] >= 0) {
1808  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1809  Constant *NewCElt = NewVecC[ShMask[I]];
1810  // Bail out if:
1811  // 1. The constant vector contains a constant expression.
1812  // 2. The shuffle needs an element of the constant vector that can't
1813  // be mapped to a new constant vector.
1814  // 3. This is a widening shuffle that copies elements of V1 into the
1815  // extended elements (extending with undef is allowed).
1816  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1817  I >= SrcVecNumElts) {
1818  MayChange = false;
1819  break;
1820  }
1821  NewVecC[ShMask[I]] = CElt;
1822  }
1823  // If this is a widening shuffle, we must be able to extend with undef
1824  // elements. If the original binop does not produce an undef in the high
1825  // lanes, then this transform is not safe.
1826  // Similarly for undef lanes due to the shuffle mask, we can only
1827  // transform binops that preserve undef.
1828  // TODO: We could shuffle those non-undef constant values into the
1829  // result by using a constant vector (rather than an undef vector)
1830  // as operand 1 of the new binop, but that might be too aggressive
1831  // for target-independent shuffle creation.
1832  if (I >= SrcVecNumElts || ShMask[I] < 0) {
1833  Constant *MaybeUndef =
1834  ConstOp1
1835  ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1836  : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1837  if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1838  MayChange = false;
1839  break;
1840  }
1841  }
1842  }
1843  if (MayChange) {
1844  Constant *NewC = ConstantVector::get(NewVecC);
1845  // It may not be safe to execute a binop on a vector with undef elements
1846  // because the entire instruction can be folded to undef or create poison
1847  // that did not exist in the original code.
1848  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1849  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1850 
1851  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1852  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1853  Value *NewLHS = ConstOp1 ? V1 : NewC;
1854  Value *NewRHS = ConstOp1 ? NewC : V1;
1855  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1856  }
1857  }
1858 
1859  // Try to reassociate to sink a splat shuffle after a binary operation.
1860  if (Inst.isAssociative() && Inst.isCommutative()) {
1861  // Canonicalize shuffle operand as LHS.
1862  if (isa<ShuffleVectorInst>(RHS))
1863  std::swap(LHS, RHS);
1864 
1865  Value *X;
1866  ArrayRef<int> MaskC;
1867  int SplatIndex;
1868  Value *Y, *OtherOp;
1869  if (!match(LHS,
1870  m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1871  !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1872  X->getType() != Inst.getType() ||
1873  !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1874  return nullptr;
1875 
1876  // FIXME: This may not be safe if the analysis allows undef elements. By
1877  // moving 'Y' before the splat shuffle, we are implicitly assuming
1878  // that it is not undef/poison at the splat index.
1879  if (isSplatValue(OtherOp, SplatIndex)) {
1880  std::swap(Y, OtherOp);
1881  } else if (!isSplatValue(Y, SplatIndex)) {
1882  return nullptr;
1883  }
1884 
1885  // X and Y are splatted values, so perform the binary operation on those
1886  // values followed by a splat followed by the 2nd binary operation:
1887  // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1888  Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1889  SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1890  Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1891  Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1892 
1893  // Intersect FMF on both new binops. Other (poison-generating) flags are
1894  // dropped to be safe.
1895  if (isa<FPMathOperator>(R)) {
1896  R->copyFastMathFlags(&Inst);
1897  R->andIRFlags(RHS);
1898  }
1899  if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1900  NewInstBO->copyIRFlags(R);
1901  return R;
1902  }
1903 
1904  return nullptr;
1905 }
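// Illustrative IR for the shuffle-with-constant reordering above (a sketch
// with hypothetical values, not taken from the upstream tests):
//   %s = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
//   %r = add <2 x i32> %s, <i32 10, i32 20>
// is rewritten with NewC = <i32 20, i32 10> (so that shuffle(NewC, Mask) == C):
//   %a = add <2 x i32> %v, <i32 20, i32 10>
//   %r = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>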
1906 
1907 /// Try to narrow the width of a binop if at least 1 operand is an extend
1908 /// of a value. This requires a potentially expensive known bits check to make
1909 /// sure the narrow op does not overflow.
1910 Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1911  // We need at least one extended operand.
1912  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1913 
1914  // If this is a sub, we swap the operands since we always want an extension
1915  // on the RHS. The LHS can be an extension or a constant.
1916  if (BO.getOpcode() == Instruction::Sub)
1917  std::swap(Op0, Op1);
1918 
1919  Value *X;
1920  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1921  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1922  return nullptr;
1923 
1924  // If both operands are the same extension from the same source type and we
1925  // can eliminate at least one (hasOneUse), this might work.
1926  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1927  Value *Y;
1928  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1929  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1930  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1931  // If that did not match, see if we have a suitable constant operand.
1932  // Truncating and extending must produce the same constant.
1933  Constant *WideC;
1934  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1935  return nullptr;
1936  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1937  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1938  return nullptr;
1939  Y = NarrowC;
1940  }
1941 
1942  // Swap back now that we found our operands.
1943  if (BO.getOpcode() == Instruction::Sub)
1944  std::swap(X, Y);
1945 
1946  // Both operands have narrow versions. Last step: the math must not overflow
1947  // in the narrow width.
1948  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1949  return nullptr;
1950 
1951  // bo (ext X), (ext Y) --> ext (bo X, Y)
1952  // bo (ext X), C --> ext (bo X, C')
1953  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1954  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1955  if (IsSext)
1956  NewBinOp->setHasNoSignedWrap();
1957  else
1958  NewBinOp->setHasNoUnsignedWrap();
1959  }
1960  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1961 }
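// Illustrative example of the narrowing above (hypothetical IR; it only fires
// when willNotOverflow() proves the narrow operation cannot wrap):
//   %a = zext i8 %x to i32
//   %b = zext i8 %y to i32
//   %r = add i32 %a, %b
// becomes
//   %narrow = add nuw i8 %x, %y
//   %r = zext i8 %narrow to i32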
1962 
1963 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1964  // At least one GEP must be inbounds.
1965  if (!GEP1.isInBounds() && !GEP2.isInBounds())
1966  return false;
1967 
1968  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1969  (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1970 }
1971 
1972 /// Thread a GEP operation with constant indices through the constant true/false
1973 /// arms of a select.
1974 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1975  InstCombiner::BuilderTy &Builder) {
1976  if (!GEP.hasAllConstantIndices())
1977  return nullptr;
1978 
1979  Instruction *Sel;
1980  Value *Cond;
1981  Constant *TrueC, *FalseC;
1982  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1983  !match(Sel,
1984  m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1985  return nullptr;
1986 
1987  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1988  // Propagate 'inbounds' and metadata from existing instructions.
1989  // Note: using IRBuilder to create the constants for efficiency.
1990  SmallVector<Value *, 4> IndexC(GEP.indices());
1991  bool IsInBounds = GEP.isInBounds();
1992  Type *Ty = GEP.getSourceElementType();
1993  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1994  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1995  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1996 }
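// Illustrative example of the select threading above (hypothetical globals):
//   %p = select i1 %c, [4 x i32]* @a, [4 x i32]* @b
//   %g = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 0, i64 2
// becomes a select of two constant GEP expressions:
//   %g = select i1 %c,
//        i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2),
//        i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 2)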
1997 
1998 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1999  GEPOperator *Src) {
2000  // Combine Indices - If the source pointer to this getelementptr instruction
2001  // is a getelementptr instruction with matching element type, combine the
2002  // indices of the two getelementptr instructions into a single instruction.
2003  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2004  return nullptr;
2005 
2006  if (Src->getResultElementType() == GEP.getSourceElementType() &&
2007  Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
2008  Src->hasOneUse()) {
2009  Value *GO1 = GEP.getOperand(1);
2010  Value *SO1 = Src->getOperand(1);
2011 
2012  if (LI) {
2013  // Try to reassociate loop invariant GEP chains to enable LICM.
2014  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
2015  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
2016  // invariant: this breaks the dependence between GEPs and allows LICM
2017  // to hoist the invariant part out of the loop.
2018  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
2019  // The swapped GEPs are inbounds if both original GEPs are inbounds
2020  // and the sign of the offsets is the same. For simplicity, only
2021  // handle both offsets being non-negative.
2022  bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
2023  isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
2024  isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
2025  // Put NewSrc at same location as %src.
2026  Builder.SetInsertPoint(cast<Instruction>(Src));
2027  Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
2028  Src->getPointerOperand(), GO1,
2029  Src->getName(), IsInBounds);
2030  GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2031  GEP.getSourceElementType(), NewSrc, {SO1});
2032  NewGEP->setIsInBounds(IsInBounds);
2033  return NewGEP;
2034  }
2035  }
2036  }
2037  }
2038 
2039  // Note that if our source is a gep chain itself then we wait for that
2040  // chain to be resolved before we perform this transformation. This
2041  // avoids us creating a TON of code in some cases.
2042  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
2043  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
2044  return nullptr; // Wait until our source is folded to completion.
2045 
2046  // For constant GEPs, use a more general offset-based folding approach.
2047  // Only do this for opaque pointers, as the result element type may change.
2048  Type *PtrTy = Src->getType()->getScalarType();
2049  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
2050  (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2051  // Split Src into a variable part and a constant suffix.
2052  gep_type_iterator GTI = gep_type_begin(*Src);
2053  Type *BaseType = GTI.getIndexedType();
2054  bool IsFirstType = true;
2055  unsigned NumVarIndices = 0;
2056  for (auto Pair : enumerate(Src->indices())) {
2057  if (!isa<ConstantInt>(Pair.value())) {
2058  BaseType = GTI.getIndexedType();
2059  IsFirstType = false;
2060  NumVarIndices = Pair.index() + 1;
2061  }
2062  ++GTI;
2063  }
2064 
2065  // Determine the offset for the constant suffix of Src.
2066  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2067  if (NumVarIndices != Src->getNumIndices()) {
2068  // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2069  if (isa<ScalableVectorType>(BaseType))
2070  return nullptr;
2071 
2072  SmallVector<Value *> ConstantIndices;
2073  if (!IsFirstType)
2074  ConstantIndices.push_back(
2075  Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2076  append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2077  Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2078  }
2079 
2080  // Add the offset for GEP (which is fully constant).
2081  if (!GEP.accumulateConstantOffset(DL, Offset))
2082  return nullptr;
2083 
2084  APInt OffsetOld = Offset;
2085  // Convert the total offset back into indices.
2086  SmallVector<APInt> ConstIndices =
2087  DL.getGEPIndicesForOffset(BaseType, Offset);
2088  if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2089  // If both GEP are constant-indexed, and cannot be merged in either way,
2090  // convert them to a GEP of i8.
2091  if (Src->hasAllConstantIndices())
2092  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2093  ? GetElementPtrInst::CreateInBounds(
2094  Builder.getInt8Ty(), Src->getOperand(0),
2095  Builder.getInt(OffsetOld), GEP.getName())
2096  : GetElementPtrInst::Create(
2097  Builder.getInt8Ty(), Src->getOperand(0),
2098  Builder.getInt(OffsetOld), GEP.getName());
2099  return nullptr;
2100  }
2101 
2102  bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2103  SmallVector<Value *> Indices;
2104  append_range(Indices, drop_end(Src->indices(),
2105  Src->getNumIndices() - NumVarIndices));
2106  for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2107  Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2108  // Even if the total offset is inbounds, we may end up representing it
2109  // by first performing a larger negative offset, and then a smaller
2110  // positive one. The large negative offset might go out of bounds. Only
2111  // preserve inbounds if all signs are the same.
2112  IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2113  }
2114 
2115  return IsInBounds
2116  ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
2117  Src->getOperand(0), Indices,
2118  GEP.getName())
2119  : GetElementPtrInst::Create(Src->getSourceElementType(),
2120  Src->getOperand(0), Indices,
2121  GEP.getName());
2122  }
2123 
2124  if (Src->getResultElementType() != GEP.getSourceElementType())
2125  return nullptr;
2126 
2127  SmallVector<Value*, 8> Indices;
2128 
2129  // Find out whether the last index in the source GEP is a sequential idx.
2130  bool EndsWithSequential = false;
2131  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2132  I != E; ++I)
2133  EndsWithSequential = I.isSequential();
2134 
2135  // Can we combine the two pointer arithmetic offsets?
2136  if (EndsWithSequential) {
2137  // Replace: gep (gep %P, long B), long A, ...
2138  // With: T = long A+B; gep %P, T, ...
2139  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2140  Value *GO1 = GEP.getOperand(1);
2141 
2142  // If they aren't the same type, then the input hasn't been processed
2143  // by the loop above yet (which canonicalizes sequential index types to
2144  // intptr_t). Just avoid transforming this until the input has been
2145  // normalized.
2146  if (SO1->getType() != GO1->getType())
2147  return nullptr;
2148 
2149  Value *Sum =
2150  simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2151  // Only do the combine when we are sure the cost after the
2152  // merge is never more than that before the merge.
2153  if (Sum == nullptr)
2154  return nullptr;
2155 
2156  // Update the GEP in place if possible.
2157  if (Src->getNumOperands() == 2) {
2158  GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2159  replaceOperand(GEP, 0, Src->getOperand(0));
2160  replaceOperand(GEP, 1, Sum);
2161  return &GEP;
2162  }
2163  Indices.append(Src->op_begin()+1, Src->op_end()-1);
2164  Indices.push_back(Sum);
2165  Indices.append(GEP.op_begin()+2, GEP.op_end());
2166  } else if (isa<Constant>(*GEP.idx_begin()) &&
2167  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2168  Src->getNumOperands() != 1) {
2169  // Otherwise we can do the fold if the first index of the GEP is a zero
2170  Indices.append(Src->op_begin()+1, Src->op_end());
2171  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2172  }
2173 
2174  if (!Indices.empty())
2175  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2176  ? GetElementPtrInst::CreateInBounds(
2177  Src->getSourceElementType(), Src->getOperand(0), Indices,
2178  GEP.getName())
2179  : GetElementPtrInst::Create(Src->getSourceElementType(),
2180  Src->getOperand(0), Indices,
2181  GEP.getName());
2182 
2183  return nullptr;
2184 }
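// Illustrative example of the GEP-of-GEP merge above (hypothetical IR; the
// in-place update path fires because the added indices simplify to a constant):
//   %p = getelementptr inbounds i32, i32* %base, i64 1
//   %q = getelementptr inbounds i32, i32* %p, i64 2
// becomes
//   %q = getelementptr inbounds i32, i32* %base, i64 3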
2185 
2186 // Note that we may have also stripped an address space cast in between.
2187 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
2188  GetElementPtrInst &GEP) {
2189  // With opaque pointers, there is no pointer element type we can use to
2190  // adjust the GEP type.
2191  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2192  if (SrcType->isOpaque())
2193  return nullptr;
2194 
2195  Type *GEPEltType = GEP.getSourceElementType();
2196  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
2197  Value *SrcOp = BCI->getOperand(0);
2198 
2199  // GEP directly using the source operand if this GEP is accessing an element
2200  // of a bitcasted pointer to vector or array of the same dimensions:
2201  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2202  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2203  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
2204  const DataLayout &DL) {
2205  auto *VecVTy = cast<FixedVectorType>(VecTy);
2206  return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
2207  ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
2208  DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
2209  };
2210  if (GEP.getNumOperands() == 3 &&
2211  ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
2212  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
2213  (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
2214  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
2215 
2216  // Create a new GEP here, as using `setOperand()` followed by
2217  // `setSourceElementType()` won't actually update the type of the
2218  // existing GEP Value, causing issues if this Value is accessed when
2219  // constructing an AddrSpaceCastInst.
2220  SmallVector<Value *, 8> Indices(GEP.indices());
2221  Value *NGEP =
2222  Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
2223  NGEP->takeName(&GEP);
2224 
2225  // Preserve GEP address space to satisfy users
2226  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2227  return new AddrSpaceCastInst(NGEP, GEP.getType());
2228 
2229  return replaceInstUsesWith(GEP, NGEP);
2230  }
2231 
2232  // See if we can simplify:
2233  // X = bitcast A* to B*
2234  // Y = gep X, <...constant indices...>
2235  // into a gep of the original struct. This is important for SROA and alias
2236  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2237  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
2238  APInt Offset(OffsetBits, 0);
2239 
2240  // If the bitcast argument is an allocation, the bitcast is for conversion
2241  // to the actual type of the allocation. Removing such bitcasts results in having
2242  // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
2243  // struct or array hierarchy.
2244  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
2245  // a better chance to succeed.
2246  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
2247  !isAllocationFn(SrcOp, &TLI)) {
2248  // If this GEP instruction doesn't move the pointer, just replace the GEP
2249  // with a bitcast of the real input to the dest type.
2250  if (!Offset) {
2251  // If the bitcast is of an allocation, and the allocation will be
2252  // converted to match the type of the cast, don't touch this.
2253  if (isa<AllocaInst>(SrcOp)) {
2254  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2255  if (Instruction *I = visitBitCast(*BCI)) {
2256  if (I != BCI) {
2257  I->takeName(BCI);
2258  BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
2259  replaceInstUsesWith(*BCI, I);
2260  }
2261  return &GEP;
2262  }
2263  }
2264 
2265  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2266  return new AddrSpaceCastInst(SrcOp, GEP.getType());
2267  return new BitCastInst(SrcOp, GEP.getType());
2268  }
2269 
2270  // Otherwise, if the offset is non-zero, we need to find out if there is a
2271  // field at Offset in 'A's type. If so, we can pull the cast through the
2272  // GEP.
2273  SmallVector<Value *, 8> NewIndices;
2274  if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
2275  Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
2276  GEP.isInBounds());
2277 
2278  if (NGEP->getType() == GEP.getType())
2279  return replaceInstUsesWith(GEP, NGEP);
2280  NGEP->takeName(&GEP);
2281 
2282  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2283  return new AddrSpaceCastInst(NGEP, GEP.getType());
2284  return new BitCastInst(NGEP, GEP.getType());
2285  }
2286  }
2287 
2288  return nullptr;
2289 }
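// Illustrative example of the bitcast folding above (hypothetical IR; only
// relevant for typed pointers, since the function bails out on opaque ones):
//   %b = bitcast [4 x i32]* %p to <4 x i32>*
//   %g = getelementptr <4 x i32>, <4 x i32>* %b, i64 0, i64 %i
// becomes
//   %g = getelementptr [4 x i32], [4 x i32]* %p, i64 0, i64 %i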
2290 
2291 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2292  Value *PtrOp = GEP.getOperand(0);
2293  SmallVector<Value *, 8> Indices(GEP.indices());
2294  Type *GEPType = GEP.getType();
2295  Type *GEPEltType = GEP.getSourceElementType();
2296  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2297  if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2298  SQ.getWithInstruction(&GEP)))
2299  return replaceInstUsesWith(GEP, V);
2300 
2301  // For vector geps, use the generic demanded vector support.
2302  // Skip if GEP return type is scalable. The number of elements is unknown at
2303  // compile-time.
2304  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2305  auto VWidth = GEPFVTy->getNumElements();
2306  APInt UndefElts(VWidth, 0);
2307  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2308  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2309  UndefElts)) {
2310  if (V != &GEP)
2311  return replaceInstUsesWith(GEP, V);
2312  return &GEP;
2313  }
2314 
2315  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2316  // possible (decide on canonical form for pointer broadcast), 3) exploit
2317  // undef elements to decrease demanded bits
2318  }
2319 
2320  // Eliminate unneeded casts for indices, and replace indices which displace
2321  // by multiples of a zero size type with zero.
2322  bool MadeChange = false;
2323 
2324  // Index width may not be the same width as pointer width.
2325  // Data layout chooses the right type based on supported integer types.
2326  Type *NewScalarIndexTy =
2327  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2328 
2330  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2331  ++I, ++GTI) {
2332  // Skip indices into struct types.
2333  if (GTI.isStruct())
2334  continue;
2335 
2336  Type *IndexTy = (*I)->getType();
2337  Type *NewIndexType =
2338  IndexTy->isVectorTy()
2339  ? VectorType::get(NewScalarIndexTy,
2340  cast<VectorType>(IndexTy)->getElementCount())
2341  : NewScalarIndexTy;
2342 
2343  // If the element type has zero size then any index over it is equivalent
2344  // to an index of zero, so replace it with zero if it is not zero already.
2345  Type *EltTy = GTI.getIndexedType();
2346  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2347  if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2348  *I = Constant::getNullValue(NewIndexType);
2349  MadeChange = true;
2350  }
2351 
2352  if (IndexTy != NewIndexType) {
2353  // If we are using a wider index than needed for this platform, shrink
2354  // it to what we need. If narrower, sign-extend it to what we need.
2355  // This explicit cast can make subsequent optimizations more obvious.
2356  *I = Builder.CreateIntCast(*I, NewIndexType, true);
2357  MadeChange = true;
2358  }
2359  }
2360  if (MadeChange)
2361  return &GEP;
2362 
2363  // Check to see if the inputs to the PHI node are getelementptr instructions.
2364  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2365  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2366  if (!Op1)
2367  return nullptr;
2368 
2369  // Don't fold a GEP into itself through a PHI node. This can only happen
2370  // through the back-edge of a loop. Folding a GEP into itself means that
2371  // the value of the previous iteration needs to be stored in the meantime,
2372  // thus requiring an additional register variable to be live, but not
2373  // actually achieving anything (the GEP still needs to be executed once per
2374  // loop iteration).
2375  if (Op1 == &GEP)
2376  return nullptr;
2377 
2378  int DI = -1;
2379 
2380  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2381  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2382  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2383  Op1->getSourceElementType() != Op2->getSourceElementType())
2384  return nullptr;
2385 
2386  // As for Op1 above, don't try to fold a GEP into itself.
2387  if (Op2 == &GEP)
2388  return nullptr;
2389 
2390  // Keep track of the type as we walk the GEP.
2391  Type *CurTy = nullptr;
2392 
2393  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2394  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2395  return nullptr;
2396 
2397  if (Op1->getOperand(J) != Op2->getOperand(J)) {
2398  if (DI == -1) {
2399  // We have not seen any differences yet in the GEPs feeding the
2400  // PHI yet, so we record this one if it is allowed to be a
2401  // variable.
2402 
2403  // The first two arguments can vary for any GEP, the rest have to be
2404  // static for struct slots
2405  if (J > 1) {
2406  assert(CurTy && "No current type?");
2407  if (CurTy->isStructTy())
2408  return nullptr;
2409  }
2410 
2411  DI = J;
2412  } else {
2413  // The GEP is different by more than one input. While this could be
2414  // extended to support GEPs that vary by more than one variable it
2415  // doesn't make sense since it greatly increases the complexity and
2416  // would result in an R+R+R addressing mode which no backend
2417  // directly supports and would need to be broken into several
2418  // simpler instructions anyway.
2419  return nullptr;
2420  }
2421  }
2422 
2423  // Sink down a layer of the type for the next iteration.
2424  if (J > 0) {
2425  if (J == 1) {
2426  CurTy = Op1->getSourceElementType();
2427  } else {
2428  CurTy =
2429  GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2430  }
2431  }
2432  }
2433  }
2434 
2435  // If not all GEPs are identical we'll have to create a new PHI node.
2436  // Check that the old PHI node has only one use so that it will get
2437  // removed.
2438  if (DI != -1 && !PN->hasOneUse())
2439  return nullptr;
2440 
2441  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2442  if (DI == -1) {
2443  // All the GEPs feeding the PHI are identical. Clone one down into our
2444  // BB so that it can be merged with the current GEP.
2445  } else {
2446  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2447  // into the current block so it can be merged, and create a new PHI to
2448  // set that index.
2449  PHINode *NewPN;
2450  {
2451  IRBuilderBase::InsertPointGuard Guard(Builder);
2452  Builder.SetInsertPoint(PN);
2453  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2454  PN->getNumOperands());
2455  }
2456 
2457  for (auto &I : PN->operands())
2458  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2459  PN->getIncomingBlock(I));
2460 
2461  NewGEP->setOperand(DI, NewPN);
2462  }
2463 
2464  GEP.getParent()->getInstList().insert(
2465  GEP.getParent()->getFirstInsertionPt(), NewGEP);
2466  replaceOperand(GEP, 0, NewGEP);
2467  PtrOp = NewGEP;
2468  }
2469 
2470  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2471  if (Instruction *I = visitGEPOfGEP(GEP, Src))
2472  return I;
2473 
2474  // Skip if GEP source element type is scalable. The type alloc size is unknown
2475  // at compile-time.
2476  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2477  unsigned AS = GEP.getPointerAddressSpace();
2478  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2479  DL.getIndexSizeInBits(AS)) {
2480  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2481 
2482  bool Matched = false;
2483  uint64_t C;
2484  Value *V = nullptr;
2485  if (TyAllocSize == 1) {
2486  V = GEP.getOperand(1);
2487  Matched = true;
2488  } else if (match(GEP.getOperand(1),
2489  m_AShr(m_Value(V), m_ConstantInt(C)))) {
2490  if (TyAllocSize == 1ULL << C)
2491  Matched = true;
2492  } else if (match(GEP.getOperand(1),
2493  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2494  if (TyAllocSize == C)
2495  Matched = true;
2496  }
2497 
2498  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2499  // only if both point to the same underlying object (otherwise provenance
2500  // is not necessarily retained).
2501  Value *Y;
2502  Value *X = GEP.getOperand(0);
2503  if (Matched &&
2504  match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2505  getUnderlyingObject(X) == getUnderlyingObject(Y))
2506  return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2507  }
2508  }
2509 
2510  // We do not handle pointer-vector geps here.
2511  if (GEPType->isVectorTy())
2512  return nullptr;
2513 
2514  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
2515  Value *StrippedPtr = PtrOp->stripPointerCasts();
2516  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
2517 
2518  // TODO: The basic approach of these folds is not compatible with opaque
2519  // pointers, because we can't use bitcasts as a hint for a desirable GEP
2520  // type. Instead, we should perform canonicalization directly on the GEP
2521  // type. For now, skip these.
2522  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
2523  bool HasZeroPointerIndex = false;
2524  Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
2525 
2526  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
2527  HasZeroPointerIndex = C->isZero();
2528 
2529  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
2530  // into : GEP [10 x i8]* X, i32 0, ...
2531  //
2532  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
2533  // into : GEP i8* X, ...
2534  //
2535  // This occurs when the program declares an array extern like "int X[];"
2536  if (HasZeroPointerIndex) {
2537  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
2538  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
2539  if (CATy->getElementType() == StrippedPtrEltTy) {
2540  // -> GEP i8* X, ...
2541  SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
2542  GetElementPtrInst *Res = GetElementPtrInst::Create(
2543  StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
2544  Res->setIsInBounds(GEP.isInBounds());
2545  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
2546  return Res;
2547  // Insert Res, and create an addrspacecast.
2548  // e.g.,
2549  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
2550  // ->
2551  // %0 = GEP i8 addrspace(1)* X, ...
2552  // addrspacecast i8 addrspace(1)* %0 to i8*
2553  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
2554  }
2555 
2556  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
2557  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
2558  if (CATy->getElementType() == XATy->getElementType()) {
2559  // -> GEP [10 x i8]* X, i32 0, ...
2560  // At this point, we know that the cast source type is a pointer
2561  // to an array of the same type as the destination pointer
2562  // array. Because the array type is never stepped over (there
2563  // is a leading zero) we can fold the cast into this GEP.
2564  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
2565  GEP.setSourceElementType(XATy);
2566  return replaceOperand(GEP, 0, StrippedPtr);
2567  }
2568  // Cannot replace the base pointer directly because StrippedPtr's
2569  // address space is different. Instead, create a new GEP followed by
2570  // an addrspacecast.
2571  // e.g.,
2572  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
2573  // i32 0, ...
2574  // ->
2575  // %0 = GEP [10 x i8] addrspace(1)* X, ...
2576  // addrspacecast i8 addrspace(1)* %0 to i8*
2577  SmallVector<Value *, 8> Idx(GEP.indices());
2578  Value *NewGEP =
2579  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2580  GEP.getName(), GEP.isInBounds());
2581  return new AddrSpaceCastInst(NewGEP, GEPType);
2582  }
2583  }
2584  }
2585  } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
2586  // Skip if GEP source element type is scalable. The type alloc size is
2587  // unknown at compile-time.
2588  // Transform things like: %t = getelementptr i32*
2589  // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
2590  // x i32]* %str, i32 0, i32 %V; bitcast
2591  if (StrippedPtrEltTy->isArrayTy() &&
2592  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
2593  DL.getTypeAllocSize(GEPEltType)) {
2594  Type *IdxType = DL.getIndexType(GEPType);
2595  Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
2596  Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2597  GEP.getName(), GEP.isInBounds());
2598 
2599  // V and GEP are both pointer types --> BitCast
2600  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2601  }
2602 
2603  // Transform things like:
2604  // %V = mul i64 %N, 4
2605  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2606  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2607  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2608  // Check that changing the type amounts to dividing the index by a scale
2609  // factor.
2610  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2611  uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
2612  if (ResSize && SrcSize % ResSize == 0) {
2613  Value *Idx = GEP.getOperand(1);
2614  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2615  uint64_t Scale = SrcSize / ResSize;
2616 
2617  // Earlier transforms ensure that the index has the right type
2618  // according to Data Layout, which considerably simplifies the
2619  // logic by eliminating implicit casts.
2620  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2621  "Index type does not match the Data Layout preferences");
2622 
2623  bool NSW;
2624  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2625  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2626  // If the multiplication NewIdx * Scale may overflow then the new
2627  // GEP may not be "inbounds".
2628  Value *NewGEP =
2629  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2630  GEP.getName(), GEP.isInBounds() && NSW);
2631 
2632  // The NewGEP must be pointer typed, so must the old one -> BitCast
2633  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2634  GEPType);
2635  }
2636  }
2637  }
2638 
2639  // Similarly, transform things like:
2640  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2641  // (where tmp = 8*tmp2) into:
2642  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2643  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2644  StrippedPtrEltTy->isArrayTy()) {
2645  // Check that changing to the array element type amounts to dividing the
2646  // index by a scale factor.
2647  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
2648  uint64_t ArrayEltSize =
2649  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
2650  .getFixedSize();
2651  if (ResSize && ArrayEltSize % ResSize == 0) {
2652  Value *Idx = GEP.getOperand(1);
2653  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2654  uint64_t Scale = ArrayEltSize / ResSize;
2655 
2656  // Earlier transforms ensure that the index has the right type
2657  // according to the Data Layout, which considerably simplifies
2658  // the logic by eliminating implicit casts.
2659  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2660  "Index type does not match the Data Layout preferences");
2661 
2662  bool NSW;
2663  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2664  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2665  // If the multiplication NewIdx * Scale may overflow then the new
2666  // GEP may not be "inbounds".
2667  Type *IndTy = DL.getIndexType(GEPType);
2668  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2669 
2670  Value *NewGEP =
2671  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2672  GEP.getName(), GEP.isInBounds() && NSW);
2673  // The NewGEP must be pointer typed, so must the old one -> BitCast
2674  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2675  GEPType);
2676  }
2677  }
2678  }
2679  }
2680  }
2681 
2682  // addrspacecast between types is canonicalized as a bitcast, then an
2683  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2684  // through the addrspacecast.
2685  Value *ASCStrippedPtrOp = PtrOp;
2686  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2687  // X = bitcast A addrspace(1)* to B addrspace(1)*
2688  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2689  // Z = gep Y, <...constant indices...>
2690  // Into an addrspacecasted GEP of the struct.
2691  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2692  ASCStrippedPtrOp = BC;
2693  }
2694 
2695  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
2696  if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
2697  return I;
2698 
2699  if (!GEP.isInBounds()) {
2700  unsigned IdxWidth =
2701  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2702  APInt BasePtrOffset(IdxWidth, 0);
2703  Value *UnderlyingPtrOp =
2704  PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2705  BasePtrOffset);
2706  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2707  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2708  BasePtrOffset.isNonNegative()) {
2709  APInt AllocSize(
2710  IdxWidth,
2711  DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
2712  if (BasePtrOffset.ule(AllocSize)) {
2713  return GetElementPtrInst::CreateInBounds(
2714  GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2715  }
2716  }
2717  }
2718  }
2719 
2720  if (Instruction *R = foldSelectGEP(GEP, Builder))
2721  return R;
2722 
2723  return nullptr;
2724 }
2725 
2726 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2727  Instruction *AI) {
2728  if (isa<ConstantPointerNull>(V))
2729  return true;
2730  if (auto *LI = dyn_cast<LoadInst>(V))
2731  return isa<GlobalVariable>(LI->getPointerOperand());
2732  // Two distinct allocations will never be equal.
2733  return isAllocLikeFn(V, &TLI) && V != AI;
2734 }
2735 
2736 /// Given a call CB which uses an address UsedV, return true if we can prove the
2737 /// call's only possible effect is storing to UsedV.
2738 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2739  const TargetLibraryInfo &TLI) {
2740  if (!CB.use_empty())
2741  // TODO: add recursion if returned attribute is present
2742  return false;
2743 
2744  if (CB.isTerminator())
2745  // TODO: remove implementation restriction
2746  return false;
2747 
2748  if (!CB.willReturn() || !CB.doesNotThrow())
2749  return false;
2750 
2751  // If the only possible side effect of the call is writing to the alloca,
2752  // and the result isn't used, we can safely remove any reads implied by the
2753  // call including those which might read the alloca itself.
2754  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2755  return Dest && Dest->Ptr == UsedV;
2756 }
2757 
2758 static bool isAllocSiteRemovable(Instruction *AI,
2759  SmallVectorImpl<WeakTrackingVH> &Users,
2760  const TargetLibraryInfo &TLI) {
2761  SmallVector<Instruction*, 4> Worklist;
2762  const Optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2763  Worklist.push_back(AI);
2764 
2765  do {
2766  Instruction *PI = Worklist.pop_back_val();
2767  for (User *U : PI->users()) {
2768  Instruction *I = cast<Instruction>(U);
2769  switch (I->getOpcode()) {
2770  default:
2771  // Give up the moment we see something we can't handle.
2772  return false;
2773 
2774  case Instruction::AddrSpaceCast:
2775  case Instruction::BitCast:
2776  case Instruction::GetElementPtr:
2777  Users.emplace_back(I);
2778  Worklist.push_back(I);
2779  continue;
2780 
2781  case Instruction::ICmp: {
2782  ICmpInst *ICI = cast<ICmpInst>(I);
2783  // We can fold eq/ne comparisons with null to false/true, respectively.
2784  // We also fold comparisons in some conditions provided the alloc has
2785  // not escaped (see isNeverEqualToUnescapedAlloc).
2786  if (!ICI->isEquality())
2787  return false;
2788  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2789  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2790  return false;
2791  Users.emplace_back(I);
2792  continue;
2793  }
2794 
2795  case Instruction::Call:
2796  // Ignore no-op and store intrinsics.
2797  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2798  switch (II->getIntrinsicID()) {
2799  default:
2800  return false;
2801 
2802  case Intrinsic::memmove:
2803  case Intrinsic::memcpy:
2804  case Intrinsic::memset: {
2805  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2806  if (MI->isVolatile() || MI->getRawDest() != PI)
2807  return false;
2808  [[fallthrough]];
2809  }
2810  case Intrinsic::assume:
2811  case Intrinsic::invariant_start:
2812  case Intrinsic::invariant_end:
2813  case Intrinsic::lifetime_start:
2814  case Intrinsic::lifetime_end:
2815  case Intrinsic::objectsize:
2816  Users.emplace_back(I);
2817  continue;
2818  case Intrinsic::launder_invariant_group:
2819  case Intrinsic::strip_invariant_group:
2820  Users.emplace_back(I);
2821  Worklist.push_back(I);
2822  continue;
2823  }
2824  }
2825 
2826  if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2827  Users.emplace_back(I);
2828  continue;
2829  }
2830 
2831  if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2832  getAllocationFamily(I, &TLI) == Family) {
2833  assert(Family);
2834  Users.emplace_back(I);
2835  continue;
2836  }
2837 
2838  if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI &&
2839  getAllocationFamily(I, &TLI) == Family) {
2840  assert(Family);
2841  Users.emplace_back(I);
2842  Worklist.push_back(I);
2843  continue;
2844  }
2845 
2846  return false;
2847 
2848  case Instruction::Store: {
2849  StoreInst *SI = cast<StoreInst>(I);
2850  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2851  return false;
2852  Users.emplace_back(I);
2853  continue;
2854  }
2855  }
2856  llvm_unreachable("missing a return?");
2857  }
2858  } while (!Worklist.empty());
2859  return true;
2860 }
2861 
2862 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2863  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2864 
2865  // If we have a malloc call which is only used in any number of comparisons to
2866  // null and free calls, delete the calls and replace the comparisons with true
2867  // or false as appropriate.
2868 
2869  // This is based on the principle that we can substitute our own allocation
2870  // function (which will never return null) rather than knowledge of the
2871  // specific function being called. In some sense this can change the permitted
2872  // outputs of a program (when we convert a malloc to an alloca, the fact that
2873  // the allocation is now on the stack is potentially visible, for example),
2874  // but we believe it does so in a permissible manner.
2875  SmallVector<WeakTrackingVH, 64> Users;
2876 
2877  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2878  // before each store.
2879  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2880  std::unique_ptr<DIBuilder> DIB;
2881  if (isa<AllocaInst>(MI)) {
2882  findDbgUsers(DVIs, &MI);
2883  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2884  }
2885 
2886  if (isAllocSiteRemovable(&MI, Users, TLI)) {
2887  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2888  // Lowering all @llvm.objectsize calls first because they may
2889  // use a bitcast/GEP of the alloca we are removing.
2890  if (!Users[i])
2891  continue;
2892 
2893  Instruction *I = cast<Instruction>(&*Users[i]);
2894 
2895  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2896  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2897  Value *Result =
2898  lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2899  replaceInstUsesWith(*I, Result);
2900  eraseInstFromFunction(*I);
2901  Users[i] = nullptr; // Skip examining in the next loop.
2902  }
2903  }
2904  }
2905  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2906  if (!Users[i])
2907  continue;
2908 
2909  Instruction *I = cast<Instruction>(&*Users[i]);
2910 
2911  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2912  replaceInstUsesWith(*C,
2913  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2914  C->isFalseWhenEqual()));
2915  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2916  for (auto *DVI : DVIs)
2917  if (DVI->isAddressOfVariable())
2918  ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2919  } else {
2920  // Casts, GEP, or anything else: we're about to delete this instruction,
2921  // so it cannot have any valid uses.
2922  replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2923  }
2924  eraseInstFromFunction(*I);
2925  }
2926 
2927  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2928  // Replace invoke with a NOP intrinsic to maintain the original CFG
2929  Module *M = II->getModule();
2930  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2931  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2932  None, "", II->getParent());
2933  }
2934 
2935  // Remove debug intrinsics which describe the value contained within the
2936  // alloca. In addition to removing dbg.{declare,addr} which simply point to
2937  // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2938  //
2939  // ```
2940  // define void @foo(i32 %0) {
2941  // %a = alloca i32 ; Deleted.
2942  // store i32 %0, i32* %a
2943  // dbg.value(i32 %0, "arg0") ; Not deleted.
2944  // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2945  // call void @trivially_inlinable_no_op(i32* %a)
2946  // ret void
2947  // }
2948  // ```
2949  //
2950  // This may not be required if we stop describing the contents of allocas
2951  // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2952  // the LowerDbgDeclare utility.
2953  //
2954  // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2955  // "arg0" dbg.value may be stale after the call. However, failing to remove
2956  // the DW_OP_deref dbg.value causes large gaps in location coverage.
2957  for (auto *DVI : DVIs)
2958  if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2959  DVI->eraseFromParent();
2960 
2961  return eraseInstFromFunction(MI);
2962  }
2963  return nullptr;
2964 }
2965 
2966 /// Move the call to free before a NULL test.
2967 ///
2968 /// Check if this free is executed only after its argument has been tested
2969 /// against NULL (property 0).
2970 /// If yes, it is legal to move this call into its predecessor block.
2971 ///
2972 /// The move is performed only if the block containing the call to free
2973 /// will be removed, i.e.:
2974 /// 1. it has only one predecessor P, and P has two successors
2975 /// 2. it contains the call, noops, and an unconditional branch
2976 /// 3. its successor is the same as its predecessor's successor
2977 ///
2978 /// Profitability is not a concern here; this function should be called
2979 /// only if the caller already knows the transformation would be
2980 /// profitable (e.g., for code size).
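/// For illustration (invented IR names), a CFG of the form:
///   pred:
///     %isnull = icmp eq i8* %p, null
///     br i1 %isnull, label %cont, label %dofree
///   dofree:
///     call void @free(i8* %p)
///     br label %cont
/// becomes one where free(%p) is called unconditionally in %pred, letting
/// later passes delete the now-empty %dofree block and the branch.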
2981 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2982  const DataLayout &DL) {
2983  Value *Op = FI.getArgOperand(0);
2984  BasicBlock *FreeInstrBB = FI.getParent();
2985  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2986 
2987  // Validate part of constraint #1: Only one predecessor
2988  // FIXME: We could handle more than one predecessor, but in that case we
2989  // would duplicate the call to free in each predecessor and it may
2990  // not be profitable even for code size.
2991  if (!PredBB)
2992  return nullptr;
2993 
2994  // Validate constraint #2: Does this block contain only the call to
2995  // free, noops, and an unconditional branch?
2996  BasicBlock *SuccBB;
2997  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2998  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2999  return nullptr;
3000 
3001  // If there are only 2 instructions in the block at this point, they are
3002  // the call to free and the unconditional branch.
3003  // If there are more than 2 instructions, check that they are noops
3004  // i.e., they won't hurt the performance of the generated code.
3005  if (FreeInstrBB->size() != 2) {
3006  for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
3007  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
3008  continue;
3009  auto *Cast = dyn_cast<CastInst>(&Inst);
3010  if (!Cast || !Cast->isNoopCast(DL))
3011  return nullptr;
3012  }
3013  }
3014  // Validate the rest of constraint #1 by matching on the pred branch.
3015  Instruction *TI = PredBB->getTerminator();
3016  BasicBlock *TrueBB, *FalseBB;
3017  ICmpInst::Predicate Pred;
3018  if (!match(TI, m_Br(m_ICmp(Pred,
3019  m_CombineOr(m_Specific(Op),
3020  m_Specific(Op->stripPointerCasts())),
3021  m_Zero()),
3022  TrueBB, FalseBB)))
3023  return nullptr;
3024  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3025  return nullptr;
3026 
3027  // Validate constraint #3: Ensure the null case just falls through.
3028  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3029  return nullptr;
3030  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3031  "Broken CFG: missing edge from predecessor to successor");
3032 
3033  // At this point, we know that everything in FreeInstrBB can be moved
3034  // before TI.
3035  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3036  if (&Instr == FreeInstrBBTerminator)
3037  break;
3038  Instr.moveBefore(TI);
3039  }
3040  assert(FreeInstrBB->size() == 1 &&
3041  "Only the branch instruction should remain");
3042 
3043  // Now that we've moved the call to free before the NULL check, we have to
3044  // remove any attributes on its parameter that imply it's non-null, because
3045  // those attributes might have only been valid because of the NULL check, and
3046  // we can get miscompiles if we keep them. This is conservative if non-null is
3047  // also implied by something other than the NULL check, but it's guaranteed to
3048  // be correct, and the conservativeness won't matter in practice, since the
3049  // attributes are irrelevant for the call to free itself and the pointer
3050  // shouldn't be used after the call.
3051  AttributeList Attrs = FI.getAttributes();
3052  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3053  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3054  if (Dereferenceable.isValid()) {
3055  uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3056  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3057  Attribute::Dereferenceable);
3058  Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3059  }
3060  FI.setAttributes(Attrs);
3061 
3062  return &FI;
3063 }
3064 
3065 Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3066  // free undef -> unreachable.
3067  if (isa<UndefValue>(Op)) {
3068  // Leave a marker since we can't modify the CFG here.
3069  CreateNonTerminatorUnreachable(&FI);
3070  return eraseInstFromFunction(FI);
3071  }
3072 
3073  // If we have 'free null' delete the instruction. This can happen in stl code
3074  // when lots of inlining happens.
3075  if (isa<ConstantPointerNull>(Op))
3076  return eraseInstFromFunction(FI);
3077 
3078  // If we had free(realloc(...)) with no intervening uses, then eliminate the
3079  // realloc() entirely.
3080  CallInst *CI = dyn_cast<CallInst>(Op);
3081  if (CI && CI->hasOneUse())
3082  if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI))
3083  return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3084 
3085  // If we optimize for code size, try to move the call to free before the null
3086  // test so that SimplifyCFG can remove the empty block and dead code
3087  // elimination can remove the branch. I.e., this helps to turn something like:
3088  // if (foo) free(foo);
3089  // into
3090  // free(foo);
3091  //
3092  // Note that we can only do this for 'free' and not for any flavor of
3093  // 'operator delete'; there is no 'operator delete' symbol for which we are
3094  // permitted to invent a call, even if we're passing in a null pointer.
3095  if (MinimizeSize) {
3096  LibFunc Func;
3097  if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3098  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3099  return I;
3100  }
3101 
3102  return nullptr;
3103 }
3104 
3105 static bool isMustTailCall(Value *V) {
3106  if (auto *CI = dyn_cast<CallInst>(V))
3107  return CI->isMustTailCall();
3108  return false;
3109 }
3110 
3111 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3112  if (RI.getNumOperands() == 0) // ret void
3113  return nullptr;
3114 
3115  Value *ResultOp = RI.getOperand(0);
3116  Type *VTy = ResultOp->getType();
3117  if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
3118  return nullptr;
3119 
3120  // Don't replace result of musttail calls.
3121  if (isMustTailCall(ResultOp))
3122  return nullptr;
3123 
3124  // There might be assume intrinsics dominating this return that completely
3125  // determine the value. If so, constant fold it.
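  // For illustration (invented IR names):
  //   %c = icmp eq i32 %x, 7
  //   call void @llvm.assume(i1 %c)
  //   ret i32 %x
  // can be folded to return the constant 7 directly.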
3126  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
3127  if (Known.isConstant())
3128  return replaceOperand(RI, 0,
3129  Constant::getIntegerValue(VTy, Known.getConstant()));
3130 
3131  return nullptr;
3132 }
3133 
3134 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3135 Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3136  // Try to remove the previous instruction if it must lead to unreachable.
3137  // This includes instructions like stores and "llvm.assume" that may not get
3138  // removed by simple dead code elimination.
3139  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3140  // While we theoretically can erase EH, that would result in a block that
3141  // used to start with an EH no longer starting with EH, which is invalid.
3142  // To make it valid, we'd need to fixup predecessors to no longer refer to
3143  // this block, but that changes CFG, which is not allowed in InstCombine.
3144  if (Prev->isEHPad())
3145  return nullptr; // Cannot drop any more instructions. We're done here.
3146 
3147  if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3148  return nullptr; // Cannot drop any more instructions. We're done here.
3149  // Otherwise, this instruction can be freely erased,
3150  // even if it is not side-effect free.
3151 
3152  // A value may still have uses before we process it here (for example, in
3153  // another unreachable block), so convert those to poison.
3154  replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3155  eraseInstFromFunction(*Prev);
3156  }
3157  assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
3158  // FIXME: recurse into unconditional predecessors?
3159  return nullptr;
3160 }
3161 
3162 Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3163  assert(BI.isUnconditional() && "Only for unconditional branches.");
3164 
3165  // If this store is the second-to-last instruction in the basic block
3166  // (excluding debug info and bitcasts of pointers) and if the block ends with
3167  // an unconditional branch, try to move the store to the successor block.
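  // For illustration (invented IR names):
  //   bb:
  //     store i32 %v, i32* %p
  //     br label %succ
  // The store may be sunk into %succ and, roughly, combined (via a phi) with a
  // matching store arriving from %succ's other predecessor.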
3168 
3169  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3170  auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3171  return BBI->isDebugOrPseudoInst() ||
3172  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3173  };
3174 
3175  BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3176  do {
3177  if (BBI != FirstInstr)
3178  --BBI;
3179  } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3180 
3181  return dyn_cast<StoreInst>(BBI);
3182  };
3183 
3184  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3185  if (mergeStoreIntoSuccessor(*SI))
3186  return &BI;
3187 
3188  return nullptr;
3189 }
3190 
3191 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3192  if (BI.isUnconditional())
3193  return visitUnconditionalBranchInst(BI);
3194 
3195  // Change br (not X), label True, label False to: br X, label False, True
3196  Value *X = nullptr;
3197  if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
3198  !isa<Constant>(X)) {
3199  // Swap Destinations and condition...
3200  BI.swapSuccessors();
3201  return replaceOperand(BI, 0, X);
3202  }
3203 
3204  // If the condition is irrelevant, remove the use so that other
3205  // transforms on the condition become more effective.
3206  if (!isa<ConstantInt>(BI.getCondition()) &&
3207  BI.getSuccessor(0) == BI.getSuccessor(1))
3208  return replaceOperand(
3209  BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
3210 
3211  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3212  CmpInst::Predicate Pred;
3213  if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
3214  m_BasicBlock(), m_BasicBlock())) &&
3215  !isCanonicalPredicate(Pred)) {
3216  // Swap destinations and condition.
3217  CmpInst *Cond = cast<CmpInst>(BI.getCondition());
3218  Cond->setPredicate(CmpInst::getInversePredicate(Pred));
3219  BI.swapSuccessors();
3220  Worklist.push(Cond);
3221  return &BI;
3222  }
3223 
3224  return nullptr;
3225 }
3226 
3227 Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3228  Value *Cond = SI.getCondition();
3229  Value *Op0;
3230  ConstantInt *AddRHS;
3231  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3232  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3233  for (auto Case : SI.cases()) {
3234  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3235  assert(isa<ConstantInt>(NewCase) &&
3236  "Result of expression should be constant");
3237  Case.setValue(cast<ConstantInt>(NewCase));
3238  }
3239  return replaceOperand(SI, 0, Op0);
3240  }
3241 
3242  KnownBits Known = computeKnownBits(Cond, 0, &SI);
3243  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3244  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3245 
3246  // Compute the number of leading bits we can ignore.
3247  // TODO: A better way to determine this would use ComputeNumSignBits().
3248  for (const auto &C : SI.cases()) {
3249  LeadingKnownZeros = std::min(
3250  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
3251  LeadingKnownOnes = std::min(
3252  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
3253  }
3254 
3255  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3256 
3257  // Shrink the condition operand if the new type is smaller than the old type.
3258  // But do not shrink to a non-standard type, because the backend can't
3259  // generate good code for that yet.
3260  // TODO: We can make it aggressive again after fixing PR39569.
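  // For illustration (invented IR names), when the analysis shows that only
  // the low 8 bits of %x can vary across the cases:
  //   switch i32 %x, label %def [ i32 1, label %a
  //                               i32 2, label %b ]
  // becomes
  //   %x.trunc = trunc i32 %x to i8
  //   switch i8 %x.trunc, label %def [ i8 1, label %a
  //                                    i8 2, label %b ]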
3261  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3262  shouldChangeType(Known.getBitWidth(), NewWidth)) {
3263  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3264  Builder.SetInsertPoint(&SI);
3265  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3266 
3267  for (auto Case : SI.cases()) {
3268  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3269  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3270  }
3271  return replaceOperand(SI, 0, NewCond);
3272  }
3273 
3274  return nullptr;
3275 }
3276 
3277 Instruction *
3278 InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3279  auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3280  if (!WO)
3281  return nullptr;
3282 
3283  Intrinsic::ID OvID = WO->getIntrinsicID();
3284  const APInt *C = nullptr;
3285  if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
3286  if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3287  OvID == Intrinsic::umul_with_overflow)) {
3288  // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3289  if (C->isAllOnes())
3290  return BinaryOperator::CreateNeg(WO->getLHS());
3291  // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3292  if (C->isPowerOf2()) {
3293  return BinaryOperator::CreateShl(
3294  WO->getLHS(),
3295  ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3296  }
3297  }
3298  }
3299 
3300  // We're extracting from an overflow intrinsic. See if we're the only user.
3301  // That allows us to simplify multiple result intrinsics to simpler things
3302  // that just get one value.
3303  if (!WO->hasOneUse())
3304  return nullptr;
3305 
3306  // Check if we're grabbing only the result of a 'with overflow' intrinsic
3307  // and replace it with a traditional binary instruction.
3308  if (*EV.idx_begin() == 0) {
3309  Instruction::BinaryOps BinOp = WO->getBinaryOp();
3310  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3311  // Replace the old instruction's uses with poison.
3312  replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3313  eraseInstFromFunction(*WO);
3314  return BinaryOperator::Create(BinOp, LHS, RHS);
3315  }
3316 
3317  assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3318 
3319  // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3320  if (OvID == Intrinsic::usub_with_overflow)
3321  return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3322 
3323  // If only the overflow result is used, and the right hand side is a
3324  // constant (or constant splat), we can remove the intrinsic by directly
3325  // checking for overflow.
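  // For illustration (invented IR names):
  //   %res = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 100)
  //   %ov = extractvalue { i8, i1 } %res, 1
  // becomes
  //   %ov = icmp ugt i8 %x, 155
  // since the addition overflows exactly when %x is greater than 155.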
3326  if (C) {
3327  // Compute the no-wrap range for LHS given RHS=C, then construct an
3328  // equivalent icmp, potentially using an offset.
3329  ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3330  WO->getBinaryOp(), *C, WO->getNoWrapKind());
3331 
3332  CmpInst::Predicate Pred;
3333  APInt NewRHSC, Offset;
3334  NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3335  auto *OpTy = WO->getRHS()->getType();
3336  auto *NewLHS = WO->getLHS();
3337  if (Offset != 0)
3338  NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3339  return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3340  ConstantInt::get(OpTy, NewRHSC));
3341  }
3342 
3343  return nullptr;
3344 }
3345 
3346 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3347  Value *Agg = EV.getAggregateOperand();
3348 
3349  if (!EV.hasIndices())
3350  return replaceInstUsesWith(EV, Agg);
3351 
3352  if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3353  SQ.getWithInstruction(&EV)))
3354  return replaceInstUsesWith(EV, V);
3355 
3356  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3357  // We're extracting from an insertvalue instruction, compare the indices
3358  const unsigned *exti, *exte, *insi, *inse;
3359  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3360  exte = EV.idx_end(), inse = IV->idx_end();
3361  exti != exte && insi != inse;
3362  ++exti, ++insi) {
3363  if (*insi != *exti)
3364  // The insert and extract both reference distinctly different elements.
3365  // This means the extract is not influenced by the insert, and we can
3366  // replace the aggregate operand of the extract with the aggregate
3367  // operand of the insert. i.e., replace
3368  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3369  // %E = extractvalue { i32, { i32 } } %I, 0
3370  // with
3371  // %E = extractvalue { i32, { i32 } } %A, 0
3372  return ExtractValueInst::Create(IV->getAggregateOperand(),
3373  EV.getIndices());
3374  }
3375  if (exti == exte && insi == inse)
3376  // Both iterators are at the end: Index lists are identical. Replace
3377  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3378  // %C = extractvalue { i32, { i32 } } %B, 1, 0
3379  // with "i32 42"
3380  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3381  if (exti == exte) {
3382  // The extract list is a prefix of the insert list. i.e. replace
3383  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3384  // %E = extractvalue { i32, { i32 } } %I, 1
3385  // with
3386  // %X = extractvalue { i32, { i32 } } %A, 1
3387  // %E = insertvalue { i32 } %X, i32 42, 0
3388  // by switching the order of the insert and extract (though the
3389  // insertvalue should be left in, since it may have other uses).
3390  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3391  EV.getIndices());
3392  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3393  makeArrayRef(insi, inse));
3394  }
3395  if (insi == inse)
3396  // The insert list is a prefix of the extract list
3397  // We can simply remove the common indices from the extract and make it
3398  // operate on the inserted value instead of the insertvalue result.
3399  // i.e., replace
3400  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3401  // %E = extractvalue { i32, { i32 } } %I, 1, 0
3402  // with
3403  // %E = extractvalue { i32 } { i32 42 }, 0
3404  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3405  makeArrayRef(exti, exte));
3406  }
3407 
3408  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3409  return R;
3410 
3411  if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3412  // If the (non-volatile) load only has one use, we can rewrite this to a
3413  // load from a GEP. This reduces the size of the load. If a load is used
3414  // only by extractvalue instructions then this either must have been
3415  // optimized before, or it is a struct with padding, in which case we
3416  // don't want to do the transformation as it loses padding knowledge.
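    // For illustration (invented IR names):
    //   %agg = load { i32, i32 }, { i32, i32 }* %p
    //   %v = extractvalue { i32, i32 } %agg, 1
    // becomes
    //   %gep = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
    //   %v = load i32, i32* %gep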
3417  if (L->isSimple() && L->hasOneUse()) {
3418  // extractvalue has integer indices, getelementptr has Value*s. Convert.
3419  SmallVector<Value*, 4> Indices;
3420  // Prefix an i32 0 since we need the first element.
3421  Indices.push_back(Builder.getInt32(0));
3422  for (unsigned Idx : EV.indices())
3423  Indices.push_back(Builder.getInt32(Idx));
3424 
3425  // We need to insert these at the location of the old load, not at that of
3426  // the extractvalue.
3427  Builder.SetInsertPoint(L);
3428  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3429  L->getPointerOperand(), Indices);
3430  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3431  // Whatever aliasing information we had for the original load must also
3432  // hold for the smaller load, so propagate the annotations.
3433  NL->setAAMetadata(L->getAAMetadata());
3434  // Returning the load directly will cause the main loop to insert it in
3435  // the wrong spot, so use replaceInstUsesWith().
3436  return replaceInstUsesWith(EV, NL);
3437  }
3438  }
3439 
3440  if (auto *PN = dyn_cast<PHINode>(Agg))
3441  if (Instruction *Res = foldOpIntoPhi(EV, PN))
3442  return Res;
3443 
3444  // We could simplify extracts from other values. Note that nested extracts may
3445  // already be simplified implicitly by the above: extract (extract (insert) )
3446  // will be translated into extract ( insert ( extract ) ) first and then just
3447  // the value inserted, if appropriate. Similarly for extracts from single-use
3448  // loads: extract (extract (load)) will be translated to extract (load (gep))
3449  // and if again single-use then via load (gep (gep)) to load (gep).
3450  // However, double extracts from e.g. function arguments or return values
3451  // aren't handled yet.
3452  return nullptr;
3453 }
3454 
3455 /// Return 'true' if the given typeinfo will match anything.
3456 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3457  switch (Personality) {
3458  case EHPersonality::GNU_C:
3459  case EHPersonality::GNU_C_SjLj:
3460  case EHPersonality::Rust:
3461  // The GCC C EH and Rust personalities only exist to support cleanups, so
3462  // it's not clear what the semantics of catch clauses are.
3463  return false;
3464  case EHPersonality::Unknown:
3465  return false;
3466  case EHPersonality::GNU_Ada:
3467  // While __gnat_all_others_value will match any Ada exception, it doesn't
3468  // match foreign exceptions (or didn't, before gcc-4.7).
3469  return false;
3470  case EHPersonality::GNU_CXX:
3471  case EHPersonality::GNU_CXX_SjLj:
3472  case EHPersonality::GNU_ObjC:
3473  case EHPersonality::MSVC_X86SEH:
3474  case EHPersonality::MSVC_TableSEH:
3475  case EHPersonality::MSVC_CXX:
3476  case EHPersonality::CoreCLR:
3477  case EHPersonality::Wasm_CXX:
3478  case EHPersonality::XL_CXX:
3479  return TypeInfo->isNullValue();
3480  }
3481  llvm_unreachable("invalid enum");
3482 }
3483 
3484 static bool shorter_filter(const Value *LHS, const Value *RHS) {
3485  return
3486  cast<ArrayType>(LHS->getType())->getNumElements()
3487  <
3488  cast<ArrayType>(RHS->getType())->getNumElements();
3489 }
3490 
3491 Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3492  // The logic here should be correct for any real-world personality function.
3493  // However if that turns out not to be true, the offending logic can always
3494  // be conditioned on the personality function, like the catch-all logic is.
3495  EHPersonality Personality =
3496  classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3497 
3498  // Simplify the list of clauses, e.g. by removing repeated catch clauses
3499  // (these are often created by inlining).
3500  bool MakeNewInstruction = false; // If true, recreate using the following:
3501  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3502  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3503 
3504  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3505  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3506  bool isLastClause = i + 1 == e;
3507  if (LI.isCatch(i)) {
3508  // A catch clause.
3509  Constant *CatchClause = LI.getClause(i);
3510  Constant *TypeInfo = CatchClause->stripPointerCasts();
3511 
3512  // If we already saw this clause, there is no point in having a second
3513  // copy of it.
3514  if (AlreadyCaught.insert(TypeInfo).second) {
3515  // This catch clause was not already seen.
3516  NewClauses.push_back(CatchClause);
3517  } else {
3518  // Repeated catch clause - drop the redundant copy.
3519  MakeNewInstruction = true;
3520  }
3521 
3522  // If this is a catch-all then there is no point in keeping any following
3523  // clauses or marking the landingpad as having a cleanup.
3524  if (isCatchAll(Personality, TypeInfo)) {
3525  if (!isLastClause)
3526  MakeNewInstruction = true;
3527  CleanupFlag = false;
3528  break;
3529  }
3530  } else {
3531  // A filter clause. If any of the filter elements were already caught
3532  // then they can be dropped from the filter. It is tempting to try to
3533  // exploit the filter further by saying that any typeinfo that does not
3534  // occur in the filter can't be caught later (and thus can be dropped).
3535  // However this would be wrong, since typeinfos can match without being
3536  // equal (for example if one represents a C++ class, and the other some
3537  // class derived from it).
3538  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3539  Constant *FilterClause = LI.getClause(i);
3540  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3541  unsigned NumTypeInfos = FilterType->getNumElements();
3542 
3543  // An empty filter catches everything, so there is no point in keeping any
3544  // following clauses or marking the landingpad as having a cleanup. By
3545  // dealing with this case here the following code is made a bit simpler.
3546  if (!NumTypeInfos) {
3547  NewClauses.push_back(FilterClause);
3548  if (!isLastClause)
3549  MakeNewInstruction = true;
3550  CleanupFlag = false;
3551  break;
3552  }
3553 
3554  bool MakeNewFilter = false; // If true, make a new filter.
3555  SmallVector<Constant *, 16> NewFilterElts; // New elements.
3556  if (isa<ConstantAggregateZero>(FilterClause)) {
3557  // Not an empty filter - it contains at least one null typeinfo.
3558  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3559  Constant *TypeInfo =
3560  Constant::getNullValue(FilterType->getElementType());
3561  // If this typeinfo is a catch-all then the filter can never match.
3562  if (isCatchAll(Personality, TypeInfo)) {
3563  // Throw the filter away.
3564  MakeNewInstruction = true;
3565  continue;
3566  }
3567 
3568  // There is no point in having multiple copies of this typeinfo, so
3569  // discard all but the first copy if there is more than one.
3570  NewFilterElts.push_back(TypeInfo);
3571  if (NumTypeInfos > 1)
3572  MakeNewFilter = true;
3573  } else {
3574  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3575  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3576  NewFilterElts.reserve(NumTypeInfos);
3577 
3578  // Remove any filter elements that were already caught or that already
3579  // occurred in the filter. While there, see if any of the elements are
3580  // catch-alls. If so, the filter can be discarded.
3581  bool SawCatchAll = false;
3582  for (unsigned j = 0; j != NumTypeInfos; ++j) {
3583  Constant *Elt = Filter->getOperand(j);
3584  Constant *TypeInfo = Elt->stripPointerCasts();
3585  if (isCatchAll(Personality, TypeInfo)) {
3586  // This element is a catch-all. Bail out, noting this fact.
3587  SawCatchAll = true;
3588  break;
3589  }
3590 
3591  // Even if we've seen a type in a catch clause, we don't want to
3592  // remove it from the filter. An unexpected type handler may be
3593  // set up for a call site which throws an exception of the same
3594  // type caught. In order for the exception thrown by the unexpected
3595  // handler to propagate correctly, the filter must be correctly
3596  // described for the call site.
3597  //
3598  // Example:
3599  //
3600  // void unexpected() { throw 1;}
3601  // void foo() throw (int) {
3602  // std::set_unexpected(unexpected);
3603  // try {
3604  // throw 2.0;
3605  // } catch (int i) {}
3606  // }
3607 
3608  // There is no point in having multiple copies of the same typeinfo in
3609  // a filter, so only add it if we didn't already.
3610  if (SeenInFilter.insert(TypeInfo).second)
3611  NewFilterElts.push_back(cast<Constant>(Elt));
3612  }
3613  // A filter containing a catch-all cannot match anything by definition.
3614  if (SawCatchAll) {
3615  // Throw the filter away.
3616  MakeNewInstruction = true;
3617  continue;
3618  }
3619 
3620  // If we dropped something from the filter, make a new one.
3621  if (NewFilterElts.size() < NumTypeInfos)
3622  MakeNewFilter = true;
3623  }
3624  if (MakeNewFilter) {
3625  FilterType = ArrayType::get(FilterType->getElementType(),
3626  NewFilterElts.size());
3627  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3628  MakeNewInstruction = true;
3629  }
3630 
3631  NewClauses.push_back(FilterClause);
3632 
3633  // If the new filter is empty then it will catch everything so there is
3634  // no point in keeping any following clauses or marking the landingpad
3635  // as having a cleanup. The case of the original filter being empty was
3636  // already handled above.
3637  if (MakeNewFilter && !NewFilterElts.size()) {
3638  assert(MakeNewInstruction && "New filter but not a new instruction!");
3639  CleanupFlag = false;
3640  break;
3641  }
3642  }
3643  }
3644 
3645  // If several filters occur in a row then reorder them so that the shortest
3646  // filters come first (those with the smallest number of elements). This is
3647  // advantageous because shorter filters are more likely to match, speeding up
3648  // unwinding, but mostly because it increases the effectiveness of the other
3649  // filter optimizations below.
3650  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3651  unsigned j;
3652  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3653  for (j = i; j != e; ++j)
3654  if (!isa<ArrayType>(NewClauses[j]->getType()))
3655  break;
3656 
3657  // Check whether the filters are already sorted by length. We need to know
3658  // if sorting them is actually going to do anything so that we only make a
3659  // new landingpad instruction if it does.
3660  for (unsigned k = i; k + 1 < j; ++k)
3661  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3662  // Not sorted, so sort the filters now. Doing an unstable sort would be
3663  // correct too but reordering filters pointlessly might confuse users.
3664  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3665  shorter_filter);
3666  MakeNewInstruction = true;
3667  break;
3668  }
3669 
3670  // Look for the next batch of filters.
3671  i = j + 1;
3672  }
3673 
3674  // If typeinfos matched if and only if equal, then the elements of a filter L
3675  // that occurs later than a filter F could be replaced by the intersection of
3676  // the elements of F and L. In reality two typeinfos can match without being
3677  // equal (for example if one represents a C++ class, and the other some class
3678  // derived from it) so it would be wrong to perform this transform in general.
3679  // However the transform is correct and useful if F is a subset of L. In that
3680  // case L can be replaced by F, and thus removed altogether since repeating a
3681  // filter is pointless. So here we look at all pairs of filters F and L where
3682  // L follows F in the list of clauses, and remove L if every element of F is
3683  // an element of L. This can occur when inlining C++ functions with exception
3684  // specifications.
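  // For illustration (invented typeinfo names), given the clause list
  //   filter [1 x i8*] [i8* @typeid.A]
  //   filter [2 x i8*] [i8* @typeid.A, i8* @typeid.B]
  // the first filter is a subset of the second, so the second (longer) filter
  // is redundant and is removed.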
3685  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3686  // Examine each filter in turn.
3687  Value *Filter = NewClauses[i];
3688  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3689  if (!FTy)
3690  // Not a filter - skip it.
3691  continue;
3692  unsigned FElts = FTy->getNumElements();
3693  // Examine each filter following this one. Doing this backwards means that
3694  // we don't have to worry about filters disappearing under us when removed.
3695  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3696  Value *LFilter = NewClauses[j];
3697  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3698  if (!LTy)
3699  // Not a filter - skip it.
3700  continue;
3701  // If Filter is a subset of LFilter, i.e. every element of Filter is also
3702  // an element of LFilter, then discard LFilter.
3703  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3704  // If Filter is empty then it is a subset of LFilter.
3705  if (!FElts) {
3706  // Discard LFilter.
3707  NewClauses.erase(J);
3708  MakeNewInstruction = true;
3709  // Move on to the next filter.
3710  continue;
3711  }
3712  unsigned LElts = LTy->getNumElements();
3713  // If Filter is longer than LFilter then it cannot be a subset of it.
3714  if (FElts > LElts)
3715  // Move on to the next filter.
3716  continue;
3717  // At this point we know that LFilter has at least one element.
3718  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3719  // Filter is a subset of LFilter iff Filter contains only zeros (as we
3720  // already know that Filter is not longer than LFilter).
3721  if (isa<ConstantAggregateZero>(Filter)) {
3722  assert(FElts <= LElts && "Should have handled this case earlier!");
3723  // Discard LFilter.
3724  NewClauses.erase(J);
3725  MakeNewInstruction = true;
3726  }
3727  // Move on to the next filter.
3728  continue;
3729  }
3730  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3731  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3732  // Since Filter is non-empty and contains only zeros, it is a subset of
3733  // LFilter iff LFilter contains a zero.
3734  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3735  for (unsigned l = 0; l != LElts; ++l)
3736  if (LArray->getOperand(l)->isNullValue()) {
3737  // LFilter contains a zero - discard it.
3738  NewClauses.erase(J);
3739  MakeNewInstruction = true;
3740  break;
3741  }
3742  // Move on to the next filter.
3743  continue;
3744  }
3745  // At this point we know that both filters are ConstantArrays. Loop over
3746  // operands to see whether every element of Filter is also an element of
3747  // LFilter. Since filters tend to be short this is probably faster than
3748  // using a method that scales nicely.
3749  ConstantArray *FArray = cast<ConstantArray>(Filter);
3750  bool AllFound = true;
3751  for (unsigned f = 0; f != FElts; ++f) {
3752  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3753  AllFound = false;
3754  for (unsigned l = 0; l != LElts; ++l) {
3755  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3756  if (LTypeInfo == FTypeInfo) {
3757  AllFound = true;
3758  break;
3759  }
3760  }
3761  if (!AllFound)
3762  break;
3763  }
3764  if (AllFound) {
3765  // Discard LFilter.
3766  NewClauses.erase(J);
3767  MakeNewInstruction = true;
3768  }
3769  // Move on to the next filter.
3770  }
3771  }
3772 
3773  // If we changed any of the clauses, replace the old landingpad instruction
3774  // with a new one.
3775  if (MakeNewInstruction) {
3776  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3777  NewClauses.size());
3778  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3779  NLI->addClause(NewClauses[i]);
3780  // A landing pad with no clauses must have the cleanup flag set. It is
3781  // theoretically possible, though highly unlikely, that we eliminated all
3782  // clauses. If so, force the cleanup flag to true.
3783  if (NewClauses.empty())
3784  CleanupFlag = true;
3785  NLI->setCleanup(CleanupFlag);
3786  return NLI;
3787  }
3788 
3789  // Even if none of the clauses changed, we may nonetheless have understood
3790  // that the cleanup flag is pointless. Clear it if so.
3791  if (LI.isCleanup() != CleanupFlag) {
3792  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3793  LI.setCleanup(CleanupFlag);
3794  return &LI;
3795  }
3796 
3797  return nullptr;
3798 }
3799 
3800 Value *
3801 InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3802  // Try to push freeze through instructions that propagate but don't produce
3803  // poison as far as possible. If an operand of the freeze satisfies three
3804  // conditions: 1) it has a single use, 2) it does not produce poison itself,
3805  // and 3) all but one of its operands are guaranteed non-poison, then push
3806  // the freeze through to the one operand that is not guaranteed non-poison.
3807  // The actual transform is as follows.
3808  // Op1 = ... ; Op1 can be poison
3809  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and a single
3810  // ; maybe-poison operand (Op1)
3811  // ... = Freeze(Op0)
3812  // =>
3813  // Op1 = ...
3814  // Op1.fr = Freeze(Op1)
3815  // ... = Inst(Op1.fr, NonPoisonOps...)
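  // For illustration (invented IR names):
  //   %a = add nsw i32 %x, 1
  //   %f = freeze i32 %a
  // becomes (the nsw flag is dropped because the add is only used by the freeze):
  //   %x.fr = freeze i32 %x
  //   %a = add i32 %x.fr, 1
  // and uses of %f are replaced by %a.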
3816  auto *OrigOp = OrigFI.getOperand(0);
3817  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3818 
3819  // While we could change the other users of OrigOp to use freeze(OrigOp), that
3820  // potentially reduces their optimization potential, so let's only do this iff
3821  // the OrigOp is only used by the freeze.
3822  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3823  return nullptr;
3824 
3825  // We can't push the freeze through an instruction which can itself create
3826  // poison. If the only source of new poison is flags, we can simply
3827  // strip them (since we know the only use is the freeze and nothing can
3828  // benefit from them.)
3829  if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
3830  return nullptr;
3831 
3832  // If operand is guaranteed not to be poison, there is no need to add freeze
3833  // to the operand. So we first find the operand that is not guaranteed to be
3834  // poison.
3835  Use *MaybePoisonOperand = nullptr;
3836  for (Use &U : OrigOpInst->operands()) {
3837  if (isGuaranteedNotToBeUndefOrPoison(U.get()))
3838  continue;
3839  if (!MaybePoisonOperand)
3840  MaybePoisonOperand = &U;
3841  else
3842  return nullptr;
3843  }
3844 
3845  OrigOpInst->dropPoisonGeneratingFlags();
3846 
3847  // If all operands are guaranteed to be non-poison, we can drop freeze.
3848  if (!MaybePoisonOperand)
3849  return OrigOp;
3850 
3851  Builder.SetInsertPoint(OrigOpInst);
3852  auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3853  MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3854 
3855  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3856  return OrigOp;
3857 }
3858 
3859 Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
3860  PHINode *PN) {
3861  // Detect whether this is a recurrence with a start value and some number of
3862  // backedge values. We'll check whether we can push the freeze through the
3863  // backedge values (possibly dropping poison flags along the way) until we
3864  // reach the phi again. In that case, we can move the freeze to the start
3865  // value.
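  // For illustration (invented IR names):
  //   %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nuw i32 %iv, 1
  //   %fr = freeze i32 %iv
  // becomes (nuw is dropped along the backedge chain):
  //   %start.fr = freeze i32 %start
  //   %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add i32 %iv, 1
  // and uses of %fr are replaced by %iv.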
3866  Use *StartU = nullptr;
3867  SmallVector<Value *> Worklist;
3868  for (Use &U : PN->incoming_values()) {
3869  if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3870  // Add backedge value to worklist.
3871  Worklist.push_back(U.get());
3872  continue;
3873  }
3874 
3875  // Don't bother handling multiple start values.
3876  if (StartU)
3877  return nullptr;
3878  StartU = &U;
3879  }
3880 
3881  if (!StartU || Worklist.empty())
3882  return nullptr; // Not a recurrence.
3883 
3884  Value *StartV = StartU->get();
3885  BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3886  bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3887  // We can't insert freeze if the start value is the result of the
3888  // terminator (e.g. an invoke).
3889  if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3890  return nullptr;
3891 
3892  SmallPtrSet<Value *, 32> Visited;
3893  SmallVector<Instruction *> DropFlags;
3894  while (!Worklist.empty()) {
3895  Value *V = Worklist.pop_back_val();
3896  if (!Visited.insert(V).second)
3897  continue;
3898 
3899  if (Visited.size() > 32)
3900  return nullptr; // Limit the total number of values we inspect.
3901 
3902  // Assume that PN is non-poison, because it will be after the transform.
3903  if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3904  continue;
3905 
3906  Instruction *I = dyn_cast<Instruction>(V);
3907  if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3908  /*ConsiderFlags*/ false))
3909  return nullptr;
3910 
3911  DropFlags.push_back(I);
3912  append_range(Worklist, I->operands());
3913  }
3914 
3915  for (Instruction *I : DropFlags)
3916  I->dropPoisonGeneratingFlags();
3917 
3918  if (StartNeedsFreeze) {
3919  Builder.SetInsertPoint(StartBB->getTerminator());
3920  Value *FrozenStartV = Builder.CreateFreeze(StartV,
3921  StartV->getName() + ".fr");
3922  replaceUse(*StartU, FrozenStartV);
3923  }
3924  return replaceInstUsesWith(FI, PN);
3925 }
3926 
3927 bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3928  Value *Op = FI.getOperand(0);
3929 
3930  if (isa<Constant>(Op) || Op->hasOneUse())
3931  return false;
3932 
3933  // Move the freeze directly after the definition of its operand, so that
3934  // it dominates the maximum number of uses. Note that it may not dominate
3935  // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3936  // the normal/default destination. This is why the domination check in the
3937  // replacement below is still necessary.
3938  Instruction *MoveBefore;
3939  if (isa<Argument>(Op)) {
3940  MoveBefore =
3941  &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
3942  } else {
3943  MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3944  if (!MoveBefore)
3945  return false;
3946  }
3947 
3948  bool Changed = false;
3949  if (&FI != MoveBefore) {
3950  FI.moveBefore(MoveBefore);
3951  Changed = true;
3952  }
3953 
3954  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3955  bool Dominates = DT.dominates(&FI, U);
3956  Changed |= Dominates;
3957  return Dominates;
3958  });
3959 
3960  return Changed;
3961 }
3962 
3963 Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
3964  Value *Op0 = I.getOperand(0);
3965 
3966  if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3967  return replaceInstUsesWith(I, V);
3968 
3969  // freeze (phi const, x) --> phi const, (freeze x)
3970  if (auto *PN = dyn_cast<PHINode>(Op0)) {
3971  if (Instruction *NV = foldOpIntoPhi(I, PN))
3972  return NV;
3973  if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3974  return NV;
3975  }
3976 
3977  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3978  return replaceInstUsesWith(I, NI);
3979 
3980  // If I is freeze(undef), check its uses and fold it to a fixed constant.
3981  // - or: pick -1
3982  // - select's condition: if the true value is constant, choose it by making
3983  // the condition true.
3984  // - default: pick 0
3985  //
3986  // Note that this transform is intentionally done here rather than
3987  // via an analysis in InstSimplify or at individual user sites. That is
3988  // because we must produce the same value for all uses of the freeze -
3989  // it's the reason "freeze" exists!
3990  //
3991  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3992  // duplicating logic for binops at least.
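  // For illustration (invented IR names):
  //   %f = freeze i32 undef
  //   %o = or i32 %f, %x
  // Here %f is replaced by i32 -1, so %o later folds to -1. If the only user
  // were a select condition with a constant true value, %f would be replaced
  // by true instead; otherwise the replacement defaults to zero.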
3993  auto getUndefReplacement = [&I](Type *Ty) {
3994  Constant *BestValue = nullptr;
3995  Constant *NullValue = Constant::getNullValue(Ty);
3996  for (const auto *U : I.users()) {
3997  Constant *C = NullValue;
3998  if (match(U, m_Or(m_Value(), m_Value())))
4000  else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4001  C = ConstantInt::getTrue(Ty);
4002 
4003  if (!BestValue)
4004  BestValue = C;
4005  else if (BestValue != C)
4006  BestValue = NullValue;
4007  }
4008  assert(BestValue && "Must have at least one use");
4009  return BestValue;
4010  };
4011 
4012  if (match(Op0, m_Undef()))
4013  return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4014 
4015  Constant *C;
4016  if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4017  Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4018  return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
4019  }
4020 
4021  // Replace uses of Op with freeze(Op).
4022  if (freezeOtherUses(I))
4023  return &I;
4024 
4025  return nullptr;
4026 }
4027 
4028 /// Check for case where the call writes to an otherwise dead alloca. This
4029 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
4030 /// helper *only* analyzes the write; doesn't check any other legality aspect.
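/// For illustration (invented C names):
///   int unused;          // becomes a dead alloca
///   getValue(&unused);   // the call's only write target is that dead alloca
/// Here the only memory written by the call is the otherwise-unused local.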
4031 static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4032  auto *CB = dyn_cast<CallBase>(I);
4033  if (!CB)
4034  // TODO: handle e.g. store to alloca here - only worth doing if we extend
4035  // to allow reload along used path as described below. Otherwise, this
4036  // is simply a store to a dead allocation which will be removed.
4037  return false;
4038  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4039  if (!Dest)
4040  return false;
4041  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4042  if (!AI)
4043  // TODO: allow malloc?
4044  return false;
4045  // TODO: allow memory access dominated by move point? Note that since AI
4046  // could have a reference to itself captured by the call, we would need to
4047  // account for cycles in doing so.
4048  SmallVector<const User *> AllocaUsers;
4049  SmallPtrSet<const User *, 4> Visited;
4050  auto pushUsers = [&](const Instruction &I) {
4051  for (const User *U : I.users()) {
4052  if (Visited.insert(U).second)
4053  AllocaUsers.push_back(U);
4054  }
4055  };
4056  pushUsers(*AI);
4057  while (!AllocaUsers.empty()) {
4058  auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4059  if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4060  isa<AddrSpaceCastInst>(UserI)) {
4061  pushUsers(*UserI);
4062  continue;
4063  }
4064  if (UserI == CB)
4065  continue;
4066  // TODO: support lifetime.start/end here
4067  return false;
4068  }
4069  return true;
4070 }
4071 
4072 /// Try to move the specified instruction from its current block into the
4073 /// beginning of DestBlock, which can only happen if it's safe to move the
4074 /// instruction past all of the instructions between it and the end of its
4075 /// block.
4076 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
4077  TargetLibraryInfo &TLI) {
4078  BasicBlock *SrcBlock = I->getParent();
4079 
4080  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4081  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4082  I->isTerminator())
4083  return false;
4084 
4085  // Do not sink static or dynamic alloca instructions. Static allocas must
4086  // remain in the entry block, and dynamic allocas must not be sunk in between
4087  // a stacksave / stackrestore pair, which would incorrectly shorten its
4088  // lifetime.
4089  if (isa<AllocaInst>(I))
4090  return false;
4091 
4092  // Do not sink into catchswitch blocks.
4093  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4094  return false;
4095 
4096  // Do not sink convergent call instructions.
4097  if (auto *CI = dyn_cast<CallInst>(I)) {
4098  if (CI->isConvergent())
4099  return false;
4100  }
4101 
4102  // Unless we can prove that the memory write isn't visible except on the
4103  // path we're sinking to, we must bail.
4104  if (I->mayWriteToMemory()) {
4105  if (!SoleWriteToDeadLocal(I, TLI))
4106  return false;
4107  }
4108 
4109  // We can only sink load instructions if there is nothing between the load and
4110  // the end of block that could change the value.
4111  if (I->mayReadFromMemory()) {
4112  // We don't want to do any sophisticated alias analysis, so we only check
4113  // the instructions after I in I's parent block if we try to sink to its
4114  // successor block.
4115  if (DestBlock->getUniquePredecessor() != I->getParent())
4116  return false;
4117  for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4118  E = I->getParent()->end();
4119  Scan != E; ++Scan)
4120  if (Scan->mayWriteToMemory())
4121  return false;
4122  }
4123 
4124  I->dropDroppableUses([DestBlock](const Use *U) {
4125  if (auto *I = dyn_cast<Instruction>(U->getUser()))
4126  return I->getParent() != DestBlock;
4127  return true;
4128  });
4129  /// FIXME: We could remove droppable uses that are not dominated by
4130  /// the new position.
4131 
4132  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4133  I->moveBefore(&*InsertPos);
4134  ++NumSunkInst;
4135 
4136  // Also sink all related debug uses from the source basic block. Otherwise we
4137  // get a debug use before the def. Attempt to salvage debug uses first, to
4138  // maximise the range over which variables have a location. If we cannot salvage, then
4139  // mark the location undef: we know it was supposed to receive a new location
4140  // here, but that computation has been sunk.
4141  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4142  findDbgUsers(DbgUsers, I);
4143  // Process the sinking DbgUsers in reverse order, as we only want to clone the
4144  // last appearing debug intrinsic for each given variable.
4145  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4146  for (DbgVariableIntrinsic *DVI : DbgUsers)
4147  if (DVI->getParent() == SrcBlock)
4148  DbgUsersToSink.push_back(DVI);
4149  llvm::sort(DbgUsersToSink,
4150  [](auto *A, auto *B) { return B->comesBefore(A); });
4151 
4152  SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4153  SmallSet<DebugVariable, 4> SunkVariables;
4154  for (auto *User : DbgUsersToSink) {
4155  // A dbg.declare instruction should not be cloned, since there can only be
4156  // one per variable fragment. It should be left in the original place
4157  // because the sunk instruction is not an alloca (otherwise we could not be
4158  // here).
4159  if (isa<DbgDeclareInst>(User))
4160  continue;
4161 
4162  DebugVariable DbgUserVariable =
4163  DebugVariable(User->getVariable(), User->getExpression(),
4164  User->getDebugLoc()->getInlinedAt());
4165 
4166  if (!SunkVariables.insert(DbgUserVariable).second)
4167  continue;
4168 
4169  DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4170  if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4171  DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4172  LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4173  }
4174 
4175  // Perform salvaging without the clones, then sink the clones.
4176  if (!DIIClones.empty()) {
4177  salvageDebugInfoForDbgValues(*I, DbgUsers);
4178  // The clones are in reverse order of original appearance, reverse again to
4179  // maintain the original order.
4180  for (auto &DIIClone : llvm::reverse(DIIClones)) {
4181  DIIClone->insertBefore(&*InsertPos);
4182  LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4183  }
4184  }
4185 
4186  return true;
4187 }
4188 
4189 bool InstCombinerImpl::run() {
4190  while (!Worklist.isEmpty()) {
4191  // Walk deferred instructions in reverse order, and push them to the
4192  // worklist, which means they'll end up popped from the worklist in-order.
4193  while (Instruction *I = Worklist.popDeferred()) {
4194  // Check to see if we can DCE the instruction. We do this already here to
4195  // reduce the number of uses and thus allow other folds to trigger.
4196  // Note that eraseInstFromFunction() may push additional instructions on
4197  // the deferred worklist, so this will DCE whole instruction chains.
4198  if (isInstructionTriviallyDead(I, &TLI)) {
4199  eraseInstFromFunction(*I);
4200  ++NumDeadInst;
4201  continue;
4202  }
4203 
4204  Worklist.push(I);
4205  }
4206 
4207  Instruction *I = Worklist.removeOne();
4208  if (I == nullptr) continue; // skip null values.
4209 
4210  // Check to see if we can DCE the instruction.
4211  if (isInstructionTriviallyDead(I, &TLI)) {
4212  eraseInstFromFunction(*I);
4213  ++NumDeadInst;
4214  continue;
4215  }
4216 
4217  if (!DebugCounter::shouldExecute(VisitCounter))
4218  continue;
4219 
4220  // Instruction isn't dead, see if we can constant propagate it.
4221  if (!I->use_empty() &&
4222  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
4223  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
4224  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
4225  << '\n');
4226 
4227  // Add operands to the worklist.
4228  replaceInstUsesWith(*I, C);
4229  ++NumConstProp;
4230  if (isInstructionTriviallyDead(I, &TLI))
4231  eraseInstFromFunction(*I);
4232  MadeIRChange = true;
4233  continue;
4234  }
4235  }
4236 
4237  // See if we can trivially sink this instruction to its user if we can
4238  // prove that the successor is not executed more frequently than our block.
4239  // Return the UserBlock if successful.
4240  auto getOptionalSinkBlockForInst =
4241  [this](Instruction *I) -> Optional<BasicBlock *> {
4242  if (!EnableCodeSinking)
4243  return None;
4244 
4245  BasicBlock *BB = I->getParent();
4246  BasicBlock *UserParent = nullptr;
4247  unsigned NumUsers = 0;
4248 
4249  for (auto *U : I->users()) {
4250  if (U->isDroppable())
4251  continue;
4252  if (NumUsers > MaxSinkNumUsers)
4253  return None;
4254 
4255  Instruction *UserInst = cast<Instruction>(U);
4256  // Special handling for Phi nodes - get the block the use occurs in.
4257  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4258  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4259  if (PN->getIncomingValue(i) == I) {
4260  // Bail out if we have uses in different blocks. We don't do any
4261  // sophisticated analysis (i.e., finding NearestCommonDominator of
4262  // these use blocks).
4263  if (UserParent && UserParent != PN->getIncomingBlock(i))
4264  return None;
4265  UserParent = PN->getIncomingBlock(i);
4266  }
4267  }
4268  assert(UserParent && "expected to find user block!");
4269  } else {
4270  if (UserParent && UserParent != UserInst->getParent())
4271  return None;
4272  UserParent = UserInst->getParent();
4273  }
4274 
4275  // Make sure these checks are done only once; naturally, we do them the
4276  // first time we get the UserParent, which saves compile time.
4277  if (NumUsers == 0) {
4278  // Try sinking to another block. If that block is unreachable, then do
4279  // not bother. SimplifyCFG should handle it.
4280  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4281  return None;
4282 
4283  auto *Term = UserParent->getTerminator();
4284  // See if the user is one of our successors that has only one
4285  // predecessor, so that we don't have to split the critical edge.
4286  // Another option where we can sink is a block that ends with a
4287  // terminator that does not pass control to other block (such as
4288  // return or unreachable or resume). In this case:
4289  // - I dominates the User (by SSA form);
4290  // - the User will be executed at most once.
4291  // So sinking I down to User is always profitable or neutral.
4292  if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4293  return None;
4294 
4295  assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4296  }
4297 
4298  NumUsers++;
4299  }
4300 
4301  // No user or only has droppable users.
4302  if (!UserParent)
4303  return None;
4304 
4305  return UserParent;
4306  };
4307 
4308  auto OptBB = getOptionalSinkBlockForInst(I);
4309  if (OptBB) {
4310  auto *UserParent = *OptBB;
4311  // Okay, the CFG is simple enough, try to sink this instruction.
4312  if (TryToSinkInstruction(I, UserParent, TLI)) {
4313  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4314  MadeIRChange = true;
4315  // We'll add uses of the sunk instruction below, but since
4316  // sinking can expose opportunities for its *operands*, add
4317  // them to the worklist.
4318  for (Use &U : I->operands())
4319  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4320  Worklist.push(OpI);
4321  }
4322  }
4323 
4324  // Now that we have an instruction, try combining it to simplify it.
4325  Builder.SetInsertPoint(I);
4326  Builder.CollectMetadataToCopy(
4327  I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4328 
4329 #ifndef NDEBUG
4330  std::string OrigI;
4331 #endif
4332  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4333  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4334 
4335  if (Instruction *Result = visit(*I)) {
4336  ++NumCombined;
4337  // Should we replace the old instruction with a new one?
4338  if (Result != I) {
4339  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4340  << " New = " << *Result << '\n');
4341 
4342  Result->copyMetadata(*I,
4343  {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4344  // Everything uses the new instruction now.
4345  I->replaceAllUsesWith(Result);
4346 
4347  // Move the name to the new instruction first.
4348  Result->takeName(I);
4349 
4350  // Insert the new instruction into the basic block...
4351  BasicBlock *InstParent = I->getParent();
4352  BasicBlock::iterator InsertPos = I->getIterator();
4353 
4354  // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4355  if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4356  // We need to fix up the insertion point.
4357  if (isa<PHINode>(I)) // PHI -> Non-PHI
4358  InsertPos = InstParent->getFirstInsertionPt();
4359  else // Non-PHI -> PHI
4360  InsertPos = InstParent->getFirstNonPHI()->getIterator();
4361  }
4362 
4363  InstParent->getInstList().insert(InsertPos, Result);
4364 
4365  // Push the new instruction and any users onto the worklist.
4366  Worklist.pushUsersToWorkList(*Result);
4367  Worklist.push(Result);
4368 
4369  eraseInstFromFunction(*I);
4370  } else {
4371  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4372  << " New = " << *I << '\n');
4373 
4374  // If the instruction was modified, it's possible that it is now dead.
4375  // If so, remove it.
4376  if (isInstructionTriviallyDead(I, &TLI)) {
4377  eraseInstFromFunction(*I);
4378  } else {
4379  Worklist.pushUsersToWorkList(*I);
4380  Worklist.push(I);
4381  }
4382  }
4383  MadeIRChange = true;
4384  }
4385  }
4386 
4387  Worklist.zap();
4388  return MadeIRChange;
4389 }
4390 
4391 // Track the scopes used by !alias.scope and !noalias. In a function, a
4392 // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4393 // by both sets. If not, the declaration of the scope can be safely omitted.
4394 // The MDNode of the scope can be omitted as well for the instructions that are
4395 // part of this function. We do not do that at this point, as it might become
4396 // too time-consuming.
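// For example, a declaration such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// can be dropped unless the scope it declares is referenced both by some
// instruction's !alias.scope list and by some instruction's !noalias list
// in this function.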
4397 class AliasScopeTracker {
4398  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4399  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4400 
4401 public:
4402  void analyse(Instruction *I) {
4403  // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4404  if (!I->hasMetadataOtherThanDebugLoc())
4405  return;
4406 
4407  auto Track = [](Metadata *ScopeList, auto &Container) {
4408  const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4409  if (!MDScopeList || !Container.insert(MDScopeList).second)
4410  return;
4411  for (const auto &MDOperand : MDScopeList->operands())
4412  if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4413  Container.insert(MDScope);
4414  };
4415 
4416  Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4417  Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4418  }
4419 
4420  bool isNoAliasScopeDeclDead(Instruction *Inst) {
4421  NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4422  if (!Decl)
4423  return false;
4424 
4425  assert(Decl->use_empty() &&
4426  "llvm.experimental.noalias.scope.decl in use ?");
4427  const MDNode *MDSL = Decl->getScopeList();
4428  assert(MDSL->getNumOperands() == 1 &&
4429  "llvm.experimental.noalias.scope should refer to a single scope");
4430  auto &MDOperand = MDSL->getOperand(0);
4431  if (auto *MD = dyn_cast<MDNode>(MDOperand))
4432  return !UsedAliasScopesAndLists.contains(MD) ||
4433  !UsedNoAliasScopesAndLists.contains(MD);
4434 
4435  // Not an MDNode? Throw it away.
4436  return true;
4437  }
4438 };
4439 
4440 /// Populate the IC worklist from a function, by walking it in depth-first
4441 /// order and adding all reachable code to the worklist.
4442 ///
4443 /// This has a couple of tricks to make the code faster and more powerful. In
4444 /// particular, we constant fold and DCE instructions as we go, to avoid adding
4445 /// them to the worklist (this significantly speeds up instcombine on code where
4446 /// many instructions are dead or constant). Additionally, if we find a branch
4447 /// whose condition is a known constant, we only visit the reachable successor.
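/// For example, given a terminator such as 'br i1 true, label %live, label
/// %dead', only %live is added to the block worklist; anything left in %dead
/// is cleaned up by the unreachable-block sweep below.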
4448 static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
4449  const TargetLibraryInfo *TLI,
4450  InstructionWorklist &ICWorklist) {
4451  bool MadeIRChange = false;
4452  SmallPtrSet<BasicBlock *, 32> Visited;
4453  SmallVector<BasicBlock *, 256> Worklist;
4454  Worklist.push_back(&F.front());
4455 
4456  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4457  DenseMap<Constant *, Constant *> FoldedConstants;
4458  AliasScopeTracker SeenAliasScopes;
4459 
4460  do {
4461  BasicBlock *BB = Worklist.pop_back_val();
4462 
4463  // We have now visited this block! If we've already been here, ignore it.
4464  if (!Visited.insert(BB).second)
4465  continue;
4466 
4467  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4468  // ConstantProp instruction if trivially constant.
4469  if (!Inst.use_empty() &&
4470  (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4471  if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
4472  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4473  << '\n');
4474  Inst.replaceAllUsesWith(C);
4475  ++NumConstProp;
4476  if (isInstructionTriviallyDead(&Inst, TLI))
4477  Inst.eraseFromParent();
4478  MadeIRChange = true;
4479  continue;
4480  }
4481 
4482  // See if we can constant fold its operands.
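  // FoldedConstants caches the result of ConstantFoldConstant, so a constant
  // expression that appears as an operand many times is folded only once.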
4483  for (Use &U : Inst.operands()) {
4484  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4485  continue;
4486 
4487  auto *C = cast<Constant>(U);
4488  Constant *&FoldRes = FoldedConstants[C];
4489  if (!FoldRes)
4490  FoldRes = ConstantFoldConstant(C, DL, TLI);
4491 
4492  if (FoldRes != C) {
4493  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4494  << "\n Old = " << *C
4495  << "\n New = " << *FoldRes << '\n');
4496  U = FoldRes;
4497  MadeIRChange = true;
4498  }
4499  }
4500 
4501  // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4502  // these call instructions consumes a non-trivial amount of time and
4503  // provides no value for the optimization.
4504  if (!Inst.isDebugOrPseudoInst()) {
4505  InstrsForInstructionWorklist.push_back(&Inst);
4506  SeenAliasScopes.analyse(&Inst);
4507  }
4508  }
4509 
4510  // Recursively visit successors. If this is a branch or switch on a
4511  // constant, only visit the reachable successor.
4512  Instruction *TI = BB->getTerminator();
4513  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
4514  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
4515  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
4516  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
4517  Worklist.push_back(ReachableBB);
4518  continue;
4519  }
4520  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4521  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4522  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
4523  continue;
4524  }
4525  }
4526 
4527  append_range(Worklist, successors(TI));
4528  } while (!Worklist.empty());
4529 
4530  // Remove instructions inside unreachable blocks. This prevents the
4531  // instcombine code from having to deal with some bad special cases, and
4532  // reduces use counts of instructions.
4533  for (BasicBlock &BB : F) {
4534  if (Visited.count(&BB))
4535  continue;
4536 
4537  unsigned NumDeadInstInBB;
4538  unsigned NumDeadDbgInstInBB;
4539  std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4541 
4542  MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4543  NumDeadInst += NumDeadInstInBB;
4544  }
4545 
4546  // Once we've found all of the instructions to add to instcombine's worklist,
4547  // add them in reverse order. This way instcombine will visit from the top
4548  // of the function down. This fits well with the way that it adds all uses
4549  // of instructions to the worklist after doing a transformation, thus avoiding
4550  // some N^2 behavior in pathological cases.
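// For example, given '%a = add i32 %X, 1' followed by '%b = add i32 %a, 1',
// %a is visited before %b, so by the time %b is combined it already sees %a
// in its simplified form.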
4551  ICWorklist.reserve(InstrsForInstructionWorklist.size());
4552  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4553  // DCE instruction if trivially dead. As we iterate in reverse program
4554  // order here, we will clean up whole chains of dead instructions.
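  // For example, if an otherwise-unused %b is the only user of %a, erasing %b
  // first makes %a trivially dead, so %a is erased as well when it is reached.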
4555  if (isInstructionTriviallyDead(Inst, TLI) ||
4556  SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4557  ++NumDeadInst;
4558  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4559  salvageDebugInfo(*Inst);
4560  Inst->eraseFromParent();
4561  MadeIRChange = true;
4562  continue;
4563  }
4564 
4565  ICWorklist.push(Inst);
4566  }
4567 
4568  return MadeIRChange;
4569 }
4570 
4571 static bool combineInstructionsOverFunction(
4572  Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4573  AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4574  DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4575  ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4576  auto &DL = F.getParent()->getDataLayout();
4577  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
4578 
4579  /// Builder - This is an IRBuilder that automatically inserts new
4580  /// instructions into the worklist when they are created.
4581  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
4582  F.getContext(), TargetFolder(DL),
4583  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4584  Worklist.add(I);
4585  if (auto *Assume = dyn_cast<AssumeInst>(I))
4586  AC.registerAssumption(Assume);
4587  }));
4588 
4589  // Lower dbg.declare intrinsics; otherwise their value may be clobbered
4590  // by the instcombiner.
4591  bool MadeIRChange = false;
4592  if (ShouldLowerDbgDeclare)
4593  MadeIRChange = LowerDbgDeclare(F);
4594 
4595  // Iterate while there is work to do.
4596  unsigned Iteration = 0;
4597  while (true) {
4598  ++NumWorklistIterations;
4599  ++Iteration;
4600 
4601  if (Iteration > InfiniteLoopDetectionThreshold) {
4602  report_fatal_error(
4603  "Instruction Combining seems stuck in an infinite loop after " +
4604  Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4605  }
4606 
4607  if (Iteration > MaxIterations) {
4608  LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4609  << " on " << F.getName()
4610  << " reached; stopping before reaching a fixpoint\n");
4611  break;
4612  }
4613 
4614  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4615  << F.getName() << "\n");
4616 
4617  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4618 
4619  InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4620  ORE, BFI, PSI, DL, LI);
4621  IC.MaxArraySizeForCombine = MaxArraySizeForCombine;
4622 
4623  if (!IC.run())
4624  break;
4625 
4626  MadeIRChange = true;
4627  }
4628 
4629  return MadeIRChange;
4630 }
4631 
4632 InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
4633 
4634 InstCombinePass::InstCombinePass(unsigned MaxIterations)
4635  : MaxIterations(MaxIterations) {}
4636 
4637 PreservedAnalyses InstCombinePass::run(Function &F,
4638  FunctionAnalysisManager &AM) {
4639  auto &AC = AM.getResult<AssumptionAnalysis>(F);
4640  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4641  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4642  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4643  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4644 
4645  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4646 
4647  auto *AA = &AM.getResult<AAManager>(F);
4648  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4649  ProfileSummaryInfo *PSI =
4650  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4651  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4652  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4653 
4654  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4655  BFI, PSI, MaxIterations, LI))
4656  // No changes, all analyses are preserved.
4657  return PreservedAnalyses::all();
4658 
4659  // Mark all the analyses that instcombine updates as preserved.
4660  PreservedAnalyses PA;
4661  PA.preserveSet<CFGAnalyses>();
4662  return PA;
4663 }
4664 
4665 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
4666  AU.setPreservesCFG();
4667  AU.addRequired<AAResultsWrapperPass>();
4668  AU.addRequired<AssumptionCacheTracker>();
4669  AU.addRequired<TargetLibraryInfoWrapperPass>();
4670  AU.addRequired<TargetTransformInfoWrapperPass>();
4671  AU.addRequired<DominatorTreeWrapperPass>();
4672  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
4673  AU.addPreserved<DominatorTreeWrapperPass>();
4674  AU.addPreserved<AAResultsWrapperPass>();
4675  AU.addPreserved<BasicAAWrapperPass>();
4676  AU.addPreserved<GlobalsAAWrapperPass>();
4677  AU.addRequired<ProfileSummaryInfoWrapperPass>();
4678  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
4679 }
4680 
4682  if (skipFunction(F))
4683  return false;
4684 
4685  // Required analyses.
4686  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4687  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4688  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4689  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4690  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4691  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4692 
4693  // Optional analyses.
4694  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4695  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4696  ProfileSummaryInfo *PSI =
4697  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4698  BlockFrequencyInfo *BFI =
4699  (PSI && PSI->hasProfileSummary()) ?
4700  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4701  nullptr;
4702 
4703  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4704  BFI, PSI, MaxIterations, LI);
4705 }
4706 
4707 char InstructionCombiningPass::ID = 0;
4708 
4709 InstructionCombiningPass::InstructionCombiningPass()
4710  : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
4711  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4712 }
4713 
4714 InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
4715  : FunctionPass(ID), MaxIterations(MaxIterations) {
4716  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4717 }
4718 
4719 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
4720  "Combine redundant instructions", false, false)
4721 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4722 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
4723 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4724 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4725 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
4726 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
4727 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
4728 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
4729 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
4730 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
4731  "Combine redundant instructions", false, false)
4732 
4733 // Initialization Routines
4734 void llvm::initializeInstCombine(PassRegistry &Registry) {
4735  initializeInstructionCombiningPassPass(Registry);
4736 }
4737 
4738 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
4739  initializeInstCombine(*unwrap(R));
4740 }
4741 
4742 FunctionPass *llvm::createInstructionCombiningPass() {
4743  return new InstructionCombiningPass();
4744 }
4745 
4746 FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4747  return new InstructionCombiningPass(MaxIterations);
4748 }
4749 
4750 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
4751  unwrap(PM)->add(createInstructionCombiningPass());
4752 }