1 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // InstructionCombining - Combine instructions to form fewer, simple
10 // instructions. This pass does not modify the CFG. This pass is where
11 // algebraic simplification happens.
12 //
13 // This pass combines things like:
14 // %Y = add i32 %X, 1
15 // %Z = add i32 %Y, 1
16 // into:
17 // %Z = add i32 %X, 2
18 //
19 // This is a simple worklist driven algorithm.
20 //
21 // This pass guarantees that the following canonicalizations are performed on
22 // the program:
23 // 1. If a binary operator has a constant operand, it is moved to the RHS
24 // 2. Bitwise operators with constant operands are always grouped so that
25 // shifts are performed first, then or's, then and's, then xor's.
26 // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27 // 4. All cmp instructions on boolean values are replaced with logical ops
28 // 5. add X, X is represented as (X*2) => (X << 1)
29 // 6. Multiplies with a power-of-two constant argument are transformed into
30 // shifts.
31 // ... etc.
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "InstCombineInternal.h"
36 #include "llvm-c/Initialization.h"
38 #include "llvm/ADT/APInt.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/DenseMap.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/Statistic.h"
48 #include "llvm/Analysis/CFG.h"
53 #include "llvm/Analysis/LoopInfo.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/CFG.h"
65 #include "llvm/IR/Constant.h"
66 #include "llvm/IR/Constants.h"
67 #include "llvm/IR/DIBuilder.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugInfo.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Dominators.h"
73 #include "llvm/IR/Function.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InstrTypes.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/Metadata.h"
83 #include "llvm/IR/Operator.h"
84 #include "llvm/IR/PassManager.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/Use.h"
88 #include "llvm/IR/User.h"
89 #include "llvm/IR/Value.h"
90 #include "llvm/IR/ValueHandle.h"
91 #include "llvm/InitializePasses.h"
92 #include "llvm/Support/Casting.h"
94 #include "llvm/Support/Compiler.h"
95 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
103 #include <algorithm>
104 #include <cassert>
105 #include <cstdint>
106 #include <memory>
107 #include <optional>
108 #include <string>
109 #include <utility>
110 
111 #define DEBUG_TYPE "instcombine"
112 #include "llvm/Transforms/Utils/InstructionWorklist.h"
113 #include <optional>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 STATISTIC(NumWorklistIterations,
119  "Number of instruction combining iterations performed");
120 
121 STATISTIC(NumCombined , "Number of insts combined");
122 STATISTIC(NumConstProp, "Number of constant folds");
123 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
124 STATISTIC(NumSunkInst , "Number of instructions sunk");
125 STATISTIC(NumExpand, "Number of expansions");
126 STATISTIC(NumFactor , "Number of factorizations");
127 STATISTIC(NumReassoc , "Number of reassociations");
128 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
129  "Controls which instructions are visited");
130 
131 // FIXME: these limits eventually should be as low as 2.
132 static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
133 #ifndef NDEBUG
134 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
135 #else
136 static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
137 #endif
138 
139 static cl::opt<bool>
140 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
141  cl::init(true));
142 
144  "instcombine-max-sink-users", cl::init(32),
145  cl::desc("Maximum number of undroppable users for instruction sinking"));
146 
148  "instcombine-max-iterations",
149  cl::desc("Limit the maximum number of instruction combining iterations"),
151 
153  "instcombine-infinite-loop-threshold",
154  cl::desc("Number of instruction combining iterations considered an "
155  "infinite loop"),
157 
158 static cl::opt<unsigned>
159 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
160  cl::desc("Maximum array size considered when doing a combine"));
161 
162 // FIXME: Remove this flag when it is no longer necessary to convert
163 // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
164 // increases variable availability at the cost of accuracy. Variables that
165 // cannot be promoted by mem2reg or SROA will be described as living in memory
166 // for their entire lifetime. However, passes like DSE and instcombine can
167 // delete stores to the alloca, leading to misleading and inaccurate debug
168 // information. This flag can be removed when those passes are fixed.
169 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
170  cl::Hidden, cl::init(true));
171 
172 std::optional<Instruction *>
173 InstCombinerImpl::targetInstCombineIntrinsic(IntrinsicInst &II) {
174  // Handle target specific intrinsics
175  if (II.getCalledFunction()->isTargetIntrinsic()) {
176  return TTI.instCombineIntrinsic(*this, II);
177  }
178  return std::nullopt;
179 }
180 
181 std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedUseBitsIntrinsic(
182  IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
183  bool &KnownBitsComputed) {
184  // Handle target specific intrinsics
185  if (II.getCalledFunction()->isTargetIntrinsic()) {
186  return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
187  KnownBitsComputed);
188  }
189  return std::nullopt;
190 }
191 
192 std::optional<Value *> InstCombinerImpl::targetSimplifyDemandedVectorEltsIntrinsic(
193  IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
194  APInt &UndefElts3,
195  std::function<void(Instruction *, unsigned, APInt, APInt &)>
196  SimplifyAndSetOp) {
197  // Handle target specific intrinsics
198  if (II.getCalledFunction()->isTargetIntrinsic()) {
199  return TTI.simplifyDemandedVectorEltsIntrinsic(
200  *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
201  SimplifyAndSetOp);
202  }
203  return std::nullopt;
204 }
205 
206 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
207  return llvm::emitGEPOffset(&Builder, DL, GEP);
208 }
209 
210 /// Legal integers and common types are considered desirable. This is used to
211 /// avoid creating instructions with types that may not be supported well by
212 /// the backend.
213 /// NOTE: This treats i8, i16 and i32 specially because they are common
214 /// types in frontend languages.
215 bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
216  switch (BitWidth) {
217  case 8:
218  case 16:
219  case 32:
220  return true;
221  default:
222  return DL.isLegalInteger(BitWidth);
223  }
224 }
225 
226 /// Return true if it is desirable to convert an integer computation from a
227 /// given bit width to a new bit width.
228 /// We don't want to convert from a legal or desirable type (like i8) to an
229 /// illegal type or from a smaller to a larger illegal type. A width of '1'
230 /// is always treated as a desirable type because i1 is a fundamental type in
231 /// IR, and there are many specialized optimizations for i1 types.
232 /// Common/desirable widths are equally treated as legal to convert to, in
233 /// order to open up more combining opportunities.
234 bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
235  unsigned ToWidth) const {
236  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
237  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
238 
239  // Convert to desirable widths even if they are not legal types.
240  // Only shrink types, to prevent infinite loops.
241  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
242  return true;
243 
244  // If this is a legal or desirable integer 'from' type, and the result would be
245  // an illegal type, don't do the transformation.
246  if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
247  return false;
248 
249  // Otherwise, if both are illegal, do not increase the size of the result. We
250  // do allow things like i160 -> i64, but not i64 -> i160.
251  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
252  return false;
253 
254  return true;
255 }
256 
257 /// Return true if it is desirable to convert a computation from 'From' to 'To'.
258 /// We don't want to convert from a legal to an illegal type or from a smaller
259 /// to a larger illegal type. i1 is always treated as a legal type because it is
260 /// a fundamental type in IR, and there are many specialized optimizations for
261 /// i1 types.
262 bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
263  // TODO: This could be extended to allow vectors. Datalayout changes might be
264  // needed to properly support that.
265  if (!From->isIntegerTy() || !To->isIntegerTy())
266  return false;
267 
268  unsigned FromWidth = From->getPrimitiveSizeInBits();
269  unsigned ToWidth = To->getPrimitiveSizeInBits();
270  return shouldChangeType(FromWidth, ToWidth);
271 }
272 
273 // Return true if No Signed Wrap should be maintained for I.
274 // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
275 // where both B and C should be ConstantInts, results in a constant that does
276 // not overflow. This function only handles the Add and Sub opcodes. For
277 // all other opcodes, the function conservatively returns false.
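// For example (illustrative): when "(X +nsw 3) +nsw 5" is reassociated to
// "X + 8", nsw can be kept because 3 + 5 does not overflow; for i8 operands,
// "(X +nsw 100) +nsw 100" must drop nsw because 100 + 100 overflows i8.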
278 static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
279  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
280  if (!OBO || !OBO->hasNoSignedWrap())
281  return false;
282 
283  // We reason about Add and Sub only.
284  Instruction::BinaryOps Opcode = I.getOpcode();
285  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
286  return false;
287 
288  const APInt *BVal, *CVal;
289  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
290  return false;
291 
292  bool Overflow = false;
293  if (Opcode == Instruction::Add)
294  (void)BVal->sadd_ov(*CVal, Overflow);
295  else
296  (void)BVal->ssub_ov(*CVal, Overflow);
297 
298  return !Overflow;
299 }
300 
301 static bool hasNoUnsignedWrap(BinaryOperator &I) {
302  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
303  return OBO && OBO->hasNoUnsignedWrap();
304 }
305 
306 static bool hasNoSignedWrap(BinaryOperator &I) {
307  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
308  return OBO && OBO->hasNoSignedWrap();
309 }
310 
311 /// Conservatively clears subclassOptionalData after a reassociation or
312 /// commutation. Fast-math flags are the exception; they are preserved when
313 /// applicable.
314 static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
315  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
316  if (!FPMO) {
317  I.clearSubclassOptionalData();
318  return;
319  }
320 
321  FastMathFlags FMF = I.getFastMathFlags();
322  I.clearSubclassOptionalData();
323  I.setFastMathFlags(FMF);
324 }
325 
326 /// Combine constant operands of associative operations either before or after a
327 /// cast to eliminate one of the associative operations:
328 /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
329 /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
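/// For example (illustrative IR):
///   %a = and i8 %x, 5
///   %z = zext i8 %a to i32
///   %r = and i32 %z, 12
/// becomes
///   %z = zext i8 %x to i32
///   %r = and i32 %z, 4        ; 12 & zext(5) == 4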
330 static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
331  InstCombinerImpl &IC) {
332  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
333  if (!Cast || !Cast->hasOneUse())
334  return false;
335 
336  // TODO: Enhance logic for other casts and remove this check.
337  auto CastOpcode = Cast->getOpcode();
338  if (CastOpcode != Instruction::ZExt)
339  return false;
340 
341  // TODO: Enhance logic for other BinOps and remove this check.
342  if (!BinOp1->isBitwiseLogicOp())
343  return false;
344 
345  auto AssocOpcode = BinOp1->getOpcode();
346  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
347  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
348  return false;
349 
350  Constant *C1, *C2;
351  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
352  !match(BinOp2->getOperand(1), m_Constant(C2)))
353  return false;
354 
355  // TODO: This assumes a zext cast.
356  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
357  // to the destination type might lose bits.
358 
359  // Fold the constants together in the destination type:
360  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
361  Type *DestTy = C1->getType();
362  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
363  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
364  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
365  IC.replaceOperand(*BinOp1, 1, FoldedC);
366  return true;
367 }
368 
369 // Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
370 // inttoptr ( ptrtoint (x) ) --> x
371 Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
372  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
373  if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
374  DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
375  auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
376  Type *CastTy = IntToPtr->getDestTy();
377  if (PtrToInt &&
378  CastTy->getPointerAddressSpace() ==
379  PtrToInt->getSrcTy()->getPointerAddressSpace() &&
380  DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
381  DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
382  return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
383  "", PtrToInt);
384  }
385  }
386  return nullptr;
387 }
388 
389 /// This performs a few simplifications for operators that are associative or
390 /// commutative:
391 ///
392 /// Commutative operators:
393 ///
394 /// 1. Order operands such that they are listed from right (least complex) to
395 /// left (most complex). This puts constants before unary operators before
396 /// binary operators.
397 ///
398 /// Associative operators:
399 ///
400 /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
401 /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
402 ///
403 /// Associative and commutative operators:
404 ///
405 /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
406 /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
407 /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
408 /// if C1 and C2 are constants.
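/// For example (illustrative IR), transform 6 turns
///   %a = add i32 %x, 3
///   %b = add i32 %y, 7
///   %r = add i32 %a, %b
/// into
///   %t = add i32 %x, %y
///   %r = add i32 %t, 10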
409 bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
410  Instruction::BinaryOps Opcode = I.getOpcode();
411  bool Changed = false;
412 
413  do {
414  // Order operands such that they are listed from right (least complex) to
415  // left (most complex). This puts constants before unary operators before
416  // binary operators.
417  if (I.isCommutative() && getComplexity(I.getOperand(0)) <
418  getComplexity(I.getOperand(1)))
419  Changed = !I.swapOperands();
420 
421  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
422  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
423 
424  if (I.isAssociative()) {
425  // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
426  if (Op0 && Op0->getOpcode() == Opcode) {
427  Value *A = Op0->getOperand(0);
428  Value *B = Op0->getOperand(1);
429  Value *C = I.getOperand(1);
430 
431  // Does "B op C" simplify?
432  if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
433  // It simplifies to V. Form "A op V".
434  replaceOperand(I, 0, A);
435  replaceOperand(I, 1, V);
436  bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
437  bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
438 
439  // Conservatively clear all optional flags since they may not be
440  // preserved by the reassociation. Reset nsw/nuw based on the above
441  // analysis.
442  ClearSubclassDataAfterReassociation(I);
443 
444  // Note: this is only valid because SimplifyBinOp doesn't look at
445  // the operands to Op0.
446  if (IsNUW)
447  I.setHasNoUnsignedWrap(true);
448 
449  if (IsNSW)
450  I.setHasNoSignedWrap(true);
451 
452  Changed = true;
453  ++NumReassoc;
454  continue;
455  }
456  }
457 
458  // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
459  if (Op1 && Op1->getOpcode() == Opcode) {
460  Value *A = I.getOperand(0);
461  Value *B = Op1->getOperand(0);
462  Value *C = Op1->getOperand(1);
463 
464  // Does "A op B" simplify?
465  if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
466  // It simplifies to V. Form "V op C".
467  replaceOperand(I, 0, V);
468  replaceOperand(I, 1, C);
469  // Conservatively clear the optional flags, since they may not be
470  // preserved by the reassociation.
471  ClearSubclassDataAfterReassociation(I);
472  Changed = true;
473  ++NumReassoc;
474  continue;
475  }
476  }
477  }
478 
479  if (I.isAssociative() && I.isCommutative()) {
480  if (simplifyAssocCastAssoc(&I, *this)) {
481  Changed = true;
482  ++NumReassoc;
483  continue;
484  }
485 
486  // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
487  if (Op0 && Op0->getOpcode() == Opcode) {
488  Value *A = Op0->getOperand(0);
489  Value *B = Op0->getOperand(1);
490  Value *C = I.getOperand(1);
491 
492  // Does "C op A" simplify?
493  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
494  // It simplifies to V. Form "V op B".
495  replaceOperand(I, 0, V);
496  replaceOperand(I, 1, B);
497  // Conservatively clear the optional flags, since they may not be
498  // preserved by the reassociation.
499  ClearSubclassDataAfterReassociation(I);
500  Changed = true;
501  ++NumReassoc;
502  continue;
503  }
504  }
505 
506  // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
507  if (Op1 && Op1->getOpcode() == Opcode) {
508  Value *A = I.getOperand(0);
509  Value *B = Op1->getOperand(0);
510  Value *C = Op1->getOperand(1);
511 
512  // Does "C op A" simplify?
513  if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
514  // It simplifies to V. Form "B op V".
515  replaceOperand(I, 0, B);
516  replaceOperand(I, 1, V);
517  // Conservatively clear the optional flags, since they may not be
518  // preserved by the reassociation.
519  ClearSubclassDataAfterReassociation(I);
520  Changed = true;
521  ++NumReassoc;
522  continue;
523  }
524  }
525 
526  // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
527  // if C1 and C2 are constants.
528  Value *A, *B;
529  Constant *C1, *C2, *CRes;
530  if (Op0 && Op1 &&
531  Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
532  match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
533  match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
534  (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
535  bool IsNUW = hasNoUnsignedWrap(I) &&
536  hasNoUnsignedWrap(*Op0) &&
537  hasNoUnsignedWrap(*Op1);
538  BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
539  BinaryOperator::CreateNUW(Opcode, A, B) :
540  BinaryOperator::Create(Opcode, A, B);
541 
542  if (isa<FPMathOperator>(NewBO)) {
543  FastMathFlags Flags = I.getFastMathFlags();
544  Flags &= Op0->getFastMathFlags();
545  Flags &= Op1->getFastMathFlags();
546  NewBO->setFastMathFlags(Flags);
547  }
548  InsertNewInstWith(NewBO, I);
549  NewBO->takeName(Op1);
550  replaceOperand(I, 0, NewBO);
551  replaceOperand(I, 1, CRes);
552  // Conservatively clear the optional flags, since they may not be
553  // preserved by the reassociation.
554  ClearSubclassDataAfterReassociation(I);
555  if (IsNUW)
556  I.setHasNoUnsignedWrap(true);
557 
558  Changed = true;
559  continue;
560  }
561  }
562 
563  // No further simplifications.
564  return Changed;
565  } while (true);
566 }
567 
568 /// Return whether "X LOp (Y ROp Z)" is always equal to
569 /// "(X LOp Y) ROp (X LOp Z)".
570 static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
571  Instruction::BinaryOps ROp) {
572  // X & (Y | Z) <--> (X & Y) | (X & Z)
573  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
574  if (LOp == Instruction::And)
575  return ROp == Instruction::Or || ROp == Instruction::Xor;
576 
577  // X | (Y & Z) <--> (X | Y) & (X | Z)
578  if (LOp == Instruction::Or)
579  return ROp == Instruction::And;
580 
581  // X * (Y + Z) <--> (X * Y) + (X * Z)
582  // X * (Y - Z) <--> (X * Y) - (X * Z)
583  if (LOp == Instruction::Mul)
584  return ROp == Instruction::Add || ROp == Instruction::Sub;
585 
586  return false;
587 }
588 
589 /// Return whether "(X LOp Y) ROp Z" is always equal to
590 /// "(X ROp Z) LOp (Y ROp Z)".
591 static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
592  Instruction::BinaryOps ROp) {
593  if (Instruction::isCommutative(ROp))
594  return leftDistributesOverRight(ROp, LOp);
595 
596  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
597  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
598 
599  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
600  // but this requires knowing that the addition does not overflow and other
601  // such subtleties.
602 }
603 
604 /// This function returns identity value for given opcode, which can be used to
605 /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
606 static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
607  if (isa<Constant>(V))
608  return nullptr;
609 
610  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
611 }
612 
613 /// This function predicates factorization using distributive laws. By default,
614 /// it just returns the 'Op' inputs. But for special-cases like
615 /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
616 /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
617 /// allow more factorization opportunities.
618 static Instruction::BinaryOps
619 getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
620  Value *&LHS, Value *&RHS) {
621  assert(Op && "Expected a binary operator");
622  LHS = Op->getOperand(0);
623  RHS = Op->getOperand(1);
624  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
625  Constant *C;
626  if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
627  // X << C --> X * (1 << C)
628  RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
629  return Instruction::Mul;
630  }
631  // TODO: We can add other conversions e.g. shr => div etc.
632  }
633  return Op->getOpcode();
634 }
635 
636 /// This tries to simplify binary operations by factorizing out common terms
637 /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
638 static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
639  InstCombiner::BuilderTy &Builder,
640  Instruction::BinaryOps InnerOpcode, Value *A,
641  Value *B, Value *C, Value *D) {
642  assert(A && B && C && D && "All values must be provided");
643 
644  Value *V = nullptr;
645  Value *RetVal = nullptr;
646  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
647  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
648 
649  // Does "X op' Y" always equal "Y op' X"?
650  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
651 
652  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
653  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
654  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
655  // commutative case, "(A op' B) op (C op' A)"?
656  if (A == C || (InnerCommutative && A == D)) {
657  if (A != C)
658  std::swap(C, D);
659  // Consider forming "A op' (B op D)".
660  // If "B op D" simplifies then it can be formed with no cost.
661  V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
662 
663  // If "B op D" doesn't simplify then only go on if one of the existing
664  // operations "A op' B" and "C op' D" will be zapped as no longer used.
665  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
666  V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
667  if (V)
668  RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
669  }
670  }
671 
672  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
673  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
674  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
675  // commutative case, "(A op' B) op (B op' D)"?
676  if (B == D || (InnerCommutative && B == C)) {
677  if (B != D)
678  std::swap(C, D);
679  // Consider forming "(A op C) op' B".
680  // If "A op C" simplifies then it can be formed with no cost.
681  V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
682 
683  // If "A op C" doesn't simplify then only go on if one of the existing
684  // operations "A op' B" and "C op' D" will be zapped as no longer used.
685  if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
686  V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
687  if (V)
688  RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
689  }
690  }
691 
692  if (!RetVal)
693  return nullptr;
694 
695  ++NumFactor;
696  RetVal->takeName(&I);
697 
698  // Try to add no-overflow flags to the final value.
699  if (isa<OverflowingBinaryOperator>(RetVal)) {
700  bool HasNSW = false;
701  bool HasNUW = false;
702  if (isa<OverflowingBinaryOperator>(&I)) {
703  HasNSW = I.hasNoSignedWrap();
704  HasNUW = I.hasNoUnsignedWrap();
705  }
706  if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
707  HasNSW &= LOBO->hasNoSignedWrap();
708  HasNUW &= LOBO->hasNoUnsignedWrap();
709  }
710 
711  if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
712  HasNSW &= ROBO->hasNoSignedWrap();
713  HasNUW &= ROBO->hasNoUnsignedWrap();
714  }
715 
716  if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
717  // We can propagate 'nsw' if we know that
718  // %Y = mul nsw i16 %X, C
719  // %Z = add nsw i16 %Y, %X
720  // =>
721  // %Z = mul nsw i16 %X, C+1
722  //
723  // iff C+1 isn't INT_MIN
724  const APInt *CInt;
725  if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
726  cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
727 
728  // nuw can be propagated with any constant or nuw value.
729  cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
730  }
731  }
732  return RetVal;
733 }
734 
735 Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
736  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
737  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
738  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
739  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
740  Value *A, *B, *C, *D;
741  Instruction::BinaryOps LHSOpcode, RHSOpcode;
742 
743  if (Op0)
744  LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
745  if (Op1)
746  RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
747 
748  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
749  // a common term.
750  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
751  if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
752  return V;
753 
754  // The instruction has the form "(A op' B) op (C)". Try to factorize common
755  // term.
756  if (Op0)
757  if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
758  if (Value *V =
759  tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
760  return V;
761 
762  // The instruction has the form "(B) op (C op' D)". Try to factorize common
763  // term.
764  if (Op1)
765  if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
766  if (Value *V =
767  tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
768  return V;
769 
770  return nullptr;
771 }
772 
773 /// This tries to simplify binary operations which some other binary operation
774 /// distributes over either by factorizing out common terms
775 /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
776 /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
777 /// Returns the simplified value, or null if it didn't simplify.
778 Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
779  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
780  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
781  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
782  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
783 
784  // Factorization.
785  if (Value *R = tryFactorizationFolds(I))
786  return R;
787 
788  // Expansion.
789  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
790  // The instruction has the form "(A op' B) op C". See if expanding it out
791  // to "(A op C) op' (B op C)" results in simplifications.
792  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
793  Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
794 
795  // Disable the use of undef because it's not safe to distribute undef.
796  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
797  Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
798  Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
799 
800  // Do "A op C" and "B op C" both simplify?
801  if (L && R) {
802  // They do! Return "L op' R".
803  ++NumExpand;
804  C = Builder.CreateBinOp(InnerOpcode, L, R);
805  C->takeName(&I);
806  return C;
807  }
808 
809  // Does "A op C" simplify to the identity value for the inner opcode?
810  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
811  // They do! Return "B op C".
812  ++NumExpand;
813  C = Builder.CreateBinOp(TopLevelOpcode, B, C);
814  C->takeName(&I);
815  return C;
816  }
817 
818  // Does "B op C" simplify to the identity value for the inner opcode?
819  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
820  // They do! Return "A op C".
821  ++NumExpand;
822  C = Builder.CreateBinOp(TopLevelOpcode, A, C);
823  C->takeName(&I);
824  return C;
825  }
826  }
827 
828  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
829  // The instruction has the form "A op (B op' C)". See if expanding it out
830  // to "(A op B) op' (A op C)" results in simplifications.
831  Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
832  Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
833 
834  // Disable the use of undef because it's not safe to distribute undef.
835  auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
836  Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
837  Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
838 
839  // Do "A op B" and "A op C" both simplify?
840  if (L && R) {
841  // They do! Return "L op' R".
842  ++NumExpand;
843  A = Builder.CreateBinOp(InnerOpcode, L, R);
844  A->takeName(&I);
845  return A;
846  }
847 
848  // Does "A op B" simplify to the identity value for the inner opcode?
849  if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
850  // They do! Return "A op C".
851  ++NumExpand;
852  A = Builder.CreateBinOp(TopLevelOpcode, A, C);
853  A->takeName(&I);
854  return A;
855  }
856 
857  // Does "A op C" simplify to the identity value for the inner opcode?
858  if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
859  // They do! Return "A op B".
860  ++NumExpand;
861  A = Builder.CreateBinOp(TopLevelOpcode, A, B);
862  A->takeName(&I);
863  return A;
864  }
865  }
866 
867  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
868 }
869 
870 Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
871  Value *LHS,
872  Value *RHS) {
873  Value *A, *B, *C, *D, *E, *F;
874  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
875  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
876  if (!LHSIsSelect && !RHSIsSelect)
877  return nullptr;
878 
879  FastMathFlags FMF;
880  BuilderTy::FastMathFlagGuard Guard(Builder);
881  if (isa<FPMathOperator>(&I)) {
882  FMF = I.getFastMathFlags();
883  Builder.setFastMathFlags(FMF);
884  }
885 
886  Instruction::BinaryOps Opcode = I.getOpcode();
887  SimplifyQuery Q = SQ.getWithInstruction(&I);
888 
889  Value *Cond, *True = nullptr, *False = nullptr;
890 
891  // Special-case for add/negate combination. Replace the zero in the negation
892  // with the trailing add operand:
893  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
894  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
895  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
896  // We need an 'add' and exactly 1 arm of the select to have been simplified.
897  if (Opcode != Instruction::Add || (!True && !False) || (True && False))
898  return nullptr;
899 
900  Value *N;
901  if (True && match(FVal, m_Neg(m_Value(N)))) {
902  Value *Sub = Builder.CreateSub(Z, N);
903  return Builder.CreateSelect(Cond, True, Sub, I.getName());
904  }
905  if (False && match(TVal, m_Neg(m_Value(N)))) {
906  Value *Sub = Builder.CreateSub(Z, N);
907  return Builder.CreateSelect(Cond, Sub, False, I.getName());
908  }
909  return nullptr;
910  };
911 
912  if (LHSIsSelect && RHSIsSelect && A == D) {
913  // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
914  Cond = A;
915  True = simplifyBinOp(Opcode, B, E, FMF, Q);
916  False = simplifyBinOp(Opcode, C, F, FMF, Q);
917 
918  if (LHS->hasOneUse() && RHS->hasOneUse()) {
919  if (False && !True)
920  True = Builder.CreateBinOp(Opcode, B, E);
921  else if (True && !False)
922  False = Builder.CreateBinOp(Opcode, C, F);
923  }
924  } else if (LHSIsSelect && LHS->hasOneUse()) {
925  // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
926  Cond = A;
927  True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
928  False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
929  if (Value *NewSel = foldAddNegate(B, C, RHS))
930  return NewSel;
931  } else if (RHSIsSelect && RHS->hasOneUse()) {
932  // X op (D ? E : F) -> D ? (X op E) : (X op F)
933  Cond = D;
934  True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
935  False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
936  if (Value *NewSel = foldAddNegate(E, F, LHS))
937  return NewSel;
938  }
939 
940  if (!True || !False)
941  return nullptr;
942 
943  Value *SI = Builder.CreateSelect(Cond, True, False);
944  SI->takeName(&I);
945  return SI;
946 }
947 
948 /// Freely adapt every user of V as-if V was changed to !V.
949 /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
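/// For example (illustrative): a "select i1 %v, i32 %a, i32 %b" user is
/// adapted by swapping its arms (and profile metadata), a conditional branch
/// on %v gets its successors swapped, and an "xor i1 %v, true" user is
/// replaced with %v itself.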
950 void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
951  for (User *U : make_early_inc_range(I->users())) {
952  if (U == IgnoredUser)
953  continue; // Don't consider this user.
954  switch (cast<Instruction>(U)->getOpcode()) {
955  case Instruction::Select: {
956  auto *SI = cast<SelectInst>(U);
957  SI->swapValues();
958  SI->swapProfMetadata();
959  break;
960  }
961  case Instruction::Br:
962  cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
963  break;
964  case Instruction::Xor:
965  replaceInstUsesWith(cast<Instruction>(*U), I);
966  break;
967  default:
968  llvm_unreachable("Got unexpected user - out of sync with "
969  "canFreelyInvertAllUsersOf() ?");
970  }
971  }
972 }
973 
974 /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
975 /// constant zero (which is the 'negate' form).
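/// For example (illustrative): for "%n = sub i32 0, %x" this returns %x, and
/// for the constant i32 -5 it returns i32 5.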
976 Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
977  Value *NegV;
978  if (match(V, m_Neg(m_Value(NegV))))
979  return NegV;
980 
981  // Constants can be considered to be negated values if they can be folded.
982  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
983  return ConstantExpr::getNeg(C);
984 
985  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
986  if (C->getType()->getElementType()->isIntegerTy())
987  return ConstantExpr::getNeg(C);
988 
989  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
990  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
991  Constant *Elt = CV->getAggregateElement(i);
992  if (!Elt)
993  return nullptr;
994 
995  if (isa<UndefValue>(Elt))
996  continue;
997 
998  if (!isa<ConstantInt>(Elt))
999  return nullptr;
1000  }
1001  return ConstantExpr::getNeg(CV);
1002  }
1003 
1004  // Negate integer vector splats.
1005  if (auto *CV = dyn_cast<Constant>(V))
1006  if (CV->getType()->isVectorTy() &&
1007  CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1008  return ConstantExpr::getNeg(CV);
1009 
1010  return nullptr;
1011 }
1012 
1013 /// A binop with a constant operand and a sign-extended boolean operand may be
1014 /// converted into a select of constants by applying the binary operation to
1015 /// the constant with the two possible values of the extended boolean (0 or -1).
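/// For example (illustrative IR):
///   %s = sext i1 %x to i32
///   %r = mul i32 %s, 42
/// becomes
///   %r = select i1 %x, i32 -42, i32 0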
1016 Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1017  // TODO: Handle non-commutative binop (constant is operand 0).
1018  // TODO: Handle zext.
1019  // TODO: Peek through 'not' of cast.
1020  Value *BO0 = BO.getOperand(0);
1021  Value *BO1 = BO.getOperand(1);
1022  Value *X;
1023  Constant *C;
1024  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1025  !X->getType()->isIntOrIntVectorTy(1))
1026  return nullptr;
1027 
1028  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1029  Constant *Ones = Constant::getAllOnesValue(BO.getType());
1030  Constant *Zero = Constant::getNullValue(BO.getType());
1031  Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1032  Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1033  return SelectInst::Create(X, TVal, FVal);
1034 }
1035 
1036 static Constant *constantFoldOperationIntoSelectOperand(
1037  Instruction &I, SelectInst *SI, Value *SO) {
1038  auto *ConstSO = dyn_cast<Constant>(SO);
1039  if (!ConstSO)
1040  return nullptr;
1041 
1042  SmallVector<Constant *> ConstOps;
1043  for (Value *Op : I.operands()) {
1044  if (Op == SI)
1045  ConstOps.push_back(ConstSO);
1046  else if (auto *C = dyn_cast<Constant>(Op))
1047  ConstOps.push_back(C);
1048  else
1049  llvm_unreachable("Operands should be select or constant");
1050  }
1051  return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
1052 }
1053 
1054 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
1055  InstCombiner::BuilderTy &Builder) {
1056  if (auto *Cast = dyn_cast<CastInst>(&I))
1057  return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
1058 
1059  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1060  assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
1061  "Expected constant-foldable intrinsic");
1062  Intrinsic::ID IID = II->getIntrinsicID();
1063  if (II->arg_size() == 1)
1064  return Builder.CreateUnaryIntrinsic(IID, SO);
1065 
1066  // This works for real binary ops like min/max (where we always expect the
1067  // constant operand to be canonicalized as op1) and unary ops with a bonus
1068  // constant argument like ctlz/cttz.
1069  // TODO: Handle non-commutative binary intrinsics as below for binops.
1070  assert(II->arg_size() == 2 && "Expected binary intrinsic");
1071  assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
1072  return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
1073  }
1074 
1075  if (auto *EI = dyn_cast<ExtractElementInst>(&I))
1076  return Builder.CreateExtractElement(SO, EI->getIndexOperand());
1077 
1078  assert(I.isBinaryOp() && "Unexpected opcode for select folding");
1079 
1080  // Figure out if the constant is the left or the right argument.
1081  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
1082  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
1083 
1084  Value *Op0 = SO, *Op1 = ConstOperand;
1085  if (!ConstIsRHS)
1086  std::swap(Op0, Op1);
1087 
1088  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
1089  Op1, SO->getName() + ".op");
1090  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
1091  NewBOI->copyIRFlags(&I);
1092  return NewBO;
1093 }
1094 
1095 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1096  bool FoldWithMultiUse) {
1097  // Don't modify shared select instructions unless FoldWithMultiUse is set.
1098  if (!SI->hasOneUse() && !FoldWithMultiUse)
1099  return nullptr;
1100 
1101  Value *TV = SI->getTrueValue();
1102  Value *FV = SI->getFalseValue();
1103  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
1104  return nullptr;
1105 
1106  // Bool selects with constant operands can be folded to logical ops.
1107  if (SI->getType()->isIntOrIntVectorTy(1))
1108  return nullptr;
1109 
1110  // If it's a bitcast involving vectors, make sure it has the same number of
1111  // elements on both sides.
1112  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
1113  VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
1114  VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
1115 
1116  // Verify that either both or neither are vectors.
1117  if ((SrcTy == nullptr) != (DestTy == nullptr))
1118  return nullptr;
1119 
1120  // If vectors, verify that they have the same number of elements.
1121  if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
1122  return nullptr;
1123  }
1124 
1125  // Test if a CmpInst instruction is used exclusively by a select as
1126  // part of a minimum or maximum operation. If so, refrain from doing
1127  // any other folding. This helps out other analyses which understand
1128  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
1129  // and CodeGen. And in this case, at least one of the comparison
1130  // operands has at least one user besides the compare (the select),
1131  // which would often largely negate the benefit of folding anyway.
1132  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
1133  if (CI->hasOneUse()) {
1134  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1135 
1136  // FIXME: This is a hack to avoid infinite looping with min/max patterns.
1137  // We have to ensure that vector constants that only differ with
1138  // undef elements are treated as equivalent.
1139  auto areLooselyEqual = [](Value *A, Value *B) {
1140  if (A == B)
1141  return true;
1142 
1143  // Test for vector constants.
1144  Constant *ConstA, *ConstB;
1145  if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
1146  return false;
1147 
1148  // TODO: Deal with FP constants?
1149  if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
1150  return false;
1151 
1152  // Compare for equality including undefs as equal.
1153  auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
1154  const APInt *C;
1155  return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
1156  };
1157 
1158  if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
1159  (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
1160  return nullptr;
1161  }
1162  }
1163 
1164  // Make sure that one of the select arms constant folds successfully.
1165  Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
1166  Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
1167  if (!NewTV && !NewFV)
1168  return nullptr;
1169 
1170  // Create an instruction for the arm that did not fold.
1171  if (!NewTV)
1172  NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
1173  if (!NewFV)
1174  NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
1175  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1176 }
1177 
1178 Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
1179  unsigned NumPHIValues = PN->getNumIncomingValues();
1180  if (NumPHIValues == 0)
1181  return nullptr;
1182 
1183  // We normally only transform phis with a single use. However, if a PHI has
1184  // multiple uses and they are all the same operation, we can fold *all* of the
1185  // uses into the PHI.
1186  if (!PN->hasOneUse()) {
1187  // Walk the use list for the instruction, comparing them to I.
1188  for (User *U : PN->users()) {
1189  Instruction *UI = cast<Instruction>(U);
1190  if (UI != &I && !I.isIdenticalTo(UI))
1191  return nullptr;
1192  }
1193  // Otherwise, we can replace *all* users with the new PHI we form.
1194  }
1195 
1196  // Check to see whether the instruction can be folded into each phi operand.
1197  // If there is one operand that does not fold, remember the BB it is in.
1198  // If there is more than one or if *it* is a PHI, bail out.
1199  SmallVector<Value *> NewPhiValues;
1200  BasicBlock *NonSimplifiedBB = nullptr;
1201  Value *NonSimplifiedInVal = nullptr;
1202  for (unsigned i = 0; i != NumPHIValues; ++i) {
1203  Value *InVal = PN->getIncomingValue(i);
1204  BasicBlock *InBB = PN->getIncomingBlock(i);
1205 
1206  // NB: It is a precondition of this transform that the operands be
1207  // phi translatable! This is usually trivially satisfied by limiting it
1208  // to constant ops, and for selects we do a more sophisticated check.
1209  SmallVector<Value *> Ops;
1210  for (Value *Op : I.operands()) {
1211  if (Op == PN)
1212  Ops.push_back(InVal);
1213  else
1214  Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1215  }
1216 
1217  // Don't consider the simplification successful if we get back a constant
1218  // expression. That's just an instruction in hiding.
1219  // Also reject the case where we simplify back to the phi node. We wouldn't
1220  // be able to remove it in that case.
1221  Value *NewVal = simplifyInstructionWithOperands(
1222  &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1223  if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
1224  NewPhiValues.push_back(NewVal);
1225  continue;
1226  }
1227 
1228  if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
1229  if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
1230 
1231  NonSimplifiedBB = InBB;
1232  NonSimplifiedInVal = InVal;
1233  NewPhiValues.push_back(nullptr);
1234 
1235  // If the InVal is an invoke at the end of the pred block, then we can't
1236  // insert a computation after it without breaking the edge.
1237  if (isa<InvokeInst>(InVal))
1238  if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
1239  return nullptr;
1240 
1241  // If the incoming non-constant value is reachable from the phis block,
1242  // we'll push the operation across a loop backedge. This could result in
1243  // an infinite combine loop, and is generally non-profitable (especially
1244  // if the operation was originally outside the loop).
1245  if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
1246  LI))
1247  return nullptr;
1248  }
1249 
1250  // If there is exactly one non-simplified value, we can insert a copy of the
1251  // operation in that block. However, if this is a critical edge, we would be
1252  // inserting the computation on some other paths (e.g. inside a loop). Only
1253  // do this if the pred block is unconditionally branching into the phi block.
1254  // Also, make sure that the pred block is not dead code.
1255  if (NonSimplifiedBB != nullptr) {
1256  BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
1257  if (!BI || !BI->isUnconditional() ||
1258  !DT.isReachableFromEntry(NonSimplifiedBB))
1259  return nullptr;
1260  }
1261 
1262  // Okay, we can do the transformation: create the new PHI node.
1263  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
1264  InsertNewInstBefore(NewPN, *PN);
1265  NewPN->takeName(PN);
1266 
1267  // If we are going to have to insert a new computation, do so right before the
1268  // predecessor's terminator.
1269  Instruction *Clone = nullptr;
1270  if (NonSimplifiedBB) {
1271  Clone = I.clone();
1272  for (Use &U : Clone->operands()) {
1273  if (U == PN)
1274  U = NonSimplifiedInVal;
1275  else
1276  U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
1277  }
1278  InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
1279  }
1280 
1281  for (unsigned i = 0; i != NumPHIValues; ++i) {
1282  if (NewPhiValues[i])
1283  NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
1284  else
1285  NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
1286  }
1287 
1288  for (User *U : make_early_inc_range(PN->users())) {
1289  Instruction *User = cast<Instruction>(U);
1290  if (User == &I) continue;
1291  replaceInstUsesWith(*User, NewPN);
1292  eraseInstFromFunction(*User);
1293  }
1294  return replaceInstUsesWith(I, NewPN);
1295 }
1296 
1297 Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
1298  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
1299  // we are guarding against replicating the binop in >1 predecessor.
1300  // This could miss matching a phi with 2 constant incoming values.
1301  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
1302  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
1303  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
1304  Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
1305  return nullptr;
1306 
1307  // TODO: Remove the restriction for binop being in the same block as the phis.
1308  if (BO.getParent() != Phi0->getParent() ||
1309  BO.getParent() != Phi1->getParent())
1310  return nullptr;
1311 
1312  // Match a pair of incoming constants for one of the predecessor blocks.
1313  BasicBlock *ConstBB, *OtherBB;
1314  Constant *C0, *C1;
1315  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
1316  ConstBB = Phi0->getIncomingBlock(0);
1317  OtherBB = Phi0->getIncomingBlock(1);
1318  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
1319  ConstBB = Phi0->getIncomingBlock(1);
1320  OtherBB = Phi0->getIncomingBlock(0);
1321  } else {
1322  return nullptr;
1323  }
1324  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
1325  return nullptr;
1326 
1327  // The block that we are hoisting to must reach here unconditionally.
1328  // Otherwise, we could be speculatively executing an expensive or
1329  // non-speculative op.
1330  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
1331  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
1332  !DT.isReachableFromEntry(OtherBB))
1333  return nullptr;
1334 
1335  // TODO: This check could be tightened to only apply to binops (div/rem) that
1336  // are not safe to speculatively execute. But that could allow hoisting
1337  // potentially expensive instructions (fdiv for example).
1338  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
1339  if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
1340  return nullptr;
1341 
1342  // Fold constants for the predecessor block with constant incoming values.
1343  Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
1344  if (!NewC)
1345  return nullptr;
1346 
1347  // Make a new binop in the predecessor block with the non-constant incoming
1348  // values.
1349  Builder.SetInsertPoint(PredBlockBranch);
1350  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
1351  Phi0->getIncomingValueForBlock(OtherBB),
1352  Phi1->getIncomingValueForBlock(OtherBB));
1353  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
1354  NotFoldedNewBO->copyIRFlags(&BO);
1355 
1356  // Replace the binop with a phi of the new values. The old phis are dead.
1357  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
1358  NewPhi->addIncoming(NewBO, OtherBB);
1359  NewPhi->addIncoming(NewC, ConstBB);
1360  return NewPhi;
1361 }
1362 
1363 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
1364  if (!isa<Constant>(I.getOperand(1)))
1365  return nullptr;
1366 
1367  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
1368  if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
1369  return NewSel;
1370  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
1371  if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
1372  return NewPhi;
1373  }
1374  return nullptr;
1375 }
1376 
1377 /// Given a pointer type and a constant offset, determine whether or not there
1378 /// is a sequence of GEP indices into the pointed type that will land us at the
1379 /// specified offset. If so, fill them into NewIndices and return the resultant
1380 /// element type, otherwise return null.
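/// For example (illustrative, assuming the default struct layout): for a
/// pointer to {i32, [4 x i16]} and offset 6, the indices would be 0, 1, 1
/// (element 1 of the array field) and the returned element type would be i16.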
1381 static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
1382  SmallVectorImpl<Value *> &NewIndices,
1383  const DataLayout &DL) {
1384  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
1385  Type *Ty = PtrTy->getNonOpaquePointerElementType();
1386  if (!Ty->isSized())
1387  return nullptr;
1388 
1389  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
1390  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
1391  if (!Offset.isZero())
1392  return nullptr;
1393 
1394  for (const APInt &Index : Indices)
1395  NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
1396  return Ty;
1397 }
1398 
1399 static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
1400  // If this GEP has only 0 indices, it is the same pointer as
1401  // Src. If Src is not a trivial GEP too, don't combine
1402  // the indices.
1403  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
1404  !Src.hasOneUse())
1405  return false;
1406  return true;
1407 }
1408 
1409 /// Return a value X such that Val = X * Scale, or null if none.
1410 /// If the multiplication is known not to overflow, then NoSignedWrap is set.
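/// For example (illustrative): descaling "%v = mul nsw i32 %x, 12" by a Scale
/// of 4 rewrites the constant so that the returned value computes %x * 3, and
/// NoSignedWrap is set because the original multiply was nsw.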
1411 Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
1412  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
1413  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
1414  Scale.getBitWidth() && "Scale not compatible with value!");
1415 
1416  // If Val is zero or Scale is one then Val = Val * Scale.
1417  if (match(Val, m_Zero()) || Scale == 1) {
1418  NoSignedWrap = true;
1419  return Val;
1420  }
1421 
1422  // If Scale is zero then it does not divide Val.
1423  if (Scale.isMinValue())
1424  return nullptr;
1425 
1426  // Look through chains of multiplications, searching for a constant that is
1427  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
1428  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
1429  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
1430  // down from Val:
1431  //
1432  // Val = M1 * X || Analysis starts here and works down
1433  // M1 = M2 * Y || Doesn't descend into terms with more
1434  // M2 = Z * 4 \/ than one use
1435  //
1436  // Then to modify a term at the bottom:
1437  //
1438  // Val = M1 * X
1439  // M1 = Z * Y || Replaced M2 with Z
1440  //
1441  // Then to work back up correcting nsw flags.
1442 
1443  // Op - the term we are currently analyzing. Starts at Val then drills down.
1444  // Replaced with its descaled value before exiting from the drill down loop.
1445  Value *Op = Val;
1446 
1447  // Parent - initially null, but after drilling down notes where Op came from.
1448  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
1449  // 0'th operand of Val.
1450  std::pair<Instruction *, unsigned> Parent;
1451 
1452  // Set if the transform requires a descaling at deeper levels that doesn't
1453  // overflow.
1454  bool RequireNoSignedWrap = false;
1455 
1456  // Log base 2 of the scale. Negative if not a power of 2.
1457  int32_t logScale = Scale.exactLogBase2();
1458 
1459  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
1460  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
1461  // If Op is a constant divisible by Scale then descale to the quotient.
1462  APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
1463  APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
1464  if (!Remainder.isMinValue())
1465  // Not divisible by Scale.
1466  return nullptr;
1467  // Replace with the quotient in the parent.
1468  Op = ConstantInt::get(CI->getType(), Quotient);
1469  NoSignedWrap = true;
1470  break;
1471  }
1472 
1473  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
1474  if (BO->getOpcode() == Instruction::Mul) {
1475  // Multiplication.
1476  NoSignedWrap = BO->hasNoSignedWrap();
1477  if (RequireNoSignedWrap && !NoSignedWrap)
1478  return nullptr;
1479 
1480  // There are three cases for multiplication: multiplication by exactly
1481  // the scale, multiplication by a constant different to the scale, and
1482  // multiplication by something else.
1483  Value *LHS = BO->getOperand(0);
1484  Value *RHS = BO->getOperand(1);
1485 
1486  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1487  // Multiplication by a constant.
1488  if (CI->getValue() == Scale) {
1489  // Multiplication by exactly the scale, replace the multiplication
1490  // by its left-hand side in the parent.
1491  Op = LHS;
1492  break;
1493  }
1494 
1495  // Otherwise drill down into the constant.
1496  if (!Op->hasOneUse())
1497  return nullptr;
1498 
1499  Parent = std::make_pair(BO, 1);
1500  continue;
1501  }
1502 
1503  // Multiplication by something else. Drill down into the left-hand side
1504  // since that's where the reassociate pass puts the good stuff.
1505  if (!Op->hasOneUse())
1506  return nullptr;
1507 
1508  Parent = std::make_pair(BO, 0);
1509  continue;
1510  }
1511 
1512  if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
1513  isa<ConstantInt>(BO->getOperand(1))) {
1514  // Multiplication by a power of 2.
1515  NoSignedWrap = BO->hasNoSignedWrap();
1516  if (RequireNoSignedWrap && !NoSignedWrap)
1517  return nullptr;
1518 
1519  Value *LHS = BO->getOperand(0);
1520  int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
1521  getLimitedValue(Scale.getBitWidth());
1522  // Op = LHS << Amt.
1523 
1524  if (Amt == logScale) {
1525  // Multiplication by exactly the scale, replace the multiplication
1526  // by its left-hand side in the parent.
1527  Op = LHS;
1528  break;
1529  }
1530  if (Amt < logScale || !Op->hasOneUse())
1531  return nullptr;
1532 
1533  // Multiplication by more than the scale. Reduce the multiplying amount
1534  // by the scale in the parent.
1535  Parent = std::make_pair(BO, 1);
1536  Op = ConstantInt::get(BO->getType(), Amt - logScale);
1537  break;
1538  }
1539  }
1540 
1541  if (!Op->hasOneUse())
1542  return nullptr;
1543 
1544  if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1545  if (Cast->getOpcode() == Instruction::SExt) {
1546  // Op is sign-extended from a smaller type, descale in the smaller type.
1547  unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1548  APInt SmallScale = Scale.trunc(SmallSize);
1549  // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
1550  // descale Op as (sext Y) * Scale. In order to have
1551  // sext (Y * SmallScale) = (sext Y) * Scale
1552  // some conditions need to hold however: SmallScale must sign-extend to
1553  // Scale and the multiplication Y * SmallScale should not overflow.
1554  if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1555  // SmallScale does not sign-extend to Scale.
1556  return nullptr;
1557  assert(SmallScale.exactLogBase2() == logScale);
1558  // Require that Y * SmallScale must not overflow.
1559  RequireNoSignedWrap = true;
1560 
1561  // Drill down through the cast.
1562  Parent = std::make_pair(Cast, 0);
1563  Scale = SmallScale;
1564  continue;
1565  }
1566 
1567  if (Cast->getOpcode() == Instruction::Trunc) {
1568  // Op is truncated from a larger type, descale in the larger type.
1569  // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
1570  // trunc (Y * sext Scale) = (trunc Y) * Scale
1571  // always holds. However (trunc Y) * Scale may overflow even if
1572  // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1573  // from this point up in the expression (see later).
1574  if (RequireNoSignedWrap)
1575  return nullptr;
1576 
1577  // Drill down through the cast.
1578  unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1579  Parent = std::make_pair(Cast, 0);
1580  Scale = Scale.sext(LargeSize);
1581  if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1582  logScale = -1;
1583  assert(Scale.exactLogBase2() == logScale);
1584  continue;
1585  }
1586  }
1587 
1588  // Unsupported expression, bail out.
1589  return nullptr;
1590  }
1591 
1592  // If Op is zero then Val = Op * Scale.
1593  if (match(Op, m_Zero())) {
1594  NoSignedWrap = true;
1595  return Op;
1596  }
1597 
1598  // We know that we can successfully descale, so from here on we can safely
1599  // modify the IR. Op holds the descaled version of the deepest term in the
1600  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
1601  // not to overflow.
1602 
1603  if (!Parent.first)
1604  // The expression only had one term.
1605  return Op;
1606 
1607  // Rewrite the parent using the descaled version of its operand.
1608  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1609  assert(Op != Parent.first->getOperand(Parent.second) &&
1610  "Descaling was a no-op?");
1611  replaceOperand(*Parent.first, Parent.second, Op);
1612  Worklist.push(Parent.first);
1613 
1614  // Now work back up the expression correcting nsw flags. The logic is based
1615  // on the following observation: if X * Y is known not to overflow as a signed
1616  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1617  // then X * Z will not overflow as a signed multiplication either. As we work
1618  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1619  // current level has strictly smaller absolute value than the original.
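  // For example (illustrative): if "mul nsw i8 %x, 4" is known not to overflow,
  // then the descaled "mul i8 %x, 2" cannot overflow either, because
  // |2 * %x| <= |4 * %x| <= 127.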
1620  Instruction *Ancestor = Parent.first;
1621  do {
1622  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1623  // If the multiplication wasn't nsw then we can't say anything about the
1624  // value of the descaled multiplication, and we have to clear nsw flags
1625  // from this point on up.
1626  bool OpNoSignedWrap = BO->hasNoSignedWrap();
1627  NoSignedWrap &= OpNoSignedWrap;
1628  if (NoSignedWrap != OpNoSignedWrap) {
1629  BO->setHasNoSignedWrap(NoSignedWrap);
1630  Worklist.push(Ancestor);
1631  }
1632  } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1633  // The fact that the descaled input to the trunc has smaller absolute
1634  // value than the original input doesn't tell us anything useful about
1635  // the absolute values of the truncations.
1636  NoSignedWrap = false;
1637  }
1638  assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1639  "Failed to keep proper track of nsw flags while drilling down?");
1640 
1641  if (Ancestor == Val)
1642  // Got to the top, all done!
1643  return Val;
1644 
1645  // Move up one level in the expression.
1646  assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1647  Ancestor = Ancestor->user_back();
1648  } while (true);
1649 }
1650 
1651 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
1652  if (!isa<VectorType>(Inst.getType()))
1653  return nullptr;
1654 
1655  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1656  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1657  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
1658  cast<VectorType>(Inst.getType())->getElementCount());
1659  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
1660  cast<VectorType>(Inst.getType())->getElementCount());
1661 
1662  // If both operands of the binop are vector concatenations, then perform the
1663  // narrow binop on each pair of the source operands followed by concatenation
1664  // of the results.
1665  Value *L0, *L1, *R0, *R1;
1666  ArrayRef<int> Mask;
1667  if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
1668  match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
1669  LHS->hasOneUse() && RHS->hasOneUse() &&
1670  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1671  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1672  // This transform does not have the speculative execution constraint as
1673  // below because the shuffle is a concatenation. The new binops are
1674  // operating on exactly the same elements as the existing binop.
1675  // TODO: We could ease the mask requirement to allow different undef lanes,
1676  // but that requires an analysis of the binop-with-undef output value.
1677  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1678  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1679  BO->copyIRFlags(&Inst);
1680  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1681  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1682  BO->copyIRFlags(&Inst);
1683  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1684  }
1685 
1686  auto createBinOpReverse = [&](Value *X, Value *Y) {
1687  Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
1688  if (auto *BO = dyn_cast<BinaryOperator>(V))
1689  BO->copyIRFlags(&Inst);
1690  Module *M = Inst.getModule();
1691  Function *F = Intrinsic::getDeclaration(
1692  M, Intrinsic::experimental_vector_reverse, V->getType());
1693  return CallInst::Create(F, V);
1694  };
1695 
1696  // NOTE: Reverse shuffles don't require the speculative execution protection
1697  // below because they don't affect which lanes take part in the computation.
1698 
1699  Value *V1, *V2;
1700  if (match(LHS, m_VecReverse(m_Value(V1)))) {
1701  // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
1702  if (match(RHS, m_VecReverse(m_Value(V2))) &&
1703  (LHS->hasOneUse() || RHS->hasOneUse() ||
1704  (LHS == RHS && LHS->hasNUses(2))))
1705  return createBinOpReverse(V1, V2);
1706 
1707  // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
1708  if (LHS->hasOneUse() && isSplatValue(RHS))
1709  return createBinOpReverse(V1, RHS);
1710  }
1711  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
1712  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
1713  return createBinOpReverse(LHS, V2);
1714 
1715  // It may not be safe to reorder shuffles and things like div, urem, etc.
1716  // because we may trap when executing those ops on unknown vector elements.
1717  // See PR20059.
1718  if (!isSafeToSpeculativelyExecute(&Inst))
1719  return nullptr;
1720 
1721  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
1722  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1723  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1724  BO->copyIRFlags(&Inst);
1725  return new ShuffleVectorInst(XY, M);
1726  };
1727 
1728  // If both arguments of the binary operation are shuffles that use the same
1729  // mask and shuffle within a single vector, move the shuffle after the binop.
1730  if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
1731  match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
1732  V1->getType() == V2->getType() &&
1733  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1734  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1735  return createBinOpShuffle(V1, V2, Mask);
1736  }
1737 
1738  // If both arguments of a commutative binop are select-shuffles that use the
1739  // same mask with commuted operands, the shuffles are unnecessary.
1740  if (Inst.isCommutative() &&
1741  match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
1742  match(RHS,
1743  m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
1744  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1745  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1746  // TODO: Allow shuffles that contain undefs in the mask?
1747  // That is legal, but it reduces undef knowledge.
1748  // TODO: Allow arbitrary shuffles by shuffling after binop?
1749  // That might be legal, but we have to deal with poison.
1750  if (LShuf->isSelect() &&
1751  !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
1752  RShuf->isSelect() &&
1753  !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
1754  // Example:
1755  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1756  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1757  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1758  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1759  NewBO->copyIRFlags(&Inst);
1760  return NewBO;
1761  }
1762  }
1763 
1764  // If one argument is a shuffle within one vector and the other is a constant,
1765  // try moving the shuffle after the binary operation. This canonicalization
1766  // intends to move shuffles closer to other shuffles and binops closer to
1767  // other binops, so they can be folded. It may also enable demanded elements
1768  // transforms.
1769  Constant *C;
1770  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
1771  if (InstVTy &&
1772  match(&Inst,
1773  m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
1774  m_ImmConstant(C))) &&
1775  cast<FixedVectorType>(V1->getType())->getNumElements() <=
1776  InstVTy->getNumElements()) {
1777  assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
1778  "Shuffle should not change scalar type");
1779 
1780  // Find constant NewC that has property:
1781  // shuffle(NewC, ShMask) = C
1782  // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1783  // reorder is not possible. A 1-to-1 mapping is not required. Example:
1784  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1785  bool ConstOp1 = isa<Constant>(RHS);
1786  ArrayRef<int> ShMask = Mask;
1787  unsigned SrcVecNumElts =
1788  cast<FixedVectorType>(V1->getType())->getNumElements();
1789  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1790  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1791  bool MayChange = true;
1792  unsigned NumElts = InstVTy->getNumElements();
1793  for (unsigned I = 0; I < NumElts; ++I) {
1794  Constant *CElt = C->getAggregateElement(I);
1795  if (ShMask[I] >= 0) {
1796  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1797  Constant *NewCElt = NewVecC[ShMask[I]];
1798  // Bail out if:
1799  // 1. The constant vector contains a constant expression.
1800  // 2. The shuffle needs an element of the constant vector that can't
1801  // be mapped to a new constant vector.
1802  // 3. This is a widening shuffle that copies elements of V1 into the
1803  // extended elements (extending with undef is allowed).
1804  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1805  I >= SrcVecNumElts) {
1806  MayChange = false;
1807  break;
1808  }
1809  NewVecC[ShMask[I]] = CElt;
1810  }
1811  // If this is a widening shuffle, we must be able to extend with undef
1812  // elements. If the original binop does not produce an undef in the high
1813  // lanes, then this transform is not safe.
1814  // Similarly for undef lanes due to the shuffle mask, we can only
1815  // transform binops that preserve undef.
1816  // TODO: We could shuffle those non-undef constant values into the
1817  // result by using a constant vector (rather than an undef vector)
1818  // as operand 1 of the new binop, but that might be too aggressive
1819  // for target-independent shuffle creation.
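  // For example (illustrative): "add undef, C" constant-folds to undef, so an
  // add preserves the undef lanes and passes this check, whereas "or undef, C"
  // folds to -1 rather than undef, so an or would not.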
1820  if (I >= SrcVecNumElts || ShMask[I] < 0) {
1821  Constant *MaybeUndef =
1822  ConstOp1
1823  ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
1824  : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
1825  if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
1826  MayChange = false;
1827  break;
1828  }
1829  }
1830  }
1831  if (MayChange) {
1832  Constant *NewC = ConstantVector::get(NewVecC);
1833  // It may not be safe to execute a binop on a vector with undef elements
1834  // because the entire instruction can be folded to undef or create poison
1835  // that did not exist in the original code.
1836  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1837  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1838 
1839  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1840  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1841  Value *NewLHS = ConstOp1 ? V1 : NewC;
1842  Value *NewRHS = ConstOp1 ? NewC : V1;
1843  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1844  }
1845  }
1846 
1847  // Try to reassociate to sink a splat shuffle after a binary operation.
1848  if (Inst.isAssociative() && Inst.isCommutative()) {
1849  // Canonicalize shuffle operand as LHS.
1850  if (isa<ShuffleVectorInst>(RHS))
1851  std::swap(LHS, RHS);
1852 
1853  Value *X;
1854  ArrayRef<int> MaskC;
1855  int SplatIndex;
1856  Value *Y, *OtherOp;
1857  if (!match(LHS,
1858  m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
1859  !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
1860  X->getType() != Inst.getType() ||
1861  !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
1862  return nullptr;
1863 
1864  // FIXME: This may not be safe if the analysis allows undef elements. By
1865  // moving 'Y' before the splat shuffle, we are implicitly assuming
1866  // that it is not undef/poison at the splat index.
1867  if (isSplatValue(OtherOp, SplatIndex)) {
1868  std::swap(Y, OtherOp);
1869  } else if (!isSplatValue(Y, SplatIndex)) {
1870  return nullptr;
1871  }
1872 
1873  // X and Y are splatted values, so perform the binary operation on those
1874  // values followed by a splat followed by the 2nd binary operation:
1875  // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
1876  Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
1877  SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
1878  Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
1879  Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
1880 
1881  // Intersect FMF on both new binops. Other (poison-generating) flags are
1882  // dropped to be safe.
1883  if (isa<FPMathOperator>(R)) {
1884  R->copyFastMathFlags(&Inst);
1885  R->andIRFlags(RHS);
1886  }
1887  if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
1888  NewInstBO->copyIRFlags(R);
1889  return R;
1890  }
1891 
1892  return nullptr;
1893 }
1894 
1895 /// Try to narrow the width of a binop if at least 1 operand is an extend
1896 /// of a value. This requires a potentially expensive known bits check to make
1897 /// sure the narrow op does not overflow.
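/// For example (an illustrative sketch, assuming known bits prove the narrow
/// add cannot wrap):
///   %a = zext i8 %x to i32
///   %b = zext i8 %y to i32
///   %r = add i32 %a, %b
/// -->
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32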
1898 Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
1899  // We need at least one extended operand.
1900  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1901 
1902  // If this is a sub, we swap the operands since we always want an extension
1903  // on the RHS. The LHS can be an extension or a constant.
1904  if (BO.getOpcode() == Instruction::Sub)
1905  std::swap(Op0, Op1);
1906 
1907  Value *X;
1908  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1909  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1910  return nullptr;
1911 
1912  // If both operands are the same extension from the same source type and we
1913  // can eliminate at least one (hasOneUse), this might work.
1914  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1915  Value *Y;
1916  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1917  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1918  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1919  // If that did not match, see if we have a suitable constant operand.
1920  // Truncating and extending must produce the same constant.
1921  Constant *WideC;
1922  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1923  return nullptr;
1924  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1925  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1926  return nullptr;
1927  Y = NarrowC;
1928  }
1929 
1930  // Swap back now that we found our operands.
1931  if (BO.getOpcode() == Instruction::Sub)
1932  std::swap(X, Y);
1933 
1934  // Both operands have narrow versions. Last step: the math must not overflow
1935  // in the narrow width.
1936  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1937  return nullptr;
1938 
1939  // bo (ext X), (ext Y) --> ext (bo X, Y)
1940  // bo (ext X), C --> ext (bo X, C')
1941  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1942  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1943  if (IsSext)
1944  NewBinOp->setHasNoSignedWrap();
1945  else
1946  NewBinOp->setHasNoUnsignedWrap();
1947  }
1948  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1949 }
1950 
1951 static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
1952  // At least one GEP must be inbounds.
1953  if (!GEP1.isInBounds() && !GEP2.isInBounds())
1954  return false;
1955 
1956  return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
1957  (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
1958 }
1959 
1960 /// Thread a GEP operation with constant indices through the constant true/false
1961 /// arms of a select.
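/// For example (an illustrative sketch, with @a and @b as hypothetical
/// constant globals):
///   %p = select i1 %c, ptr @a, ptr @b
///   %g = getelementptr inbounds [4 x i32], ptr %p, i64 0, i64 2
/// -->
///   %g = select i1 %c, ptr getelementptr inbounds ([4 x i32], ptr @a, i64 0, i64 2),
///                      ptr getelementptr inbounds ([4 x i32], ptr @b, i64 0, i64 2)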
1962 static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
1963  InstCombiner::BuilderTy &Builder) {
1964  if (!GEP.hasAllConstantIndices())
1965  return nullptr;
1966 
1967  Instruction *Sel;
1968  Value *Cond;
1969  Constant *TrueC, *FalseC;
1970  if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
1971  !match(Sel,
1972  m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
1973  return nullptr;
1974 
1975  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
1976  // Propagate 'inbounds' and metadata from existing instructions.
1977  // Note: using IRBuilder to create the constants for efficiency.
1978  SmallVector<Value *, 4> IndexC(GEP.indices());
1979  bool IsInBounds = GEP.isInBounds();
1980  Type *Ty = GEP.getSourceElementType();
1981  Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
1982  Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
1983  return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
1984 }
1985 
1986 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
1987  GEPOperator *Src) {
1988  // Combine Indices - If the source pointer to this getelementptr instruction
1989  // is a getelementptr instruction with matching element type, combine the
1990  // indices of the two getelementptr instructions into a single instruction.
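  // For example (illustrative):
  //   %a = getelementptr inbounds i32, ptr %p, i64 1
  //   %b = getelementptr inbounds i32, ptr %a, i64 2
  // can be combined into
  //   %b = getelementptr inbounds i32, ptr %p, i64 3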
1991  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1992  return nullptr;
1993 
1994  if (Src->getResultElementType() == GEP.getSourceElementType() &&
1995  Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
1996  Src->hasOneUse()) {
1997  Value *GO1 = GEP.getOperand(1);
1998  Value *SO1 = Src->getOperand(1);
1999 
2000  if (LI) {
2001  // Try to reassociate loop invariant GEP chains to enable LICM.
2002  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
2003  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
2004  // invariant: this breaks the dependence between GEPs and allows LICM
2005  // to hoist the invariant part out of the loop.
2006  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
2007  // The swapped GEPs are inbounds if both original GEPs are inbounds
2008  // and the sign of the offsets is the same. For simplicity, only
2009  // handle both offsets being non-negative.
2010  bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
2011  isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
2012  isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
2013  // Put NewSrc at same location as %src.
2014  Builder.SetInsertPoint(cast<Instruction>(Src));
2015  Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
2016  Src->getPointerOperand(), GO1,
2017  Src->getName(), IsInBounds);
2018  GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
2019  GEP.getSourceElementType(), NewSrc, {SO1});
2020  NewGEP->setIsInBounds(IsInBounds);
2021  return NewGEP;
2022  }
2023  }
2024  }
2025  }
2026 
2027  // Note that if our source is a gep chain itself then we wait for that
2028  // chain to be resolved before we perform this transformation. This
2029  // avoids us creating a TON of code in some cases.
2030  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
2031  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
2032  return nullptr; // Wait until our source is folded to completion.
2033 
2034  // For constant GEPs, use a more general offset-based folding approach.
2035  // Only do this for opaque pointers, as the result element type may change.
2036  Type *PtrTy = Src->getType()->getScalarType();
2037  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
2038  (Src->hasOneUse() || Src->hasAllConstantIndices())) {
2039  // Split Src into a variable part and a constant suffix.
2040  gep_type_iterator GTI = gep_type_begin(*Src);
2041  Type *BaseType = GTI.getIndexedType();
2042  bool IsFirstType = true;
2043  unsigned NumVarIndices = 0;
2044  for (auto Pair : enumerate(Src->indices())) {
2045  if (!isa<ConstantInt>(Pair.value())) {
2046  BaseType = GTI.getIndexedType();
2047  IsFirstType = false;
2048  NumVarIndices = Pair.index() + 1;
2049  }
2050  ++GTI;
2051  }
2052 
2053  // Determine the offset for the constant suffix of Src.
2054  APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
2055  if (NumVarIndices != Src->getNumIndices()) {
2056  // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2057  if (isa<ScalableVectorType>(BaseType))
2058  return nullptr;
2059 
2060  SmallVector<Value *> ConstantIndices;
2061  if (!IsFirstType)
2062  ConstantIndices.push_back(
2063  Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
2064  append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
2065  Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
2066  }
2067 
2068  // Add the offset for GEP (which is fully constant).
2069  if (!GEP.accumulateConstantOffset(DL, Offset))
2070  return nullptr;
2071 
2072  APInt OffsetOld = Offset;
2073  // Convert the total offset back into indices.
2074  SmallVector<APInt> ConstIndices =
2075  DL.getGEPIndicesForOffset(BaseType, Offset);
2076  if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
2077  // If both GEP are constant-indexed, and cannot be merged in either way,
2078  // convert them to a GEP of i8.
2079  if (Src->hasAllConstantIndices())
2080  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2081  ? GetElementPtrInst::CreateInBounds(
2082  Builder.getInt8Ty(), Src->getOperand(0),
2083  Builder.getInt(OffsetOld), GEP.getName())
2084  : GetElementPtrInst::Create(
2085  Builder.getInt8Ty(), Src->getOperand(0),
2086  Builder.getInt(OffsetOld), GEP.getName());
2087  return nullptr;
2088  }
2089 
2090  bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
2091  SmallVector<Value *> Indices;
2092  append_range(Indices, drop_end(Src->indices(),
2093  Src->getNumIndices() - NumVarIndices));
2094  for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
2095  Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
2096  // Even if the total offset is inbounds, we may end up representing it
2097  // by first performing a larger negative offset, and then a smaller
2098  // positive one. The large negative offset might go out of bounds. Only
2099  // preserve inbounds if all signs are the same.
2100  IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
2101  }
2102 
2103  return IsInBounds
2104  ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
2105  Src->getOperand(0), Indices,
2106  GEP.getName())
2107  : GetElementPtrInst::Create(Src->getSourceElementType(),
2108  Src->getOperand(0), Indices,
2109  GEP.getName());
2110  }
2111 
2112  if (Src->getResultElementType() != GEP.getSourceElementType())
2113  return nullptr;
2114 
2115  SmallVector<Value*, 8> Indices;
2116 
2117  // Find out whether the last index in the source GEP is a sequential idx.
2118  bool EndsWithSequential = false;
2119  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2120  I != E; ++I)
2121  EndsWithSequential = I.isSequential();
2122 
2123  // Can we combine the two pointer arithmetics offsets?
2124  if (EndsWithSequential) {
2125  // Replace: gep (gep %P, long B), long A, ...
2126  // With: T = long A+B; gep %P, T, ...
2127  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
2128  Value *GO1 = GEP.getOperand(1);
2129 
2130  // If they aren't the same type, then the input hasn't been processed
2131  // by the loop above yet (which canonicalizes sequential index types to
2132  // intptr_t). Just avoid transforming this until the input has been
2133  // normalized.
2134  if (SO1->getType() != GO1->getType())
2135  return nullptr;
2136 
2137  Value *Sum =
2138  simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2139  // Only do the combine when we are sure the cost after the
2140  // merge is never more than that before the merge.
2141  if (Sum == nullptr)
2142  return nullptr;
2143 
2144  // Update the GEP in place if possible.
2145  if (Src->getNumOperands() == 2) {
2146  GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
2147  replaceOperand(GEP, 0, Src->getOperand(0));
2148  replaceOperand(GEP, 1, Sum);
2149  return &GEP;
2150  }
2151  Indices.append(Src->op_begin()+1, Src->op_end()-1);
2152  Indices.push_back(Sum);
2153  Indices.append(GEP.op_begin()+2, GEP.op_end());
2154  } else if (isa<Constant>(*GEP.idx_begin()) &&
2155  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
2156  Src->getNumOperands() != 1) {
2157  // Otherwise we can do the fold if the first index of the GEP is a zero
2158  Indices.append(Src->op_begin()+1, Src->op_end());
2159  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
2160  }
2161 
2162  if (!Indices.empty())
2163  return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
2164  ? GetElementPtrInst::CreateInBounds(
2165  Src->getSourceElementType(), Src->getOperand(0), Indices,
2166  GEP.getName())
2167  : GetElementPtrInst::Create(Src->getSourceElementType(),
2168  Src->getOperand(0), Indices,
2169  GEP.getName());
2170 
2171  return nullptr;
2172 }
2173 
2174 // Note that we may have also stripped an address space cast in between.
2175 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
2176  GetElementPtrInst &GEP) {
2177  // With opaque pointers, there is no pointer element type we can use to
2178  // adjust the GEP type.
2179  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2180  if (SrcType->isOpaque())
2181  return nullptr;
2182 
2183  Type *GEPEltType = GEP.getSourceElementType();
2184  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
2185  Value *SrcOp = BCI->getOperand(0);
2186 
2187  // GEP directly using the source operand if this GEP is accessing an element
2188  // of a bitcasted pointer to vector or array of the same dimensions:
2189  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2190  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2191  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
2192  const DataLayout &DL) {
2193  auto *VecVTy = cast<FixedVectorType>(VecTy);
2194  return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
2195  ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
2196  DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
2197  };
2198  if (GEP.getNumOperands() == 3 &&
2199  ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
2200  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
2201  (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
2202  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
2203 
2204  // Create a new GEP here, as using `setOperand()` followed by
2205  // `setSourceElementType()` won't actually update the type of the
2206  // existing GEP Value, causing issues if this Value is accessed when
2207  // constructing an AddrSpaceCastInst.
2208  SmallVector<Value *, 8> Indices(GEP.indices());
2209  Value *NGEP =
2210  Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
2211  NGEP->takeName(&GEP);
2212 
2213  // Preserve GEP address space to satisfy users
2214  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2215  return new AddrSpaceCastInst(NGEP, GEP.getType());
2216 
2217  return replaceInstUsesWith(GEP, NGEP);
2218  }
2219 
2220  // See if we can simplify:
2221  // X = bitcast A* to B*
2222  // Y = gep X, <...constant indices...>
2223  // into a gep of the original struct. This is important for SROA and alias
2224  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2225  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
2226  APInt Offset(OffsetBits, 0);
2227 
2228  // If the bitcast argument is an allocation, the bitcast is a conversion
2229  // to the actual type of the allocation. Removing such bitcasts results in
2230  // GEPs with an i8* base and pure byte offsets, meaning the GEP is not aware
2231  // of the struct or array hierarchy.
2232  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
2233  // a better chance to succeed.
2234  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
2235  !isAllocationFn(SrcOp, &TLI)) {
2236  // If this GEP instruction doesn't move the pointer, just replace the GEP
2237  // with a bitcast of the real input to the dest type.
2238  if (!Offset) {
2239  // If the bitcast is of an allocation, and the allocation will be
2240  // converted to match the type of the cast, don't touch this.
2241  if (isa<AllocaInst>(SrcOp)) {
2242  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2243  if (Instruction *I = visitBitCast(*BCI)) {
2244  if (I != BCI) {
2245  I->takeName(BCI);
2246  I->insertInto(BCI->getParent(), BCI->getIterator());
2247  replaceInstUsesWith(*BCI, I);
2248  }
2249  return &GEP;
2250  }
2251  }
2252 
2253  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2254  return new AddrSpaceCastInst(SrcOp, GEP.getType());
2255  return new BitCastInst(SrcOp, GEP.getType());
2256  }
2257 
2258  // Otherwise, if the offset is non-zero, we need to find out if there is a
2259  // field at Offset in 'A's type. If so, we can pull the cast through the
2260  // GEP.
2261  SmallVector<Value *, 8> NewIndices;
2262  if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
2263  Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
2264  GEP.isInBounds());
2265 
2266  if (NGEP->getType() == GEP.getType())
2267  return replaceInstUsesWith(GEP, NGEP);
2268  NGEP->takeName(&GEP);
2269 
2270  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2271  return new AddrSpaceCastInst(NGEP, GEP.getType());
2272  return new BitCastInst(NGEP, GEP.getType());
2273  }
2274  }
2275 
2276  return nullptr;
2277 }
2278 
2279 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
2280  Value *PtrOp = GEP.getOperand(0);
2281  SmallVector<Value *, 8> Indices(GEP.indices());
2282  Type *GEPType = GEP.getType();
2283  Type *GEPEltType = GEP.getSourceElementType();
2284  bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2285  if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
2286  SQ.getWithInstruction(&GEP)))
2287  return replaceInstUsesWith(GEP, V);
2288 
2289  // For vector geps, use the generic demanded vector support.
2290  // Skip if GEP return type is scalable. The number of elements is unknown at
2291  // compile-time.
2292  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
2293  auto VWidth = GEPFVTy->getNumElements();
2294  APInt UndefElts(VWidth, 0);
2295  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2296  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
2297  UndefElts)) {
2298  if (V != &GEP)
2299  return replaceInstUsesWith(GEP, V);
2300  return &GEP;
2301  }
2302 
2303  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2304  // possible (decide on canonical form for pointer broadcast), 3) exploit
2305  // undef elements to decrease demanded bits
2306  }
2307 
2308  // Eliminate unneeded casts for indices, and replace indices which displace
2309  // by multiples of a zero size type with zero.
2310  bool MadeChange = false;
2311 
2312  // Index width may not be the same width as pointer width.
2313  // Data layout chooses the right type based on supported integer types.
2314  Type *NewScalarIndexTy =
2315  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
2316 
2317  gep_type_iterator GTI = gep_type_begin(GEP);
2318  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
2319  ++I, ++GTI) {
2320  // Skip indices into struct types.
2321  if (GTI.isStruct())
2322  continue;
2323 
2324  Type *IndexTy = (*I)->getType();
2325  Type *NewIndexType =
2326  IndexTy->isVectorTy()
2327  ? VectorType::get(NewScalarIndexTy,
2328  cast<VectorType>(IndexTy)->getElementCount())
2329  : NewScalarIndexTy;
2330 
2331  // If the element type has zero size then any index over it is equivalent
2332  // to an index of zero, so replace it with zero if it is not zero already.
2333  Type *EltTy = GTI.getIndexedType();
2334  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
2335  if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
2336  *I = Constant::getNullValue(NewIndexType);
2337  MadeChange = true;
2338  }
2339 
2340  if (IndexTy != NewIndexType) {
2341  // If we are using a wider index than needed for this platform, shrink
2342  // it to what we need. If narrower, sign-extend it to what we need.
2343  // This explicit cast can make subsequent optimizations more obvious.
2344  *I = Builder.CreateIntCast(*I, NewIndexType, true);
2345  MadeChange = true;
2346  }
2347  }
2348  if (MadeChange)
2349  return &GEP;
2350 
2351  // Check to see if the inputs to the PHI node are getelementptr instructions.
2352  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
2353  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
2354  if (!Op1)
2355  return nullptr;
2356 
2357  // Don't fold a GEP into itself through a PHI node. This can only happen
2358  // through the back-edge of a loop. Folding a GEP into itself means that
2359  // the value of the previous iteration needs to be stored in the meantime,
2360  // thus requiring an additional register variable to be live, but not
2361  // actually achieving anything (the GEP still needs to be executed once per
2362  // loop iteration).
2363  if (Op1 == &GEP)
2364  return nullptr;
2365 
2366  int DI = -1;
2367 
2368  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
2369  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
2370  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
2371  Op1->getSourceElementType() != Op2->getSourceElementType())
2372  return nullptr;
2373 
2374  // As for Op1 above, don't try to fold a GEP into itself.
2375  if (Op2 == &GEP)
2376  return nullptr;
2377 
2378  // Keep track of the type as we walk the GEP.
2379  Type *CurTy = nullptr;
2380 
2381  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
2382  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
2383  return nullptr;
2384 
2385  if (Op1->getOperand(J) != Op2->getOperand(J)) {
2386  if (DI == -1) {
2387  // We have not seen any differences yet in the GEPs feeding the
2388  // PHI yet, so we record this one if it is allowed to be a
2389  // variable.
2390 
2391  // The first two arguments can vary for any GEP, the rest have to be
2392  // static for struct slots
2393  if (J > 1) {
2394  assert(CurTy && "No current type?");
2395  if (CurTy->isStructTy())
2396  return nullptr;
2397  }
2398 
2399  DI = J;
2400  } else {
2401  // The GEP is different by more than one input. While this could be
2402  // extended to support GEPs that vary by more than one variable it
2403  // doesn't make sense since it greatly increases the complexity and
2404  // would result in an R+R+R addressing mode which no backend
2405  // directly supports and would need to be broken into several
2406  // simpler instructions anyway.
2407  return nullptr;
2408  }
2409  }
2410 
2411  // Sink down a layer of the type for the next iteration.
2412  if (J > 0) {
2413  if (J == 1) {
2414  CurTy = Op1->getSourceElementType();
2415  } else {
2416  CurTy =
2417  GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
2418  }
2419  }
2420  }
2421  }
2422 
2423  // If not all GEPs are identical we'll have to create a new PHI node.
2424  // Check that the old PHI node has only one use so that it will get
2425  // removed.
2426  if (DI != -1 && !PN->hasOneUse())
2427  return nullptr;
2428 
2429  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
2430  if (DI == -1) {
2431  // All the GEPs feeding the PHI are identical. Clone one down into our
2432  // BB so that it can be merged with the current GEP.
2433  } else {
2434  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
2435  // into the current block so it can be merged, and create a new PHI to
2436  // set that index.
2437  PHINode *NewPN;
2438  {
2439  IRBuilderBase::InsertPointGuard Guard(Builder);
2440  Builder.SetInsertPoint(PN);
2441  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
2442  PN->getNumOperands());
2443  }
2444 
2445  for (auto &I : PN->operands())
2446  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
2447  PN->getIncomingBlock(I));
2448 
2449  NewGEP->setOperand(DI, NewPN);
2450  }
2451 
2452  NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
2453  return replaceOperand(GEP, 0, NewGEP);
2454  }
2455 
2456  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
2457  if (Instruction *I = visitGEPOfGEP(GEP, Src))
2458  return I;
2459 
2460  // Skip if GEP source element type is scalable. The type alloc size is unknown
2461  // at compile-time.
2462  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
2463  unsigned AS = GEP.getPointerAddressSpace();
2464  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
2465  DL.getIndexSizeInBits(AS)) {
2466  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2467 
2468  bool Matched = false;
2469  uint64_t C;
2470  Value *V = nullptr;
2471  if (TyAllocSize == 1) {
2472  V = GEP.getOperand(1);
2473  Matched = true;
2474  } else if (match(GEP.getOperand(1),
2475  m_AShr(m_Value(V), m_ConstantInt(C)))) {
2476  if (TyAllocSize == 1ULL << C)
2477  Matched = true;
2478  } else if (match(GEP.getOperand(1),
2479  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
2480  if (TyAllocSize == C)
2481  Matched = true;
2482  }
2483 
2484  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
2485  // only if both point to the same underlying object (otherwise provenance
2486  // is not necessarily retained).
2487  Value *Y;
2488  Value *X = GEP.getOperand(0);
2489  if (Matched &&
2490  match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
2491  getUnderlyingObject(X) == getUnderlyingObject(Y))
2492  return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
2493  }
2494  }
2495 
2496  // We do not handle pointer-vector geps here.
2497  if (GEPType->isVectorTy())
2498  return nullptr;
2499 
2500  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
2501  Value *StrippedPtr = PtrOp->stripPointerCasts();
2502  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
2503 
2504  // TODO: The basic approach of these folds is not compatible with opaque
2505  // pointers, because we can't use bitcasts as a hint for a desirable GEP
2506  // type. Instead, we should perform canonicalization directly on the GEP
2507  // type. For now, skip these.
2508  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
2509  bool HasZeroPointerIndex = false;
2510  Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
2511 
2512  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
2513  HasZeroPointerIndex = C->isZero();
2514 
2515  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
2516  // into : GEP [10 x i8]* X, i32 0, ...
2517  //
2518  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
2519  // into : GEP i8* X, ...
2520  //
2521  // This occurs when the program declares an array extern like "int X[];"
2522  if (HasZeroPointerIndex) {
2523  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
2524  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
2525  if (CATy->getElementType() == StrippedPtrEltTy) {
2526  // -> GEP i8* X, ...
2527  SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
2528  GetElementPtrInst *Res = GetElementPtrInst::Create(
2529  StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
2530  Res->setIsInBounds(GEP.isInBounds());
2531  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
2532  return Res;
2533  // Insert Res, and create an addrspacecast.
2534  // e.g.,
2535  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
2536  // ->
2537  // %0 = GEP i8 addrspace(1)* X, ...
2538  // addrspacecast i8 addrspace(1)* %0 to i8*
2539  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
2540  }
2541 
2542  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
2543  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
2544  if (CATy->getElementType() == XATy->getElementType()) {
2545  // -> GEP [10 x i8]* X, i32 0, ...
2546  // At this point, we know that the cast source type is a pointer
2547  // to an array of the same type as the destination pointer
2548  // array. Because the array type is never stepped over (there
2549  // is a leading zero) we can fold the cast into this GEP.
2550  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
2551  GEP.setSourceElementType(XATy);
2552  return replaceOperand(GEP, 0, StrippedPtr);
2553  }
2554  // Cannot replace the base pointer directly because StrippedPtr's
2555  // address space is different. Instead, create a new GEP followed by
2556  // an addrspacecast.
2557  // e.g.,
2558  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
2559  // i32 0, ...
2560  // ->
2561  // %0 = GEP [10 x i8] addrspace(1)* X, ...
2562  // addrspacecast i8 addrspace(1)* %0 to i8*
2563  SmallVector<Value *, 8> Idx(GEP.indices());
2564  Value *NewGEP =
2565  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2566  GEP.getName(), GEP.isInBounds());
2567  return new AddrSpaceCastInst(NewGEP, GEPType);
2568  }
2569  }
2570  }
2571  } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
2572  // Skip if GEP source element type is scalable. The type alloc size is
2573  // unknown at compile-time.
2574  // Transform things like: %t = getelementptr i32*
2575  // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
2576  // x i32]* %str, i32 0, i32 %V; bitcast
2577  if (StrippedPtrEltTy->isArrayTy() &&
2578  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
2579  DL.getTypeAllocSize(GEPEltType)) {
2580  Type *IdxType = DL.getIndexType(GEPType);
2581  Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
2582  Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2583  GEP.getName(), GEP.isInBounds());
2584 
2585  // V and GEP are both pointer types --> BitCast
2586  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2587  }
2588 
2589  // Transform things like:
2590  // %V = mul i64 %N, 4
2591  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2592  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2593  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2594  // Check that changing the type amounts to dividing the index by a scale
2595  // factor.
2596  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2597  uint64_t SrcSize =
2598  DL.getTypeAllocSize(StrippedPtrEltTy).getFixedValue();
2599  if (ResSize && SrcSize % ResSize == 0) {
2600  Value *Idx = GEP.getOperand(1);
2601  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2602  uint64_t Scale = SrcSize / ResSize;
2603 
2604  // Earlier transforms ensure that the index has the right type
2605  // according to Data Layout, which considerably simplifies the
2606  // logic by eliminating implicit casts.
2607  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2608  "Index type does not match the Data Layout preferences");
2609 
2610  bool NSW;
2611  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2612  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2613  // If the multiplication NewIdx * Scale may overflow then the new
2614  // GEP may not be "inbounds".
2615  Value *NewGEP =
2616  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2617  GEP.getName(), GEP.isInBounds() && NSW);
2618 
2619  // The NewGEP must be pointer typed, so must the old one -> BitCast
2620  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2621  GEPType);
2622  }
2623  }
2624  }
2625 
2626  // Similarly, transform things like:
2627  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2628  // (where tmp = 8*tmp2) into:
2629  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2630  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2631  StrippedPtrEltTy->isArrayTy()) {
2632  // Check that changing to the array element type amounts to dividing the
2633  // index by a scale factor.
2634  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
2635  uint64_t ArrayEltSize =
2636  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
2637  .getFixedValue();
2638  if (ResSize && ArrayEltSize % ResSize == 0) {
2639  Value *Idx = GEP.getOperand(1);
2640  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2641  uint64_t Scale = ArrayEltSize / ResSize;
2642 
2643  // Earlier transforms ensure that the index has the right type
2644  // according to the Data Layout, which considerably simplifies
2645  // the logic by eliminating implicit casts.
2646  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2647  "Index type does not match the Data Layout preferences");
2648 
2649  bool NSW;
2650  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2651  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2652  // If the multiplication NewIdx * Scale may overflow then the new
2653  // GEP may not be "inbounds".
2654  Type *IndTy = DL.getIndexType(GEPType);
2655  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2656 
2657  Value *NewGEP =
2658  Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2659  GEP.getName(), GEP.isInBounds() && NSW);
2660  // The NewGEP must be pointer typed, so must the old one -> BitCast
2661  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2662  GEPType);
2663  }
2664  }
2665  }
2666  }
2667  }
2668 
2669  // addrspacecast between types is canonicalized as a bitcast, then an
2670  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2671  // through the addrspacecast.
2672  Value *ASCStrippedPtrOp = PtrOp;
2673  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2674  // X = bitcast A addrspace(1)* to B addrspace(1)*
2675  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2676  // Z = gep Y, <...constant indices...>
2677  // Into an addrspacecasted GEP of the struct.
2678  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2679  ASCStrippedPtrOp = BC;
2680  }
2681 
2682  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
2683  if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
2684  return I;
2685 
2686  if (!GEP.isInBounds()) {
2687  unsigned IdxWidth =
2688  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2689  APInt BasePtrOffset(IdxWidth, 0);
2690  Value *UnderlyingPtrOp =
2691  PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2692  BasePtrOffset);
2693  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2694  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2695  BasePtrOffset.isNonNegative()) {
2696  APInt AllocSize(
2697  IdxWidth,
2698  DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
2699  if (BasePtrOffset.ule(AllocSize)) {
2700  return GetElementPtrInst::CreateInBounds(
2701  GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
2702  }
2703  }
2704  }
2705  }
2706 
2707  if (Instruction *R = foldSelectGEP(GEP, Builder))
2708  return R;
2709 
2710  return nullptr;
2711 }
2712 
2713 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
2714  Instruction *AI) {
2715  if (isa<ConstantPointerNull>(V))
2716  return true;
2717  if (auto *LI = dyn_cast<LoadInst>(V))
2718  return isa<GlobalVariable>(LI->getPointerOperand());
2719  // Two distinct allocations will never be equal.
2720  return isAllocLikeFn(V, &TLI) && V != AI;
2721 }
2722 
2723 /// Given a call CB which uses an address UsedV, return true if we can prove the
2724 /// call's only possible effect is storing to UsedV.
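/// For example (an illustrative sketch; @fill is a hypothetical callee whose
/// only memory effect is writing through its pointer argument):
///   call void @fill(ptr %buf)  ; nounwind, willreturn, result unused
/// behaves like nothing more than a store to %buf, so it can be dropped once
/// %buf itself is dead.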
2725 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2726  const TargetLibraryInfo &TLI) {
2727  if (!CB.use_empty())
2728  // TODO: add recursion if returned attribute is present
2729  return false;
2730 
2731  if (CB.isTerminator())
2732  // TODO: remove implementation restriction
2733  return false;
2734 
2735  if (!CB.willReturn() || !CB.doesNotThrow())
2736  return false;
2737 
2738  // If the only possible side effect of the call is writing to the alloca,
2739  // and the result isn't used, we can safely remove any reads implied by the
2740  // call including those which might read the alloca itself.
2741  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2742  return Dest && Dest->Ptr == UsedV;
2743 }
2744 
2745 static bool isAllocSiteRemovable(Instruction *AI,
2746  SmallVectorImpl<WeakTrackingVH> &Users,
2747  const TargetLibraryInfo &TLI) {
2748  SmallVector<Instruction*, 4> Worklist;
2749  const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
2750  Worklist.push_back(AI);
2751 
2752  do {
2753  Instruction *PI = Worklist.pop_back_val();
2754  for (User *U : PI->users()) {
2755  Instruction *I = cast<Instruction>(U);
2756  switch (I->getOpcode()) {
2757  default:
2758  // Give up the moment we see something we can't handle.
2759  return false;
2760 
2761  case Instruction::AddrSpaceCast:
2762  case Instruction::BitCast:
2763  case Instruction::GetElementPtr:
2764  Users.emplace_back(I);
2765  Worklist.push_back(I);
2766  continue;
2767 
2768  case Instruction::ICmp: {
2769  ICmpInst *ICI = cast<ICmpInst>(I);
2770  // We can fold eq/ne comparisons with null to false/true, respectively.
2771  // We also fold comparisons in some conditions provided the alloc has
2772  // not escaped (see isNeverEqualToUnescapedAlloc).
2773  if (!ICI->isEquality())
2774  return false;
2775  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2776  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2777  return false;
2778  Users.emplace_back(I);
2779  continue;
2780  }
2781 
2782  case Instruction::Call:
2783  // Ignore no-op and store intrinsics.
2784  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2785  switch (II->getIntrinsicID()) {
2786  default:
2787  return false;
2788 
2789  case Intrinsic::memmove:
2790  case Intrinsic::memcpy:
2791  case Intrinsic::memset: {
2792  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2793  if (MI->isVolatile() || MI->getRawDest() != PI)
2794  return false;
2795  [[fallthrough]];
2796  }
2797  case Intrinsic::assume:
2798  case Intrinsic::invariant_start:
2799  case Intrinsic::invariant_end:
2800  case Intrinsic::lifetime_start:
2801  case Intrinsic::lifetime_end:
2802  case Intrinsic::objectsize:
2803  Users.emplace_back(I);
2804  continue;
2805  case Intrinsic::launder_invariant_group:
2806  case Intrinsic::strip_invariant_group:
2807  Users.emplace_back(I);
2808  Worklist.push_back(I);
2809  continue;
2810  }
2811  }
2812 
2813  if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
2814  Users.emplace_back(I);
2815  continue;
2816  }
2817 
2818  if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
2819  getAllocationFamily(I, &TLI) == Family) {
2820  assert(Family);
2821  Users.emplace_back(I);
2822  continue;
2823  }
2824 
2825  if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
2826  getAllocationFamily(I, &TLI) == Family) {
2827  assert(Family);
2828  Users.emplace_back(I);
2829  Worklist.push_back(I);
2830  continue;
2831  }
2832 
2833  return false;
2834 
2835  case Instruction::Store: {
2836  StoreInst *SI = cast<StoreInst>(I);
2837  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2838  return false;
2839  Users.emplace_back(I);
2840  continue;
2841  }
2842  }
2843  llvm_unreachable("missing a return?");
2844  }
2845  } while (!Worklist.empty());
2846  return true;
2847 }
2848 
2849 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
2850  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
2851 
2852  // If we have a malloc call which is only used in any amount of comparisons to
2853  // null and free calls, delete the calls and replace the comparisons with true
2854  // or false as appropriate.
2855 
2856  // This is based on the principle that we can substitute our own allocation
2857  // function (which will never return null) rather than knowledge of the
2858  // specific function being called. In some sense this can change the permitted
2859  // outputs of a program (when we convert a malloc to an alloca, the fact that
2860  // the allocation is now on the stack is potentially visible, for example),
2861  // but we believe this is done in a permissible manner.
2862  SmallVector<WeakTrackingVH, 64> Users;
2863 
2864  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2865  // before each store.
2866  SmallVector<DbgVariableIntrinsic *, 8> DVIs;
2867  std::unique_ptr<DIBuilder> DIB;
2868  if (isa<AllocaInst>(MI)) {
2869  findDbgUsers(DVIs, &MI);
2870  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2871  }
2872 
2873  if (isAllocSiteRemovable(&MI, Users, TLI)) {
2874  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2875  // Lowering all @llvm.objectsize calls first because they may
2876  // use a bitcast/GEP of the alloca we are removing.
2877  if (!Users[i])
2878  continue;
2879 
2880  Instruction *I = cast<Instruction>(&*Users[i]);
2881 
2882  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2883  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2884  Value *Result =
2885  lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
2886  replaceInstUsesWith(*I, Result);
2887  eraseInstFromFunction(*I);
2888  Users[i] = nullptr; // Skip examining in the next loop.
2889  }
2890  }
2891  }
2892  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2893  if (!Users[i])
2894  continue;
2895 
2896  Instruction *I = cast<Instruction>(&*Users[i]);
2897 
2898  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2899  replaceInstUsesWith(*C,
2900  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2901  C->isFalseWhenEqual()));
2902  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2903  for (auto *DVI : DVIs)
2904  if (DVI->isAddressOfVariable())
2905  ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
2906  } else {
2907  // Casts, GEP, or anything else: we're about to delete this instruction,
2908  // so it can not have any valid uses.
2909  replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
2910  }
2911  eraseInstFromFunction(*I);
2912  }
2913 
2914  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2915  // Replace invoke with a NOP intrinsic to maintain the original CFG
2916  Module *M = II->getModule();
2917  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2918  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2919  std::nullopt, "", II->getParent());
2920  }
2921 
2922  // Remove debug intrinsics which describe the value contained within the
2923  // alloca. In addition to removing dbg.{declare,addr} which simply point to
2924  // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
2925  //
2926  // ```
2927  // define void @foo(i32 %0) {
2928  // %a = alloca i32 ; Deleted.
2929  // store i32 %0, i32* %a
2930  // dbg.value(i32 %0, "arg0") ; Not deleted.
2931  // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
2932  // call void @trivially_inlinable_no_op(i32* %a)
2933  // ret void
2934  // }
2935  // ```
2936  //
2937  // This may not be required if we stop describing the contents of allocas
2938  // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
2939  // the LowerDbgDeclare utility.
2940  //
2941  // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
2942  // "arg0" dbg.value may be stale after the call. However, failing to remove
2943  // the DW_OP_deref dbg.value causes large gaps in location coverage.
2944  for (auto *DVI : DVIs)
2945  if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
2946  DVI->eraseFromParent();
2947 
2948  return eraseInstFromFunction(MI);
2949  }
2950  return nullptr;
2951 }
2952 
2953 /// Move the call to free before a NULL test.
2954 ///
2955 /// Check if this free is accessed after its argument has been tested
2956 /// against NULL (property 0).
2957 /// If yes, it is legal to move this call into its predecessor block.
2958 ///
2959 /// The move is performed only if the block containing the call to free
2960 /// will be removed, i.e.:
2961 /// 1. it has only one predecessor P, and P has two successors
2962 /// 2. it contains the call, noops, and an unconditional branch
2963 /// 3. its successor is the same as its predecessor's successor
2964 ///
2965 /// Profitability is not a concern here; this function should be called
2966 /// only if the caller already knows that the transformation would be
2967 /// profitable (e.g., for code size).
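///
/// Illustrative CFG shape being matched (not taken from the source):
///   pred:    %c = icmp eq ptr %op, null
///            br i1 %c, label %succ, label %free_bb
///   free_bb: call void @free(ptr %op)       ; plus no-op casts only
///            br label %succ
/// After the transform the call to free is hoisted into `pred`, which is
/// safe because free(null) is a no-op.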
2968 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2969  const DataLayout &DL) {
2970  Value *Op = FI.getArgOperand(0);
2971  BasicBlock *FreeInstrBB = FI.getParent();
2972  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2973 
2974  // Validate part of constraint #1: Only one predecessor
2975  // FIXME: We could handle more than one predecessor, but in that case we
2976  // would duplicate the call to free in each predecessor, and that may
2977  // not be profitable even for code size.
2978  if (!PredBB)
2979  return nullptr;
2980 
2981  // Validate constraint #2: Does this block contain only the call to
2982  // free, noops, and an unconditional branch?
2983  BasicBlock *SuccBB;
2984  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2985  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2986  return nullptr;
2987 
2988  // If there are only 2 instructions in the block, at this point
2989  // they must be the call to free and the unconditional branch.
2990  // If there are more than 2 instructions, check that the extra ones are
2991  // no-ops, i.e., they won't hurt the performance of the generated code.
2992  if (FreeInstrBB->size() != 2) {
2993  for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
2994  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2995  continue;
2996  auto *Cast = dyn_cast<CastInst>(&Inst);
2997  if (!Cast || !Cast->isNoopCast(DL))
2998  return nullptr;
2999  }
3000  }
3001  // Validate the rest of constraint #1 by matching on the pred branch.
3002  Instruction *TI = PredBB->getTerminator();
3003  BasicBlock *TrueBB, *FalseBB;
3004  ICmpInst::Predicate Pred;
3005  if (!match(TI, m_Br(m_ICmp(Pred,
3006  m_CombineOr(m_Specific(Op),
3007  m_Specific(Op->stripPointerCasts())),
3008  m_Zero()),
3009  TrueBB, FalseBB)))
3010  return nullptr;
3011  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
3012  return nullptr;
3013 
3014  // Validate constraint #3: Ensure the null case just falls through.
3015  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
3016  return nullptr;
3017  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
3018  "Broken CFG: missing edge from predecessor to successor");
3019 
3020  // At this point, we know that everything in FreeInstrBB can be moved
3021  // before TI.
3022  for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
3023  if (&Instr == FreeInstrBBTerminator)
3024  break;
3025  Instr.moveBefore(TI);
3026  }
3027  assert(FreeInstrBB->size() == 1 &&
3028  "Only the branch instruction should remain");
3029 
3030  // Now that we've moved the call to free before the NULL check, we have to
3031  // remove any attributes on its parameter that imply it's non-null, because
3032  // those attributes might have only been valid because of the NULL check, and
3033  // we can get miscompiles if we keep them. This is conservative if non-null is
3034  // also implied by something other than the NULL check, but it's guaranteed to
3035  // be correct, and the conservativeness won't matter in practice, since the
3036  // attributes are irrelevant for the call to free itself and the pointer
3037  // shouldn't be used after the call.
3038  AttributeList Attrs = FI.getAttributes();
3039  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
3040  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
3041  if (Dereferenceable.isValid()) {
3042  uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
3043  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
3044  Attribute::Dereferenceable);
3045  Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
3046  }
3047  FI.setAttributes(Attrs);
3048 
3049  return &FI;
3050 }
3051 
3052 Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
3053  // free undef -> unreachable.
3054  if (isa<UndefValue>(Op)) {
3055  // Leave a marker since we can't modify the CFG here.
3056  CreateNonTerminatorUnreachable(&FI);
3057  return eraseInstFromFunction(FI);
3058  }
3059 
3060  // If we have 'free null' delete the instruction. This can happen in stl code
3061  // when lots of inlining happens.
3062  if (isa<ConstantPointerNull>(Op))
3063  return eraseInstFromFunction(FI);
3064 
3065  // If we had free(realloc(...)) with no intervening uses, then eliminate the
3066  // realloc() entirely.
3067  CallInst *CI = dyn_cast<CallInst>(Op);
3068  if (CI && CI->hasOneUse())
3069  if (Value *ReallocatedOp = getReallocatedOperand(CI))
3070  return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
3071 
3072  // If we optimize for code size, try to move the call to free before the null
3073  // test so that SimplifyCFG can remove the empty block and dead code
3074  // elimination can remove the branch. I.e., this helps to turn something like:
3075  // if (foo) free(foo);
3076  // into
3077  // free(foo);
3078  //
3079  // Note that we can only do this for 'free' and not for any flavor of
3080  // 'operator delete'; there is no 'operator delete' symbol for which we are
3081  // permitted to invent a call, even if we're passing in a null pointer.
3082  if (MinimizeSize) {
3083  LibFunc Func;
3084  if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
3085  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
3086  return I;
3087  }
3088 
3089  return nullptr;
3090 }
3091 
3092 static bool isMustTailCall(Value *V) {
3093  if (auto *CI = dyn_cast<CallInst>(V))
3094  return CI->isMustTailCall();
3095  return false;
3096 }
3097 
3098 Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
3099  if (RI.getNumOperands() == 0) // ret void
3100  return nullptr;
3101 
3102  Value *ResultOp = RI.getOperand(0);
3103  Type *VTy = ResultOp->getType();
3104  if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
3105  return nullptr;
3106 
3107  // Don't replace result of musttail calls.
3108  if (isMustTailCall(ResultOp))
3109  return nullptr;
3110 
3111  // There might be assume intrinsics dominating this return that completely
3112  // determine the value. If so, constant fold it.
3113  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
3114  if (Known.isConstant())
3115  return replaceOperand(RI, 0,
3116  Constant::getIntegerValue(VTy, Known.getConstant()));
3117 
3118  return nullptr;
3119 }
3120 
3121 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
3122 Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
3123  // Try to remove the previous instruction if it must lead to unreachable.
3124  // This includes instructions like stores and "llvm.assume" that may not get
3125  // removed by simple dead code elimination.
3126  while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
3127  // While we theoretically can erase EH, that would result in a block that
3128  // used to start with an EH no longer starting with EH, which is invalid.
3129  // To make it valid, we'd need to fixup predecessors to no longer refer to
3130  // this block, but that changes CFG, which is not allowed in InstCombine.
3131  if (Prev->isEHPad())
3132  return nullptr; // Cannot drop any more instructions. We're done here.
3133 
3134  if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
3135  return nullptr; // Cannot drop any more instructions. We're done here.
3136  // Otherwise, this instruction can be freely erased,
3137  // even if it is not side-effect free.
3138 
3139  // A value may still have uses before we process it here (for example, in
3140  // another unreachable block), so convert those to poison.
3141  replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
3142  eraseInstFromFunction(*Prev);
3143  }
3144  assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
3145  // FIXME: recurse into unconditional predecessors?
3146  return nullptr;
3147 }
3148 
3149 Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
3150  assert(BI.isUnconditional() && "Only for unconditional branches.");
3151 
3152  // If this store is the second-to-last instruction in the basic block
3153  // (excluding debug info and bitcasts of pointers) and if the block ends with
3154  // an unconditional branch, try to move the store to the successor block.
3155 
3156  auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
3157  auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
3158  return BBI->isDebugOrPseudoInst() ||
3159  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
3160  };
3161 
3162  BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
3163  do {
3164  if (BBI != FirstInstr)
3165  --BBI;
3166  } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
3167 
3168  return dyn_cast<StoreInst>(BBI);
3169  };
3170 
3171  if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
3172  if (mergeStoreIntoSuccessor(*SI))
3173  return &BI;
3174 
3175  return nullptr;
3176 }
3177 
3178 Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
3179  if (BI.isUnconditional())
3180  return visitUnconditionalBranchInst(BI);
3181 
3182  // Change br (not X), label True, label False to: br X, label False, True
3183  Value *Cond = BI.getCondition();
3184  Value *X;
3185  if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
3186  // Swap Destinations and condition...
3187  BI.swapSuccessors();
3188  return replaceOperand(BI, 0, X);
3189  }
3190 
3191  // Canonicalize logical-and-with-invert as logical-or-with-invert.
3192  // This is done by inverting the condition and swapping successors:
3193  // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
3194  Value *Y;
3195  if (isa<SelectInst>(Cond) &&
3196  match(Cond,
3197  m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) {
3198  Value *NotX = Builder.CreateNot(X, "not." + X->getName());
3199  Value *Or = Builder.CreateLogicalOr(NotX, Y);
3200  BI.swapSuccessors();
3201  return replaceOperand(BI, 0, Or);
3202  }
3203 
3204  // If the condition is irrelevant, remove the use so that other
3205  // transforms on the condition become more effective.
3206  if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
3207  return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
3208 
3209  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
3210  CmpInst::Predicate Pred;
3211  if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
3212  !isCanonicalPredicate(Pred)) {
3213  // Swap destinations and condition.
3214  auto *Cmp = cast<CmpInst>(Cond);
3215  Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
3216  BI.swapSuccessors();
3217  Worklist.push(Cmp);
3218  return &BI;
3219  }
3220 
3221  return nullptr;
3222 }
3223 
3224 Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
3225  Value *Cond = SI.getCondition();
3226  Value *Op0;
3227  ConstantInt *AddRHS;
3228  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
3229  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
3230  for (auto Case : SI.cases()) {
3231  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
3232  assert(isa<ConstantInt>(NewCase) &&
3233  "Result of expression should be constant");
3234  Case.setValue(cast<ConstantInt>(NewCase));
3235  }
3236  return replaceOperand(SI, 0, Op0);
3237  }
3238 
3239  KnownBits Known = computeKnownBits(Cond, 0, &SI);
3240  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
3241  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
3242 
3243  // Compute the number of leading bits we can ignore.
3244  // TODO: A better way to determine this would use ComputeNumSignBits().
3245  for (const auto &C : SI.cases()) {
3246  LeadingKnownZeros = std::min(
3247  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
3248  LeadingKnownOnes = std::min(
3249  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
3250  }
3251 
3252  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
3253 
3254  // Shrink the condition operand if the new type is smaller than the old type.
3255  // But do not shrink to a non-standard type, because the backend can't generate
3256  // good code for that yet.
3257  // TODO: We can make it aggressive again after fixing PR39569.
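  // Illustrative example (not taken from the source): a switch on an i64
  // condition whose known bits and case values all fit in 32 bits can be
  // rewritten to switch on `trunc i64 %x to i32` with truncated case values.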
3258  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
3259  shouldChangeType(Known.getBitWidth(), NewWidth)) {
3260  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
3261  Builder.SetInsertPoint(&SI);
3262  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
3263 
3264  for (auto Case : SI.cases()) {
3265  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
3266  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
3267  }
3268  return replaceOperand(SI, 0, NewCond);
3269  }
3270 
3271  return nullptr;
3272 }
3273 
3274 Instruction *
3275 InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
3276  auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
3277  if (!WO)
3278  return nullptr;
3279 
3280  Intrinsic::ID OvID = WO->getIntrinsicID();
3281  const APInt *C = nullptr;
3282  if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
3283  if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
3284  OvID == Intrinsic::umul_with_overflow)) {
3285  // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
3286  if (C->isAllOnes())
3287  return BinaryOperator::CreateNeg(WO->getLHS());
3288  // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
3289  if (C->isPowerOf2()) {
3290  return BinaryOperator::CreateShl(
3291  WO->getLHS(),
3292  ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
3293  }
3294  }
3295  }
3296 
3297  // We're extracting from an overflow intrinsic. See if we're the only user.
3298  // That allows us to simplify multiple result intrinsics to simpler things
3299  // that just get one value.
3300  if (!WO->hasOneUse())
3301  return nullptr;
3302 
3303  // Check if we're grabbing only the result of a 'with overflow' intrinsic
3304  // and replace it with a traditional binary instruction.
3305  if (*EV.idx_begin() == 0) {
3306  Instruction::BinaryOps BinOp = WO->getBinaryOp();
3307  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
3308  // Replace the old instruction's uses with poison.
3309  replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
3310  eraseInstFromFunction(*WO);
3311  return BinaryOperator::Create(BinOp, LHS, RHS);
3312  }
3313 
3314  assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
3315 
3316  // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
3317  if (OvID == Intrinsic::usub_with_overflow)
3318  return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
3319 
3320  // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
3321  // +1 is not representable in a signed i1, so the product overflows.
3322  if (OvID == Intrinsic::smul_with_overflow &&
3323  WO->getLHS()->getType()->isIntOrIntVectorTy(1))
3324  return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
3325 
3326  // If only the overflow result is used, and the right hand side is a
3327  // constant (or constant splat), we can remove the intrinsic by directly
3328  // checking for overflow.
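  // Illustrative example (not taken from the source):
  //   %s  = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 42)
  //   %ov = extractvalue { i8, i1 } %s, 1
  // becomes an overflow check such as
  //   %ov = icmp ugt i8 %x, 213
  // since the addition overflows exactly when %x > 255 - 42.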
3329  if (C) {
3330  // Compute the no-wrap range for LHS given RHS=C, then construct an
3331  // equivalent icmp, potentially using an offset.
3332  ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
3333  WO->getBinaryOp(), *C, WO->getNoWrapKind());
3334 
3335  CmpInst::Predicate Pred;
3336  APInt NewRHSC, Offset;
3337  NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
3338  auto *OpTy = WO->getRHS()->getType();
3339  auto *NewLHS = WO->getLHS();
3340  if (Offset != 0)
3341  NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
3342  return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
3343  ConstantInt::get(OpTy, NewRHSC));
3344  }
3345 
3346  return nullptr;
3347 }
3348 
3349 Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
3350  Value *Agg = EV.getAggregateOperand();
3351 
3352  if (!EV.hasIndices())
3353  return replaceInstUsesWith(EV, Agg);
3354 
3355  if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
3356  SQ.getWithInstruction(&EV)))
3357  return replaceInstUsesWith(EV, V);
3358 
3359  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
3360  // We're extracting from an insertvalue instruction, compare the indices
3361  const unsigned *exti, *exte, *insi, *inse;
3362  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
3363  exte = EV.idx_end(), inse = IV->idx_end();
3364  exti != exte && insi != inse;
3365  ++exti, ++insi) {
3366  if (*insi != *exti)
3367  // The insert and extract both reference distinctly different elements.
3368  // This means the extract is not influenced by the insert, and we can
3369  // replace the aggregate operand of the extract with the aggregate
3370  // operand of the insert. i.e., replace
3371  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3372  // %E = extractvalue { i32, { i32 } } %I, 0
3373  // with
3374  // %E = extractvalue { i32, { i32 } } %A, 0
3375  return ExtractValueInst::Create(IV->getAggregateOperand(),
3376  EV.getIndices());
3377  }
3378  if (exti == exte && insi == inse)
3379  // Both iterators are at the end: Index lists are identical. Replace
3380  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3381  // %C = extractvalue { i32, { i32 } } %B, 1, 0
3382  // with "i32 42"
3383  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
3384  if (exti == exte) {
3385  // The extract list is a prefix of the insert list. i.e. replace
3386  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
3387  // %E = extractvalue { i32, { i32 } } %I, 1
3388  // with
3389  // %X = extractvalue { i32, { i32 } } %A, 1
3390  // %E = insertvalue { i32 } %X, i32 42, 0
3391  // by switching the order of the insert and extract (though the
3392  // insertvalue should be left in, since it may have other uses).
3393  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
3394  EV.getIndices());
3395  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
3396  ArrayRef(insi, inse));
3397  }
3398  if (insi == inse)
3399  // The insert list is a prefix of the extract list
3400  // We can simply remove the common indices from the extract and make it
3401  // operate on the inserted value instead of the insertvalue result.
3402  // i.e., replace
3403  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
3404  // %E = extractvalue { i32, { i32 } } %I, 1, 0
3405  // with
3406  // %E = extractvalue { i32 } { i32 42 }, 0
3407  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
3408  ArrayRef(exti, exte));
3409  }
3410 
3411  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
3412  return R;
3413 
3414  if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
3415  // If the (non-volatile) load only has one use, we can rewrite this to a
3416  // load from a GEP. This reduces the size of the load. If a load is used
3417  // only by extractvalue instructions then this either must have been
3418  // optimized before, or it is a struct with padding, in which case we
3419  // don't want to do the transformation as it loses padding knowledge.
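  // Illustrative example (not taken from the source):
  //   %agg = load { i32, i64 }, ptr %p
  //   %v   = extractvalue { i32, i64 } %agg, 1
  // becomes
  //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
  //   %v   = load i64, ptr %gep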
3420  if (L->isSimple() && L->hasOneUse()) {
3421  // extractvalue has integer indices, getelementptr has Value*s. Convert.
3422  SmallVector<Value*, 4> Indices;
3423  // Prefix an i32 0 since we need the first element.
3424  Indices.push_back(Builder.getInt32(0));
3425  for (unsigned Idx : EV.indices())
3426  Indices.push_back(Builder.getInt32(Idx));
3427 
3428  // We need to insert these at the location of the old load, not at that of
3429  // the extractvalue.
3430  Builder.SetInsertPoint(L);
3431  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
3432  L->getPointerOperand(), Indices);
3433  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
3434  // Whatever aliasing information we had for the original load must also
3435  // hold for the smaller load, so propagate the annotations.
3436  NL->setAAMetadata(L->getAAMetadata());
3437  // Returning the load directly will cause the main loop to insert it in
3438  // the wrong spot, so use replaceInstUsesWith().
3439  return replaceInstUsesWith(EV, NL);
3440  }
3441  }
3442 
3443  if (auto *PN = dyn_cast<PHINode>(Agg))
3444  if (Instruction *Res = foldOpIntoPhi(EV, PN))
3445  return Res;
3446 
3447  // We could simplify extracts from other values. Note that nested extracts may
3448  // already be simplified implicitly by the above: extract (extract (insert) )
3449  // will be translated into extract ( insert ( extract ) ) first and then just
3450  // the value inserted, if appropriate. Similarly for extracts from single-use
3451  // loads: extract (extract (load)) will be translated to extract (load (gep))
3452  // and if again single-use then via load (gep (gep)) to load (gep).
3453  // However, double extracts from e.g. function arguments or return values
3454  // aren't handled yet.
3455  return nullptr;
3456 }
3457 
3458 /// Return 'true' if the given typeinfo will match anything.
3459 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
3460  switch (Personality) {
3461  case EHPersonality::GNU_C:
3462  case EHPersonality::GNU_C_SjLj:
3463  case EHPersonality::Rust:
3464  // The GCC C EH and Rust personalities only exist to support cleanups, so
3465  // it's not clear what the semantics of catch clauses are.
3466  return false;
3467  case EHPersonality::Unknown:
3468  return false;
3469  case EHPersonality::GNU_Ada:
3470  // While __gnat_all_others_value will match any Ada exception, it doesn't
3471  // match foreign exceptions (or didn't, before gcc-4.7).
3472  return false;
3473  case EHPersonality::GNU_CXX:
3474  case EHPersonality::GNU_CXX_SjLj:
3475  case EHPersonality::GNU_ObjC:
3476  case EHPersonality::MSVC_X86SEH:
3477  case EHPersonality::MSVC_TableSEH:
3478  case EHPersonality::MSVC_CXX:
3479  case EHPersonality::CoreCLR:
3480  case EHPersonality::Wasm_CXX:
3481  case EHPersonality::XL_CXX:
3482  return TypeInfo->isNullValue();
3483  }
3484  llvm_unreachable("invalid enum");
3485 }
3486 
3487 static bool shorter_filter(const Value *LHS, const Value *RHS) {
3488  return
3489  cast<ArrayType>(LHS->getType())->getNumElements()
3490  <
3491  cast<ArrayType>(RHS->getType())->getNumElements();
3492 }
3493 
3494 Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
3495  // The logic here should be correct for any real-world personality function.
3496  // However if that turns out not to be true, the offending logic can always
3497  // be conditioned on the personality function, like the catch-all logic is.
3498  EHPersonality Personality =
3499  classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
3500 
3501  // Simplify the list of clauses, e.g. by removing repeated catch clauses
3502  // (these are often created by inlining).
3503  bool MakeNewInstruction = false; // If true, recreate using the following:
3504  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
3505  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
3506 
3507  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
3508  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
3509  bool isLastClause = i + 1 == e;
3510  if (LI.isCatch(i)) {
3511  // A catch clause.
3512  Constant *CatchClause = LI.getClause(i);
3513  Constant *TypeInfo = CatchClause->stripPointerCasts();
3514 
3515  // If we already saw this clause, there is no point in having a second
3516  // copy of it.
3517  if (AlreadyCaught.insert(TypeInfo).second) {
3518  // This catch clause was not already seen.
3519  NewClauses.push_back(CatchClause);
3520  } else {
3521  // Repeated catch clause - drop the redundant copy.
3522  MakeNewInstruction = true;
3523  }
3524 
3525  // If this is a catch-all then there is no point in keeping any following
3526  // clauses or marking the landingpad as having a cleanup.
3527  if (isCatchAll(Personality, TypeInfo)) {
3528  if (!isLastClause)
3529  MakeNewInstruction = true;
3530  CleanupFlag = false;
3531  break;
3532  }
3533  } else {
3534  // A filter clause. If any of the filter elements were already caught
3535  // then they can be dropped from the filter. It is tempting to try to
3536  // exploit the filter further by saying that any typeinfo that does not
3537  // occur in the filter can't be caught later (and thus can be dropped).
3538  // However this would be wrong, since typeinfos can match without being
3539  // equal (for example if one represents a C++ class, and the other some
3540  // class derived from it).
3541  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
3542  Constant *FilterClause = LI.getClause(i);
3543  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
3544  unsigned NumTypeInfos = FilterType->getNumElements();
3545 
3546  // An empty filter catches everything, so there is no point in keeping any
3547  // following clauses or marking the landingpad as having a cleanup. By
3548  // dealing with this case here the following code is made a bit simpler.
3549  if (!NumTypeInfos) {
3550  NewClauses.push_back(FilterClause);
3551  if (!isLastClause)
3552  MakeNewInstruction = true;
3553  CleanupFlag = false;
3554  break;
3555  }
3556 
3557  bool MakeNewFilter = false; // If true, make a new filter.
3558  SmallVector<Constant *, 16> NewFilterElts; // New elements.
3559  if (isa<ConstantAggregateZero>(FilterClause)) {
3560  // Not an empty filter - it contains at least one null typeinfo.
3561  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
3562  Constant *TypeInfo =
3563  Constant::getNullValue(FilterType->getElementType());
3564  // If this typeinfo is a catch-all then the filter can never match.
3565  if (isCatchAll(Personality, TypeInfo)) {
3566  // Throw the filter away.
3567  MakeNewInstruction = true;
3568  continue;
3569  }
3570 
3571  // There is no point in having multiple copies of this typeinfo, so
3572  // discard all but the first copy if there is more than one.
3573  NewFilterElts.push_back(TypeInfo);
3574  if (NumTypeInfos > 1)
3575  MakeNewFilter = true;
3576  } else {
3577  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
3578  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
3579  NewFilterElts.reserve(NumTypeInfos);
3580 
3581  // Remove any filter elements that were already caught or that already
3582  // occurred in the filter. While there, see if any of the elements are
3583  // catch-alls. If so, the filter can be discarded.
3584  bool SawCatchAll = false;
3585  for (unsigned j = 0; j != NumTypeInfos; ++j) {
3586  Constant *Elt = Filter->getOperand(j);
3587  Constant *TypeInfo = Elt->stripPointerCasts();
3588  if (isCatchAll(Personality, TypeInfo)) {
3589  // This element is a catch-all. Bail out, noting this fact.
3590  SawCatchAll = true;
3591  break;
3592  }
3593 
3594  // Even if we've seen a type in a catch clause, we don't want to
3595  // remove it from the filter. An unexpected type handler may be
3596  // set up for a call site which throws an exception of the same
3597  // type caught. In order for the exception thrown by the unexpected
3598  // handler to propagate correctly, the filter must be correctly
3599  // described for the call site.
3600  //
3601  // Example:
3602  //
3603  // void unexpected() { throw 1;}
3604  // void foo() throw (int) {
3605  // std::set_unexpected(unexpected);
3606  // try {
3607  // throw 2.0;
3608  // } catch (int i) {}
3609  // }
3610 
3611  // There is no point in having multiple copies of the same typeinfo in
3612  // a filter, so only add it if we didn't already.
3613  if (SeenInFilter.insert(TypeInfo).second)
3614  NewFilterElts.push_back(cast<Constant>(Elt));
3615  }
3616  // A filter containing a catch-all cannot match anything by definition.
3617  if (SawCatchAll) {
3618  // Throw the filter away.
3619  MakeNewInstruction = true;
3620  continue;
3621  }
3622 
3623  // If we dropped something from the filter, make a new one.
3624  if (NewFilterElts.size() < NumTypeInfos)
3625  MakeNewFilter = true;
3626  }
3627  if (MakeNewFilter) {
3628  FilterType = ArrayType::get(FilterType->getElementType(),
3629  NewFilterElts.size());
3630  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
3631  MakeNewInstruction = true;
3632  }
3633 
3634  NewClauses.push_back(FilterClause);
3635 
3636  // If the new filter is empty then it will catch everything so there is
3637  // no point in keeping any following clauses or marking the landingpad
3638  // as having a cleanup. The case of the original filter being empty was
3639  // already handled above.
3640  if (MakeNewFilter && !NewFilterElts.size()) {
3641  assert(MakeNewInstruction && "New filter but not a new instruction!");
3642  CleanupFlag = false;
3643  break;
3644  }
3645  }
3646  }
3647 
3648  // If several filters occur in a row then reorder them so that the shortest
3649  // filters come first (those with the smallest number of elements). This is
3650  // advantageous because shorter filters are more likely to match, speeding up
3651  // unwinding, but mostly because it increases the effectiveness of the other
3652  // filter optimizations below.
3653  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
3654  unsigned j;
3655  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
3656  for (j = i; j != e; ++j)
3657  if (!isa<ArrayType>(NewClauses[j]->getType()))
3658  break;
3659 
3660  // Check whether the filters are already sorted by length. We need to know
3661  // if sorting them is actually going to do anything so that we only make a
3662  // new landingpad instruction if it does.
3663  for (unsigned k = i; k + 1 < j; ++k)
3664  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
3665  // Not sorted, so sort the filters now. Doing an unstable sort would be
3666  // correct too but reordering filters pointlessly might confuse users.
3667  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
3668  shorter_filter);
3669  MakeNewInstruction = true;
3670  break;
3671  }
3672 
3673  // Look for the next batch of filters.
3674  i = j + 1;
3675  }
3676 
3677  // If typeinfos matched if and only if equal, then the elements of a filter L
3678  // that occurs later than a filter F could be replaced by the intersection of
3679  // the elements of F and L. In reality two typeinfos can match without being
3680  // equal (for example if one represents a C++ class, and the other some class
3681  // derived from it) so it would be wrong to perform this transform in general.
3682  // However the transform is correct and useful if F is a subset of L. In that
3683  // case L can be replaced by F, and thus removed altogether since repeating a
3684  // filter is pointless. So here we look at all pairs of filters F and L where
3685  // L follows F in the list of clauses, and remove L if every element of F is
3686  // an element of L. This can occur when inlining C++ functions with exception
3687  // specifications.
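  // Illustrative example (not taken from the source): given the clauses
  //   filter [1 x ptr] [ptr @typeid.A]
  //   filter [2 x ptr] [ptr @typeid.A, ptr @typeid.B]
  // the first filter is a subset of the second, so the second is redundant
  // and can be removed.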
3688  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
3689  // Examine each filter in turn.
3690  Value *Filter = NewClauses[i];
3691  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
3692  if (!FTy)
3693  // Not a filter - skip it.
3694  continue;
3695  unsigned FElts = FTy->getNumElements();
3696  // Examine each filter following this one. Doing this backwards means that
3697  // we don't have to worry about filters disappearing under us when removed.
3698  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
3699  Value *LFilter = NewClauses[j];
3700  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
3701  if (!LTy)
3702  // Not a filter - skip it.
3703  continue;
3704  // If Filter is a subset of LFilter, i.e. every element of Filter is also
3705  // an element of LFilter, then discard LFilter.
3706  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
3707  // If Filter is empty then it is a subset of LFilter.
3708  if (!FElts) {
3709  // Discard LFilter.
3710  NewClauses.erase(J);
3711  MakeNewInstruction = true;
3712  // Move on to the next filter.
3713  continue;
3714  }
3715  unsigned LElts = LTy->getNumElements();
3716  // If Filter is longer than LFilter then it cannot be a subset of it.
3717  if (FElts > LElts)
3718  // Move on to the next filter.
3719  continue;
3720  // At this point we know that LFilter has at least one element.
3721  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
3722  // Filter is a subset of LFilter iff Filter contains only zeros (as we
3723  // already know that Filter is not longer than LFilter).
3724  if (isa<ConstantAggregateZero>(Filter)) {
3725  assert(FElts <= LElts && "Should have handled this case earlier!");
3726  // Discard LFilter.
3727  NewClauses.erase(J);
3728  MakeNewInstruction = true;
3729  }
3730  // Move on to the next filter.
3731  continue;
3732  }
3733  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3734  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3735  // Since Filter is non-empty and contains only zeros, it is a subset of
3736  // LFilter iff LFilter contains a zero.
3737  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3738  for (unsigned l = 0; l != LElts; ++l)
3739  if (LArray->getOperand(l)->isNullValue()) {
3740  // LFilter contains a zero - discard it.
3741  NewClauses.erase(J);
3742  MakeNewInstruction = true;
3743  break;
3744  }
3745  // Move on to the next filter.
3746  continue;
3747  }
3748  // At this point we know that both filters are ConstantArrays. Loop over
3749  // operands to see whether every element of Filter is also an element of
3750  // LFilter. Since filters tend to be short this is probably faster than
3751  // using a method that scales nicely.
3752  ConstantArray *FArray = cast<ConstantArray>(Filter);
3753  bool AllFound = true;
3754  for (unsigned f = 0; f != FElts; ++f) {
3755  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3756  AllFound = false;
3757  for (unsigned l = 0; l != LElts; ++l) {
3758  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3759  if (LTypeInfo == FTypeInfo) {
3760  AllFound = true;
3761  break;
3762  }
3763  }
3764  if (!AllFound)
3765  break;
3766  }
3767  if (AllFound) {
3768  // Discard LFilter.
3769  NewClauses.erase(J);
3770  MakeNewInstruction = true;
3771  }
3772  // Move on to the next filter.
3773  }
3774  }
3775 
3776  // If we changed any of the clauses, replace the old landingpad instruction
3777  // with a new one.
3778  if (MakeNewInstruction) {
3779  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3780  NewClauses.size());
3781  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3782  NLI->addClause(NewClauses[i]);
3783  // A landing pad with no clauses must have the cleanup flag set. It is
3784  // theoretically possible, though highly unlikely, that we eliminated all
3785  // clauses. If so, force the cleanup flag to true.
3786  if (NewClauses.empty())
3787  CleanupFlag = true;
3788  NLI->setCleanup(CleanupFlag);
3789  return NLI;
3790  }
3791 
3792  // Even if none of the clauses changed, we may nonetheless have understood
3793  // that the cleanup flag is pointless. Clear it if so.
3794  if (LI.isCleanup() != CleanupFlag) {
3795  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3796  LI.setCleanup(CleanupFlag);
3797  return &LI;
3798  }
3799 
3800  return nullptr;
3801 }
3802 
3803 Value *
3804 InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
3805  // Try to push freeze through instructions that propagate but don't produce
3806  // poison as far as possible. If the operand of the freeze satisfies three
3807  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
3808  // but one of its operands are guaranteed non-poison, then push the freeze
3809  // through to the one operand that is not guaranteed non-poison. The actual
3810  // transform is as follows.
3811  // Op1 = ... ; Op1 can be poison
3812  // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only one
3813  // ; operand that may be poison (Op1)
3814  // ... = Freeze(Op0)
3815  // =>
3816  // Op1 = ...
3817  // Op1.fr = Freeze(Op1)
3818  // ... = Inst(Op1.fr, NonPoisonOps...)
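  // Illustrative IR (not taken from the source):
  //   %a = add nuw i64 %x, 1      ; nuw could create poison, so it is dropped
  //   %f = freeze i64 %a
  // becomes
  //   %x.fr = freeze i64 %x
  //   %a    = add i64 %x.fr, 1    ; users of %f now use %a directly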
3819  auto *OrigOp = OrigFI.getOperand(0);
3820  auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
3821 
3822  // While we could change the other users of OrigOp to use freeze(OrigOp), that
3823  // potentially reduces their optimization potential, so let's only do this iff
3824  // the OrigOp is only used by the freeze.
3825  if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
3826  return nullptr;
3827 
3828  // We can't push the freeze through an instruction which can itself create
3829  // poison. If the only source of new poison is flags, we can simply
3830  // strip them (since we know the only use is the freeze and nothing can
3831  // benefit from them.)
3832  if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
3833  /*ConsiderFlagsAndMetadata*/ false))
3834  return nullptr;
3835 
3836  // If operand is guaranteed not to be poison, there is no need to add freeze
3837  // to the operand. So we first find the operand that is not guaranteed to be
3838  // poison.
3839  Use *MaybePoisonOperand = nullptr;
3840  for (Use &U : OrigOpInst->operands()) {
3841  if (isa<MetadataAsValue>(U.get()) ||
3842  isGuaranteedNotToBeUndefOrPoison(U.get()))
3843  continue;
3844  if (!MaybePoisonOperand)
3845  MaybePoisonOperand = &U;
3846  else
3847  return nullptr;
3848  }
3849 
3850  OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
3851 
3852  // If all operands are guaranteed to be non-poison, we can drop freeze.
3853  if (!MaybePoisonOperand)
3854  return OrigOp;
3855 
3856  Builder.SetInsertPoint(OrigOpInst);
3857  auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
3858  MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
3859 
3860  replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
3861  return OrigOp;
3862 }
3863 
3864 Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
3865  PHINode *PN) {
3866  // Detect whether this is a recurrence with a start value and some number of
3867  // backedge values. We'll check whether we can push the freeze through the
3868  // backedge values (possibly dropping poison flags along the way) until we
3869  // reach the phi again. In that case, we can move the freeze to the start
3870  // value.
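  // Illustrative example (not taken from the source):
  //   %iv      = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nsw i32 %iv, 1
  //   %f       = freeze i32 %iv
  // The freeze can be applied to %start instead (dropping nsw on the backedge
  // add), after which the phi itself no longer needs to be frozen.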
3871  Use *StartU = nullptr;
3872  SmallVector<Value *> Worklist;
3873  for (Use &U : PN->incoming_values()) {
3874  if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
3875  // Add backedge value to worklist.
3876  Worklist.push_back(U.get());
3877  continue;
3878  }
3879 
3880  // Don't bother handling multiple start values.
3881  if (StartU)
3882  return nullptr;
3883  StartU = &U;
3884  }
3885 
3886  if (!StartU || Worklist.empty())
3887  return nullptr; // Not a recurrence.
3888 
3889  Value *StartV = StartU->get();
3890  BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
3891  bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
3892  // We can't insert the freeze if the start value is the result of the
3893  // terminator (e.g. an invoke).
3894  if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
3895  return nullptr;
3896 
3897  SmallPtrSet<Value *, 32> Visited;
3898  SmallVector<Instruction *> DropFlags;
3899  while (!Worklist.empty()) {
3900  Value *V = Worklist.pop_back_val();
3901  if (!Visited.insert(V).second)
3902  continue;
3903 
3904  if (Visited.size() > 32)
3905  return nullptr; // Limit the total number of values we inspect.
3906 
3907  // Assume that PN is non-poison, because it will be after the transform.
3908  if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
3909  continue;
3910 
3911  Instruction *I = dyn_cast<Instruction>(V);
3912  if (!I || canCreateUndefOrPoison(cast<Operator>(I),
3913  /*ConsiderFlagsAndMetadata*/ false))
3914  return nullptr;
3915 
3916  DropFlags.push_back(I);
3917  append_range(Worklist, I->operands());
3918  }
3919 
3920  for (Instruction *I : DropFlags)
3921  I->dropPoisonGeneratingFlagsAndMetadata();
3922 
3923  if (StartNeedsFreeze) {
3924  Builder.SetInsertPoint(StartBB->getTerminator());
3925  Value *FrozenStartV = Builder.CreateFreeze(StartV,
3926  StartV->getName() + ".fr");
3927  replaceUse(*StartU, FrozenStartV);
3928  }
3929  return replaceInstUsesWith(FI, PN);
3930 }
3931 
3932 bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
3933  Value *Op = FI.getOperand(0);
3934 
3935  if (isa<Constant>(Op) || Op->hasOneUse())
3936  return false;
3937 
3938  // Move the freeze directly after the definition of its operand, so that
3939  // it dominates the maximum number of uses. Note that it may not dominate
3940  // *all* uses if the operand is an invoke/callbr and the use is in a phi on
3941  // the normal/default destination. This is why the domination check in the
3942  // replacement below is still necessary.
3943  Instruction *MoveBefore;
3944  if (isa<Argument>(Op)) {
3945  MoveBefore =
3946  &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
3947  } else {
3948  MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
3949  if (!MoveBefore)
3950  return false;
3951  }
3952 
3953  bool Changed = false;
3954  if (&FI != MoveBefore) {
3955  FI.moveBefore(MoveBefore);
3956  Changed = true;
3957  }
3958 
3959  Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
3960  bool Dominates = DT.dominates(&FI, U);
3961  Changed |= Dominates;
3962  return Dominates;
3963  });
3964 
3965  return Changed;
3966 }
3967 
3968 Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
3969  Value *Op0 = I.getOperand(0);
3970 
3971  if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
3972  return replaceInstUsesWith(I, V);
3973 
3974  // freeze (phi const, x) --> phi const, (freeze x)
3975  if (auto *PN = dyn_cast<PHINode>(Op0)) {
3976  if (Instruction *NV = foldOpIntoPhi(I, PN))
3977  return NV;
3978  if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
3979  return NV;
3980  }
3981 
3982  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
3983  return replaceInstUsesWith(I, NI);
3984 
3985  // If I is freeze(undef), check its uses and fold it to a fixed constant.
3986  // - or: pick -1
3987  // - select's condition: if the true value is constant, choose it by making
3988  // the condition true.
3989  // - default: pick 0
3990  //
3991  // Note that this transform is intentionally done here rather than
3992  // via an analysis in InstSimplify or at individual user sites. That is
3993  // because we must produce the same value for all uses of the freeze -
3994  // it's the reason "freeze" exists!
3995  //
3996  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
3997  // duplicating logic for binops at least.
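  // Illustrative example (not taken from the source): if I is freeze(i32
  // undef) and its only user is `%r = or i32 %f, %x`, then -1 is chosen, so
  // %r can fold to -1; with disagreeing users we fall back to 0.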
3998  auto getUndefReplacement = [&I](Type *Ty) {
3999  Constant *BestValue = nullptr;
4000  Constant *NullValue = Constant::getNullValue(Ty);
4001  for (const auto *U : I.users()) {
4002  Constant *C = NullValue;
4003  if (match(U, m_Or(m_Value(), m_Value())))
4004  C = Constant::getAllOnesValue(Ty);
4005  else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
4006  C = ConstantInt::getTrue(Ty);
4007 
4008  if (!BestValue)
4009  BestValue = C;
4010  else if (BestValue != C)
4011  BestValue = NullValue;
4012  }
4013  assert(BestValue && "Must have at least one use");
4014  return BestValue;
4015  };
4016 
4017  if (match(Op0, m_Undef()))
4018  return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
4019 
4020  Constant *C;
4021  if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
4022  Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
4023  return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
4024  }
4025 
4026  // Replace uses of Op with freeze(Op).
4027  if (freezeOtherUses(I))
4028  return &I;
4029 
4030  return nullptr;
4031 }
4032 
4033 /// Check for case where the call writes to an otherwise dead alloca. This
4034 /// shows up for unused out-params in idiomatic C/C++ code. Note that this
4035 /// helper *only* analyzes the write; it doesn't check any other legality aspect.
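///
/// Illustrative example (not taken from the source; `get_value` is a
/// hypothetical callee whose only write is through the out-param):
///   int unused;
///   get_value(&unused);   // `unused` is never read afterwards
/// The call's only write lands in a dead alloca, so the write is not
/// observable and need not block sinking the call.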
4036 static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
4037  auto *CB = dyn_cast<CallBase>(I);
4038  if (!CB)
4039  // TODO: handle e.g. store to alloca here - only worth doing if we extend
4040  // to allow reload along used path as described below. Otherwise, this
4041  // is simply a store to a dead allocation which will be removed.
4042  return false;
4043  std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
4044  if (!Dest)
4045  return false;
4046  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
4047  if (!AI)
4048  // TODO: allow malloc?
4049  return false;
4050  // TODO: allow memory access dominated by move point? Note that since AI
4051  // could have a reference to itself captured by the call, we would need to
4052  // account for cycles in doing so.
4053  SmallVector<const User *> AllocaUsers;
4054  SmallPtrSet<const User *, 4> Visited;
4055  auto pushUsers = [&](const Instruction &I) {
4056  for (const User *U : I.users()) {
4057  if (Visited.insert(U).second)
4058  AllocaUsers.push_back(U);
4059  }
4060  };
4061  pushUsers(*AI);
4062  while (!AllocaUsers.empty()) {
4063  auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
4064  if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
4065  isa<AddrSpaceCastInst>(UserI)) {
4066  pushUsers(*UserI);
4067  continue;
4068  }
4069  if (UserI == CB)
4070  continue;
4071  // TODO: support lifetime.start/end here
4072  return false;
4073  }
4074  return true;
4075 }
4076 
4077 /// Try to move the specified instruction from its current block into the
4078 /// beginning of DestBlock, which can only happen if it's safe to move the
4079 /// instruction past all of the instructions between it and the end of its
4080 /// block.
4081 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
4082  TargetLibraryInfo &TLI) {
4083  BasicBlock *SrcBlock = I->getParent();
4084 
4085  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
4086  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
4087  I->isTerminator())
4088  return false;
4089 
4090  // Do not sink static or dynamic alloca instructions. Static allocas must
4091  // remain in the entry block, and dynamic allocas must not be sunk in between
4092  // a stacksave / stackrestore pair, which would incorrectly shorten its
4093  // lifetime.
4094  if (isa<AllocaInst>(I))
4095  return false;
4096 
4097  // Do not sink into catchswitch blocks.
4098  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
4099  return false;
4100 
4101  // Do not sink convergent call instructions.
4102  if (auto *CI = dyn_cast<CallInst>(I)) {
4103  if (CI->isConvergent())
4104  return false;
4105  }
4106 
4107  // Unless we can prove that the memory write isn't visible except on the
4108  // path we're sinking to, we must bail.
4109  if (I->mayWriteToMemory()) {
4110  if (!SoleWriteToDeadLocal(I, TLI))
4111  return false;
4112  }
4113 
4114  // We can only sink load instructions if there is nothing between the load and
4115  // the end of block that could change the value.
4116  if (I->mayReadFromMemory()) {
4117  // We don't want to do any sophisticated alias analysis, so we only check
4118  // the instructions after I in I's parent block if we try to sink to its
4119  // successor block.
4120  if (DestBlock->getUniquePredecessor() != I->getParent())
4121  return false;
4122  for (BasicBlock::iterator Scan = std::next(I->getIterator()),
4123  E = I->getParent()->end();
4124  Scan != E; ++Scan)
4125  if (Scan->mayWriteToMemory())
4126  return false;
4127  }
4128 
4129  I->dropDroppableUses([DestBlock](const Use *U) {
4130  if (auto *I = dyn_cast<Instruction>(U->getUser()))
4131  return I->getParent() != DestBlock;
4132  return true;
4133  });
4134  /// FIXME: We could remove droppable uses that are not dominated by
4135  /// the new position.
4136 
4137  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
4138  I->moveBefore(&*InsertPos);
4139  ++NumSunkInst;
4140 
4141  // Also sink all related debug uses from the source basic block. Otherwise we
4142  // get a debug use before the def. Attempt to salvage debug uses first, to
4143  // maximise the range over which variables have a location. If we cannot salvage, then
4144  // mark the location undef: we know it was supposed to receive a new location
4145  // here, but that computation has been sunk.
4146  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
4147  findDbgUsers(DbgUsers, I);
4148  // Process the sinking DbgUsers in reverse order, as we only want to clone the
4149  // last appearing debug intrinsic for each given variable.
4150  SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
4151  for (DbgVariableIntrinsic *DVI : DbgUsers)
4152  if (DVI->getParent() == SrcBlock)
4153  DbgUsersToSink.push_back(DVI);
4154  llvm::sort(DbgUsersToSink,
4155  [](auto *A, auto *B) { return B->comesBefore(A); });
4156 
4157  SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
4158  SmallSet<DebugVariable, 4> SunkVariables;
4159  for (auto *User : DbgUsersToSink) {
4160  // A dbg.declare instruction should not be cloned, since there can only be
4161  // one per variable fragment. It should be left in the original place
4162  // because the sunk instruction is not an alloca (otherwise we could not be
4163  // here).
4164  if (isa<DbgDeclareInst>(User))
4165  continue;
4166 
4167  DebugVariable DbgUserVariable =
4168  DebugVariable(User->getVariable(), User->getExpression(),
4169  User->getDebugLoc()->getInlinedAt());
4170 
4171  if (!SunkVariables.insert(DbgUserVariable).second)
4172  continue;
4173 
4174  // Leave dbg.assign intrinsics in their original positions and there should
4175  // be no need to insert a clone.
4176  if (isa<DbgAssignIntrinsic>(User))
4177  continue;
4178 
4179  DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
4180  if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
4181  DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
4182  LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
4183  }
4184 
4185  // Perform salvaging without the clones, then sink the clones.
4186  if (!DIIClones.empty()) {
4187  salvageDebugInfoForDbgValues(*I, DbgUsers);
4188  // The clones are in reverse order of original appearance, reverse again to
4189  // maintain the original order.
4190  for (auto &DIIClone : llvm::reverse(DIIClones)) {
4191  DIIClone->insertBefore(&*InsertPos);
4192  LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
4193  }
4194  }
4195 
4196  return true;
4197 }
4198 
4199 bool InstCombinerImpl::run() {
4200  while (!Worklist.isEmpty()) {
4201  // Walk deferred instructions in reverse order, and push them to the
4202  // worklist, which means they'll end up popped from the worklist in-order.
4203  while (Instruction *I = Worklist.popDeferred()) {
4204  // Check to see if we can DCE the instruction. We do this already here to
4205  // reduce the number of uses and thus allow other folds to trigger.
4206  // Note that eraseInstFromFunction() may push additional instructions on
4207  // the deferred worklist, so this will DCE whole instruction chains.
4208  if (isInstructionTriviallyDead(I, &TLI)) {
4209  eraseInstFromFunction(*I);
4210  ++NumDeadInst;
4211  continue;
4212  }
4213 
4214  Worklist.push(I);
4215  }
4216 
4217  Instruction *I = Worklist.removeOne();
4218  if (I == nullptr) continue; // skip null values.
4219 
4220  // Check to see if we can DCE the instruction.
4221  if (isInstructionTriviallyDead(I, &TLI)) {
4222  eraseInstFromFunction(*I);
4223  ++NumDeadInst;
4224  continue;
4225  }
4226 
4227  if (!DebugCounter::shouldExecute(VisitCounter))
4228  continue;
4229 
4230  // Instruction isn't dead, see if we can constant propagate it.
4231  if (!I->use_empty() &&
4232  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
4233  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
4234  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
4235  << '\n');
4236 
4237  // Add operands to the worklist.
4238  replaceInstUsesWith(*I, C);
4239  ++NumConstProp;
4240  if (isInstructionTriviallyDead(I, &TLI))
4241  eraseInstFromFunction(*I);
4242  MadeIRChange = true;
4243  continue;
4244  }
4245  }
4246 
4247  // See if we can trivially sink this instruction to its user if we can
4248  // prove that the successor is not executed more frequently than our block.
4249  // Return the UserBlock if successful.
4250  auto getOptionalSinkBlockForInst =
4251  [this](Instruction *I) -> std::optional<BasicBlock *> {
4252  if (!EnableCodeSinking)
4253  return std::nullopt;
4254 
4255  BasicBlock *BB = I->getParent();
4256  BasicBlock *UserParent = nullptr;
4257  unsigned NumUsers = 0;
4258 
4259  for (auto *U : I->users()) {
4260  if (U->isDroppable())
4261  continue;
4262  if (NumUsers > MaxSinkNumUsers)
4263  return std::nullopt;
4264 
4265  Instruction *UserInst = cast<Instruction>(U);
4266  // Special handling for Phi nodes - get the block the use occurs in.
4267  if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
4268  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
4269  if (PN->getIncomingValue(i) == I) {
4270  // Bail out if we have uses in different blocks. We don't do any
4271  // sophisticated analysis (i.e., finding the NearestCommonDominator of
4272  // these use blocks).
4273  if (UserParent && UserParent != PN->getIncomingBlock(i))
4274  return std::nullopt;
4275  UserParent = PN->getIncomingBlock(i);
4276  }
4277  }
4278  assert(UserParent && "expected to find user block!");
4279  } else {
4280  if (UserParent && UserParent != UserInst->getParent())
4281  return std::nullopt;
4282  UserParent = UserInst->getParent();
4283  }
4284 
4285  // Make sure these checks are done only once; naturally, we do them the
4286  // first time we get the UserParent, which saves compile time.
4287  if (NumUsers == 0) {
4288  // Try sinking to another block. If that block is unreachable, then do
4289  // not bother. SimplifyCFG should handle it.
4290  if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
4291  return std::nullopt;
4292 
4293  auto *Term = UserParent->getTerminator();
4294  // See if the user is one of our successors that has only one
4295  // predecessor, so that we don't have to split the critical edge.
4296  // Another option where we can sink is a block that ends with a
4297  // terminator that does not pass control to another block (such as
4298  // return or unreachable or resume). In this case:
4299  // - I dominates the User (by SSA form);
4300  // - the User will be executed at most once.
4301  // So sinking I down to User is always profitable or neutral.
4302  if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
4303  return std::nullopt;
4304 
4305  assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
4306  }
4307 
4308  NumUsers++;
4309  }
4310 
4311  // No user or only has droppable users.
4312  if (!UserParent)
4313  return std::nullopt;
4314 
4315  return UserParent;
4316  };
4317 
4318  auto OptBB = getOptionalSinkBlockForInst(I);
4319  if (OptBB) {
4320  auto *UserParent = *OptBB;
4321  // Okay, the CFG is simple enough, try to sink this instruction.
4322  if (TryToSinkInstruction(I, UserParent, TLI)) {
4323  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
4324  MadeIRChange = true;
4325  // We'll add uses of the sunk instruction below, but since
4326  // sinking can expose opportunities for its *operands*, add
4327  // them to the worklist
4328  for (Use &U : I->operands())
4329  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
4330  Worklist.push(OpI);
4331  }
4332  }
4333 
4334  // Now that we have an instruction, try combining it to simplify it.
4335  Builder.SetInsertPoint(I);
4336  Builder.CollectMetadataToCopy(
4337  I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4338 
4339 #ifndef NDEBUG
4340  std::string OrigI;
4341 #endif
4342  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
4343  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
4344 
4345  if (Instruction *Result = visit(*I)) {
4346  ++NumCombined;
4347  // Should we replace the old instruction with a new one?
4348  if (Result != I) {
4349  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
4350  << " New = " << *Result << '\n');
4351 
4352  Result->copyMetadata(*I,
4353  {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
4354  // Everything uses the new instruction now.
4355  I->replaceAllUsesWith(Result);
4356 
4357  // Move the name to the new instruction first.
4358  Result->takeName(I);
4359 
4360  // Insert the new instruction into the basic block...
4361  BasicBlock *InstParent = I->getParent();
4362  BasicBlock::iterator InsertPos = I->getIterator();
4363 
4364  // Are we replacing a PHI with something that isn't a PHI, or vice versa?
4365  if (isa<PHINode>(Result) != isa<PHINode>(I)) {
4366  // We need to fix up the insertion point.
4367  if (isa<PHINode>(I)) // PHI -> Non-PHI
4368  InsertPos = InstParent->getFirstInsertionPt();
4369  else // Non-PHI -> PHI
4370  InsertPos = InstParent->getFirstNonPHI()->getIterator();
4371  }
4372 
4373  Result->insertInto(InstParent, InsertPos);
4374 
4375  // Push the new instruction and any users onto the worklist.
4376  Worklist.pushUsersToWorkList(*Result);
4377  Worklist.push(Result);
4378 
4379  eraseInstFromFunction(*I);
4380  } else {
4381  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
4382  << " New = " << *I << '\n');
4383 
4384  // If the instruction was modified, it's possible that it is now dead.
4385  // If so, remove it.
4386  if (isInstructionTriviallyDead(I, &TLI)) {
4387  eraseInstFromFunction(*I);
4388  } else {
4389  Worklist.pushUsersToWorkList(*I);
4390  Worklist.push(I);
4391  }
4392  }
4393  MadeIRChange = true;
4394  }
4395  }
4396 
4397  Worklist.zap();
4398  return MadeIRChange;
4399 }
4400 
4401 // Track the scopes used by !alias.scope and !noalias. In a function, a
4402 // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
4403 // by both sets. If not, the declaration of the scope can be safely omitted.
4404 // The MDNode of the scope can be omitted as well for the instructions that are
4405 // part of this function. We do not do that at this point, as this might become
4406 // too time consuming to do.
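// Illustrative example (not taken from the source): if a function contains
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// but no remaining memory access lists the scope in !2 in both its
// !alias.scope and its !noalias metadata, the declaration is considered dead.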
4407 class AliasScopeTracker {
4408  SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
4409  SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
4410 
4411 public:
4412  void analyse(Instruction *I) {
4413  // This seems to be faster than checking 'mayReadOrWriteMemory()'.
4414  if (!I->hasMetadataOtherThanDebugLoc())
4415  return;
4416 
4417  auto Track = [](Metadata *ScopeList, auto &Container) {
4418  const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
4419  if (!MDScopeList || !Container.insert(MDScopeList).second)
4420  return;
4421  for (const auto &MDOperand : MDScopeList->operands())
4422  if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
4423  Container.insert(MDScope);
4424  };
4425 
4426  Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
4427  Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
4428  }
4429 
4430  bool isNoAliasScopeDeclDead(Instruction *Inst) {
4431  NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
4432  if (!Decl)
4433  return false;
4434 
4435  assert(Decl->use_empty() &&
4436  "llvm.experimental.noalias.scope.decl in use ?");
4437  const MDNode *MDSL = Decl->getScopeList();
4438  assert(MDSL->getNumOperands() == 1 &&
4439  "llvm.experimental.noalias.scope should refer to a single scope");
4440  auto &MDOperand = MDSL->getOperand(0);
4441  if (auto *MD = dyn_cast<MDNode>(MDOperand))
4442  return !UsedAliasScopesAndLists.contains(MD) ||
4443  !UsedNoAliasScopesAndLists.contains(MD);
4444 
4445  // Not an MDNode? Throw it away.
4446  return true;
4447  }
4448 };
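
// The class above keeps two scope sets because, as the comment before it
// explains, a scope declaration only pays off when the scope appears in both
// the !alias.scope uses and the !noalias uses seen in the function. A minimal
// sketch of that two-set test using toy std::string scope names
// (ToyScopeTracker and its members are hypothetical, not part of this file):
#include <set>
#include <string>

struct ToyScopeTracker {
  std::set<std::string> UsedAliasScopes;
  std::set<std::string> UsedNoAliasScopes;

  void noteAliasScopeUse(const std::string &S) { UsedAliasScopes.insert(S); }
  void noteNoAliasUse(const std::string &S) { UsedNoAliasScopes.insert(S); }

  // Mirrors isNoAliasScopeDeclDead(): the declaration is dead if either
  // metadata kind never referenced the scope.
  bool isDeclDead(const std::string &S) const {
    return !UsedAliasScopes.count(S) || !UsedNoAliasScopes.count(S);
  }
};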
4449 
4450 /// Populate the IC worklist from a function, by walking it in depth-first
4451 /// order and adding all reachable code to the worklist.
4452 ///
4453 /// This has a couple of tricks to make the code faster and more powerful. In
4454 /// particular, we constant fold and DCE instructions as we go, to avoid adding
4455 /// them to the worklist (this significantly speeds up instcombine on code where
4456 /// many instructions are dead or constant). Additionally, if we find a branch
4457 /// whose condition is a known constant, we only visit the reachable successors.
4458 static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
4459  const TargetLibraryInfo *TLI,
4460  InstructionWorklist &ICWorklist) {
4461  bool MadeIRChange = false;
4462  SmallPtrSet<BasicBlock *, 32> Visited;
4463  SmallVector<BasicBlock *, 256> Worklist;
4464  Worklist.push_back(&F.front());
4465 
4466  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
4467  DenseMap<Constant *, Constant *> FoldedConstants;
4468  AliasScopeTracker SeenAliasScopes;
4469 
4470  do {
4471  BasicBlock *BB = Worklist.pop_back_val();
4472 
4473  // We have now visited this block! If we've already been here, ignore it.
4474  if (!Visited.insert(BB).second)
4475  continue;
4476 
4477  for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
4478  // ConstantProp instruction if trivially constant.
4479  if (!Inst.use_empty() &&
4480  (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
4481  if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
4482  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
4483  << '\n');
4484  Inst.replaceAllUsesWith(C);
4485  ++NumConstProp;
4486  if (isInstructionTriviallyDead(&Inst, TLI))
4487  Inst.eraseFromParent();
4488  MadeIRChange = true;
4489  continue;
4490  }
4491 
4492  // See if we can constant fold its operands.
4493  for (Use &U : Inst.operands()) {
4494  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
4495  continue;
4496 
4497  auto *C = cast<Constant>(U);
4498  Constant *&FoldRes = FoldedConstants[C];
4499  if (!FoldRes)
4500  FoldRes = ConstantFoldConstant(C, DL, TLI);
4501 
4502  if (FoldRes != C) {
4503  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
4504  << "\n Old = " << *C
4505  << "\n New = " << *FoldRes << '\n');
4506  U = FoldRes;
4507  MadeIRChange = true;
4508  }
4509  }
4510 
4511  // Skip processing debug and pseudo intrinsics in InstCombine. Processing
4512  // these call instructions consumes a non-trivial amount of time and
4513  // provides no value for the optimization.
4514  if (!Inst.isDebugOrPseudoInst()) {
4515  InstrsForInstructionWorklist.push_back(&Inst);
4516  SeenAliasScopes.analyse(&Inst);
4517  }
4518  }
4519 
4520  // Recursively visit successors. If this is a branch or switch on a
4521  // constant, only visit the reachable successor.
4522  Instruction *TI = BB->getTerminator();
4523  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
4524  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
4525  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
4526  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
4527  Worklist.push_back(ReachableBB);
4528  continue;
4529  }
4530  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
4531  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
4532  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
4533  continue;
4534  }
4535  }
4536 
4537  append_range(Worklist, successors(TI));
4538  } while (!Worklist.empty());
4539 
4540  // Remove instructions inside unreachable blocks. This prevents the
4541  // instcombine code from having to deal with some bad special cases, and
4542  // reduces use counts of instructions.
4543  for (BasicBlock &BB : F) {
4544  if (Visited.count(&BB))
4545  continue;
4546 
4547  unsigned NumDeadInstInBB;
4548  unsigned NumDeadDbgInstInBB;
4549  std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
4550      removeAllNonTerminatorAndEHPadInstructions(&BB);
4551 
4552  MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
4553  NumDeadInst += NumDeadInstInBB;
4554  }
4555 
4556  // Once we've found all of the instructions to add to instcombine's worklist,
4557  // add them in reverse order. This way instcombine will visit from the top
4558  // of the function down. This jibes well with the way that it adds all uses
4559  // of instructions to the worklist after doing a transformation, thus avoiding
4560  // some N^2 behavior in pathological cases.
4561  ICWorklist.reserve(InstrsForInstructionWorklist.size());
4562  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
4563  // DCE instruction if trivially dead. As we iterate in reverse program
4564  // order here, we will clean up whole chains of dead instructions.
4565  if (isInstructionTriviallyDead(Inst, TLI) ||
4566  SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
4567  ++NumDeadInst;
4568  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
4569  salvageDebugInfo(*Inst);
4570  Inst->eraseFromParent();
4571  MadeIRChange = true;
4572  continue;
4573  }
4574 
4575  ICWorklist.push(Inst);
4576  }
4577 
4578  return MadeIRChange;
4579 }
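
// prepareICWorklistFromFunction above only queues code that can actually
// execute: when a block's terminator branches or switches on a known
// constant, just the taken successor is followed. A self-contained sketch of
// that reachability walk over a toy CFG (ToyBlock and collectReachable are
// hypothetical names, not LLVM types):
#include <cstddef>
#include <optional>
#include <unordered_set>
#include <vector>

struct ToyBlock {
  std::vector<ToyBlock *> Succs;
  // Set when the terminator's condition folded to a constant; indexes the
  // only successor that can execute.
  std::optional<std::size_t> TakenSucc;
};

std::unordered_set<ToyBlock *> collectReachable(ToyBlock *Entry) {
  std::unordered_set<ToyBlock *> Visited;
  std::vector<ToyBlock *> Worklist{Entry};
  while (!Worklist.empty()) {
    ToyBlock *BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;
    if (BB->TakenSucc && *BB->TakenSucc < BB->Succs.size()) {
      Worklist.push_back(BB->Succs[*BB->TakenSucc]);
      continue;
    }
    for (ToyBlock *S : BB->Succs)
      Worklist.push_back(S);
  }
  return Visited;
}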
4580 
4581 static bool combineInstructionsOverFunction(
4582  Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
4583  AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
4584  DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
4585  ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
4586  auto &DL = F.getParent()->getDataLayout();
4587  MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
4588 
4589  /// Builder - This is an IRBuilder that automatically inserts new
4590  /// instructions into the worklist when they are created.
4591  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
4592  F.getContext(), TargetFolder(DL),
4593  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
4594  Worklist.add(I);
4595  if (auto *Assume = dyn_cast<AssumeInst>(I))
4596  AC.registerAssumption(Assume);
4597  }));
4598 
4599  // Lower dbg.declare intrinsics, otherwise their value may be clobbered
4600  // by the instcombiner.
4601  bool MadeIRChange = false;
4602  if (ShouldLowerDbgDeclare)
4603  MadeIRChange = LowerDbgDeclare(F);
4604  // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will
4605  // almost never return true when running an assignment tracking build. Take
4606  // this opportunity to do some clean up for assignment tracking builds too.
4607  if (!MadeIRChange && isAssignmentTrackingEnabled(*F.getParent())) {
4608  for (auto &BB : F)
4609      RemoveRedundantDbgInstrs(&BB);
4610  }
4611 
4612  // Iterate while there is work to do.
4613  unsigned Iteration = 0;
4614  while (true) {
4615  ++NumWorklistIterations;
4616  ++Iteration;
4617 
4618  if (Iteration > InfiniteLoopDetectionThreshold) {
4619      report_fatal_error(
4620  "Instruction Combining seems stuck in an infinite loop after " +
4621  Twine(InfiniteLoopDetectionThreshold) + " iterations.");
4622  }
4623 
4624  if (Iteration > MaxIterations) {
4625  LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
4626  << " on " << F.getName()
4627  << " reached; stopping before reaching a fixpoint\n");
4628  break;
4629  }
4630 
4631  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
4632  << F.getName() << "\n");
4633 
4634  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
4635 
4636  InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
4637  ORE, BFI, PSI, DL, LI);
4638  IC.MaxArraySizeForCombine = MaxArraySize;
4639 
4640  if (!IC.run())
4641  break;
4642 
4643  MadeIRChange = true;
4644  }
4645 
4646  return MadeIRChange;
4647 }
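
// combineInstructionsOverFunction iterates the combiner to a fixpoint, stops
// quietly at a soft iteration limit, and treats blowing through a much larger
// threshold as a bug. A minimal sketch of that control flow; the function
// name and the default limits here are illustrative, not the pass's real
// cl::opt values:
#include <cstdio>
#include <cstdlib>
#include <functional>

bool iterateToFixpoint(const std::function<bool()> &RunOnce,
                       unsigned MaxIters = 1000,
                       unsigned InfiniteLoopThreshold = 1000000) {
  bool Changed = false;
  for (unsigned Iteration = 1;; ++Iteration) {
    if (Iteration > InfiniteLoopThreshold) {
      std::fprintf(stderr, "combiner appears stuck in an infinite loop\n");
      std::abort();
    }
    if (Iteration > MaxIters)
      break; // give up before reaching a fixpoint
    if (!RunOnce())
      break; // fixpoint reached, nothing changed this round
    Changed = true;
  }
  return Changed;
}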
4648 
4650 
4651 InstCombinePass::InstCombinePass(unsigned MaxIterations)
4652  : MaxIterations(MaxIterations) {}
4653 
4654 PreservedAnalyses InstCombinePass::run(Function &F,
4655  FunctionAnalysisManager &AM) {
4656  auto &AC = AM.getResult<AssumptionAnalysis>(F);
4657  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
4658  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
4659  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4660  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
4661 
4662  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4663 
4664  auto *AA = &AM.getResult<AAManager>(F);
4665  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
4666  ProfileSummaryInfo *PSI =
4667  MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
4668  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
4669  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
4670 
4671  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4672  BFI, PSI, MaxIterations, LI))
4673  // No changes, all analyses are preserved.
4674  return PreservedAnalyses::all();
4675 
4676  // Mark all the analyses that instcombine updates as preserved.
4677  PreservedAnalyses PA;
4678  PA.preserveSet<CFGAnalyses>();
4679  return PA;
4680 }
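
// A minimal sketch of how a client might schedule this pass with the new pass
// manager (equivalently, `opt -passes=instcombine`). Running the resulting
// FunctionPassManager also requires a FunctionAnalysisManager populated via
// PassBuilder, which is not shown; the include paths are the usual locations
// and should be checked against your LLVM tree.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

llvm::FunctionPassManager makeInstCombinePipeline() {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::InstCombinePass()); // default iteration limit
  return FPM;
}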
4681 
4682 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
4683  AU.setPreservesCFG();
4696 }
4697 
4698 bool InstructionCombiningPass::runOnFunction(Function &F) {
4699  if (skipFunction(F))
4700  return false;
4701 
4702  // Required analyses.
4703  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4704  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4705  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4706  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
4707  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
4708  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
4709 
4710  // Optional analyses.
4711  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
4712  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
4713  ProfileSummaryInfo *PSI =
4714  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
4715  BlockFrequencyInfo *BFI =
4716  (PSI && PSI->hasProfileSummary()) ?
4717  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
4718  nullptr;
4719 
4720  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
4721  BFI, PSI, MaxIterations, LI);
4722 }
4723 
4724 char InstructionCombiningPass::ID = 0;
4725 
4726 InstructionCombiningPass::InstructionCombiningPass()
4727  : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
4728  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4729 }
4730 
4731 InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
4732  : FunctionPass(ID), MaxIterations(MaxIterations) {
4733  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
4734 }
4735 
4736 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
4737  "Combine redundant instructions", false, false)
4749 
4750 // Initialization Routines
4751 void llvm::initializeInstCombine(PassRegistry &Registry) {
4752  initializeInstructionCombiningPassPass(Registry);
4753 }
4754 
4755 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
4756  initializeInstCombine(*unwrap(R));
4757 }
4758 
4759 FunctionPass *llvm::createInstructionCombiningPass() {
4760  return new InstructionCombiningPass();
4761 }
4762 
4763 FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
4764  return new InstructionCombiningPass(MaxIterations);
4765 }
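
// A minimal sketch of driving the legacy pass through the factory functions
// above. The header locations are the usual ones and should be verified
// against your LLVM tree; runLegacyInstCombine is a hypothetical helper.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

void runLegacyInstCombine(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createInstructionCombiningPass());
  PM.run(M);
}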
4766 
4767 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
4768  unwrap(PM)->add(createInstructionCombiningPass());
4769 }