//===- InstructionCombining.cpp - Combine multiple instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
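//
// For example (an illustrative instance of canonicalization 6, not quoted
// from this pass's tests):
//    %r = mul i32 %X, 8
// becomes:
//    %r = shl i32 %X, 3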
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
STATISTIC(NumExpand,    "Number of expansions");
STATISTIC(NumFactor   , "Number of factorizations");
STATISTIC(NumReassoc  , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");

static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                  cl::init(true));

static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
                        cl::desc("Enable expensive instruction combines"));

static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));

Value *InstCombiner::EmitGEPOffset(User *GEP) {
  return llvm::EmitGEPOffset(&Builder, DL, GEP);
}

/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. A width of '1' is always treated as a legal type
/// because i1 is a fundamental type in IR, and there are many specialized
/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
/// from frontend languages.
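///
/// For example (illustrative, assuming a data layout where i32 and i64 are
/// the legal integer widths): i39 -> i32 is desirable (shrinking to a common
/// width), i32 -> i39 is not (legal to illegal), and i160 -> i64 is allowed
/// while i64 -> i160 is not.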
bool InstCombiner::shouldChangeType(unsigned FromWidth,
                                    unsigned ToWidth) const {
  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);

  // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
  // shrink types, to prevent infinite loops.
  if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
    return true;

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}

/// Return true if it is desirable to convert a computation from 'From' to 'To'.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
  // TODO: This could be extended to allow vectors. Datalayout changes might be
  // needed to properly support that.
  if (!From->isIntegerTy() || !To->isIntegerTy())
    return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  return shouldChangeType(FromWidth, ToWidth);
}

// Return true if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
// not overflow. This function only handles the Add and Sub opcodes. For
// all other opcodes, the function conservatively returns false.
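//
// For example (illustrative): for "add nsw i8 (add nsw i8 %X, 100), 20",
// reassociating is safe because 100 + 20 = 120 fits in i8; with constants
// 100 and 100 the fold would overflow and nsw could not be kept.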
static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  if (!OBO || !OBO->hasNoSignedWrap())
    return false;

  // We reason about Add and Sub only.
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
    return false;

  const APInt *BVal, *CVal;
  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
    return false;

  bool Overflow = false;
  if (Opcode == Instruction::Add)
    (void)BVal->sadd_ov(*CVal, Overflow);
  else
    (void)BVal->ssub_ov(*CVal, Overflow);

  return !Overflow;
}

/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable as they can be
/// preserved.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
  auto *FPMO = dyn_cast<FPMathOperator>(&I);
  if (!FPMO) {
    I.clearSubclassOptionalData();
    return;
  }

  FastMathFlags FMF = I.getFastMathFlags();
  I.clearSubclassOptionalData();
  I.setFastMathFlags(FMF);
}

/// Combine constant operands of associative operations either before or after a
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
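///
/// For example (illustrative): "or (zext (or i8 %x, 3) to i32), 8" becomes
/// "or (zext i8 %x to i32), 11", folding the two constants in the wider type.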
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
  Cast->setOperand(0, BinOp2->getOperand(0));
  BinOp1->setOperand(1, FoldedC);
  return true;
}

/// This performs a few simplifications for operators that are associative or
/// commutative:
///
/// Commutative operators:
///
/// 1. Order operands such that they are listed from right (least complex) to
///    left (most complex). This puts constants before unary operators before
///    binary operators.
///
/// Associative operators:
///
/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
/// Associative and commutative operators:
///
/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///    if C1 and C2 are constants.
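///
/// For example (illustrative), transform 2 rewrites "(%x + 7) + (-7)" as
/// "%x + (7 + (-7))": the inner constant pair simplifies to 0, leaving
/// "add %x, 0", which later folds away.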
bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex). This puts constants before unary operators before
    // binary operators.
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
        getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "A op V".
          I.setOperand(0, A);
          I.setOperand(1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          if (MaintainNoSignedWrap(I, B, C) &&
              (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
            // Note: this is only valid because SimplifyBinOp doesn't look at
            // the operands to Op0.
            I.clearSubclassOptionalData();
            I.setHasNoSignedWrap(true);
          } else {
            ClearSubclassDataAfterReassociation(I);
          }

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op C".
          I.setOperand(0, V);
          I.setOperand(1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op B".
          I.setOperand(0, V);
          I.setOperand(1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "B op V".
          I.setOperand(0, B);
          I.setOperand(1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
      // if C1 and C2 are constants.
      Value *A, *B;
      Constant *C1, *C2;
      if (Op0 && Op1 &&
          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
          match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
          match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
        BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
        if (isa<FPMathOperator>(NewBO)) {
          FastMathFlags Flags = I.getFastMathFlags();
          Flags &= Op0->getFastMathFlags();
          Flags &= Op1->getFastMathFlags();
          NewBO->setFastMathFlags(Flags);
        }
        InsertNewInstWith(NewBO, I);
        NewBO->takeName(Op1);
        I.setOperand(0, NewBO);
        I.setOperand(1, ConstantExpr::get(Opcode, C1, C2));
        // Conservatively clear the optional flags, since they may not be
        // preserved by the reassociation.
        ClearSubclassDataAfterReassociation(I);

        Changed = true;
        continue;
      }
    }

    // No further simplifications.
    return Changed;
  } while (true);
}

/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  // X & (Y | Z) <--> (X & Y) | (X & Z)
  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
  if (LOp == Instruction::And)
    return ROp == Instruction::Or || ROp == Instruction::Xor;

  // X | (Y & Z) <--> (X | Y) & (X | Z)
  if (LOp == Instruction::Or)
    return ROp == Instruction::And;

  // X * (Y + Z) <--> (X * Y) + (X * Z)
  // X * (Y - Z) <--> (X * Y) - (X * Z)
  if (LOp == Instruction::Mul)
    return ROp == Instruction::Add || ROp == Instruction::Sub;

  return false;
}

/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  if (Instruction::isCommutative(ROp))
    return leftDistributesOverRight(ROp, LOp);

  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);

  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
  // but this requires knowing that the addition does not overflow and other
  // such subtleties.
}

/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
  if (isa<Constant>(V))
    return nullptr;

  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}

/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
      // X << C --> X * (1 << C)
      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  return Op->getOpcode();
}

/// This tries to simplify binary operations by factorizing out common terms
/// (e.g., "(A*B)+(A*C)" -> "A*(B+C)").
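///
/// For example (illustrative): "add (mul %a, %b), (mul %a, %c)" becomes
/// "mul %a, (add %b, %c)", trading two multiplies for one.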
Value *InstCombiner::tryFactorization(BinaryOperator &I,
                                      Instruction::BinaryOps InnerOpcode,
                                      Value *A, Value *B, Value *C, Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *SimplifiedInst = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
      // If "B op D" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
      }
    }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

      // If "A op C" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
      }
    }

  if (SimplifiedInst) {
    ++NumFactor;
    SimplifiedInst->takeName(&I);

    // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
    // TODO: Check for NUW.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
      if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
        bool HasNSW = false;
        if (isa<OverflowingBinaryOperator>(&I))
          HasNSW = I.hasNoSignedWrap();

        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
          HasNSW &= LOBO->hasNoSignedWrap();

        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
          HasNSW &= ROBO->hasNoSignedWrap();

        // We can propagate 'nsw' if we know that
        //  %Y = mul nsw i16 %X, C
        //  %Z = add nsw i16 %Y, %X
        // =>
        //  %Z = mul nsw i16 %X, C+1
        //
        // iff C+1 isn't INT_MIN
        const APInt *CInt;
        if (TopLevelOpcode == Instruction::Add &&
            InnerOpcode == Instruction::Mul)
          if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
            BO->setHasNoSignedWrap(HasNSW);
      }
    }
  }
  return SimplifiedInst;
}

/// This tries to simplify binary operations which some other binary operation
/// distributes over either by factorizing out common terms
/// (e.g., "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (e.g., "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
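///
/// For example (illustrative): "or (and %a, %b), (and %a, %c)" factors to
/// "and %a, (or %b, %c)"; the reverse expansion is only performed when one
/// of the expanded terms simplifies.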
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  {
    // Factorization.
    Value *A, *B, *C, *D;
    Instruction::BinaryOps LHSOpcode, RHSOpcode;
    if (Op0)
      LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
    if (Op1)
      RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);

    // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
    // a common term.
    if (Op0 && Op1 && LHSOpcode == RHSOpcode)
      if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
        return V;

    // The instruction has the form "(A op' B) op (C)". Try to factorize a
    // common term.
    if (Op0)
      if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
        if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
          return V;

    // The instruction has the form "(B) op (C op' D)". Try to factorize a
    // common term.
    if (Op1)
      if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
        if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
          return V;
  }

  // Expansion.
  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C". See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
    Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I));

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(InnerOpcode, L, R);
      C->takeName(&I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // It does! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, B, C);
      C->takeName(&I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // It does! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, A, C);
      C->takeName(&I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)". See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I));
    Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(InnerOpcode, L, R);
      A->takeName(&I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // It does! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, C);
      A->takeName(&I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // It does! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, B);
      A->takeName(&I);
      return A;
    }
  }

  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}

Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                    Value *LHS, Value *RHS) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  // (op (select (a, b, c)), (select (a, d, e))) ->
  //   (select (a, (op b, d), (op c, e)))
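  //
  // For example (illustrative): "add (select %p, 1, 2), (select %p, 10, 20)"
  // becomes "select %p, 11, 22", since both arms fold to constants.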
  Value *A, *B, *C, *D, *E;
  Value *SI = nullptr;
  if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) &&
      match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) {
    bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse();
    BuilderTy::FastMathFlagGuard Guard(Builder);
    if (isa<FPMathOperator>(&I))
      Builder.setFastMathFlags(I.getFastMathFlags());

    Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I));
    Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I));
    if (V1 && V2)
      SI = Builder.CreateSelect(A, V2, V1);
    else if (V2 && SelectsHaveOneUse)
      SI = Builder.CreateSelect(A, V2, Builder.CreateBinOp(Opcode, C, E));
    else if (V1 && SelectsHaveOneUse)
      SI = Builder.CreateSelect(A, Builder.CreateBinOp(Opcode, B, D), V1);

    if (SI)
      SI->takeName(&I);
  }

  return SI;
}

/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
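///
/// For example (illustrative): for "%neg = sub i32 0, %x" this returns %x,
/// and for the constant 5 it returns -5.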
Value *InstCombiner::dyn_castNegVal(Value *V) const {
  Value *NegV;
  if (match(V, m_Neg(m_Value(NegV))))
    return NegV;

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
    if (C->getType()->getElementType()->isIntegerTy())
      return ConstantExpr::getNeg(C);

  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt)
        return nullptr;

      if (isa<UndefValue>(Elt))
        continue;

      if (!isa<ConstantInt>(Elt))
        return nullptr;
    }
    return ConstantExpr::getNeg(CV);
  }

  return nullptr;
}

static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner::BuilderTy &Builder) {
  if (auto *Cast = dyn_cast<CastInst>(&I))
    return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());

  assert(I.isBinaryOp() && "Unexpected opcode for select folding");

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (auto *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  auto *BO = cast<BinaryOperator>(&I);
  Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
                                  SO->getName() + ".op");
  auto *FPInst = dyn_cast<Instruction>(RI);
  if (FPInst && isa<FPMathOperator>(FPInst))
    FPInst->copyFastMathFlags(BO);
  return RI;
}

Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse())
    return nullptr;

  Value *TV = SI->getTrueValue();
  Value *FV = SI->getFalseValue();
  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
    return nullptr;

  // Bool selects with constant operands can be folded to logical ops.
  if (SI->getType()->isIntOrIntVectorTy(1))
    return nullptr;

  // If it's a bitcast involving vectors, make sure it has the same number of
  // elements on both sides.
  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
    VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
    VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());

    // Verify that either both or neither are vectors.
    if ((SrcTy == nullptr) != (DestTy == nullptr))
      return nullptr;

    // If vectors, verify that they have the same number of elements.
    if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
      return nullptr;
  }

  // Test if a CmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
    if (CI->hasOneUse()) {
      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
        return nullptr;
    }
  }

  Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
  Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}

static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
                                        InstCombiner::BuilderTy &Builder) {
  bool ConstIsRHS = isa<Constant>(I->getOperand(1));
  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));

  if (auto *InC = dyn_cast<Constant>(InV)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I->getOpcode(), InC, C);
    return ConstantExpr::get(I->getOpcode(), C, InC);
  }

  Value *Op0 = InV, *Op1 = C;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
  auto *FPInst = dyn_cast<Instruction>(RI);
  if (FPInst && isa<FPMathOperator>(FPInst))
    FPInst->copyFastMathFlags(I);
  return RI;
}

Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0)
    return nullptr;

  // We normally only transform phis with a single use. However, if a PHI has
  // multiple uses and they are all the same operation, we can fold *all* of the
  // uses into the PHI.
  if (!PN->hasOneUse()) {
    // Walk the use list for the instruction, comparing them to I.
    for (User *U : PN->users()) {
      Instruction *UI = cast<Instruction>(U);
      if (UI != &I && !I.isIdenticalTo(UI))
        return nullptr;
    }
    // Otherwise, we can replace *all* users with the new PHI we form.
  }

  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef). If there is one non-constant value,
  // remember the BB it is in. If there is more than one or if *it* is a PHI,
  // bail out. We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = nullptr;
  for (unsigned i = 0; i != NumPHIValues; ++i) {
    Value *InVal = PN->getIncomingValue(i);
    if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
      continue;

    if (isa<PHINode>(InVal)) return nullptr;  // Itself a phi.
    if (NonConstBB) return nullptr;  // More than one non-const value.

    NonConstBB = PN->getIncomingBlock(i);

    // If the InVal is an invoke at the end of the pred block, then we can't
    // insert a computation after it without breaking the edge.
    if (isa<InvokeInst>(InVal))
      if (cast<Instruction>(InVal)->getParent() == NonConstBB)
        return nullptr;

    // If the incoming non-constant value is in I's block, we will remove one
    // instruction, but insert another equivalent one, leading to infinite
    // instcombine.
    if (isPotentiallyReachable(I.getParent(), NonConstBB, &DT, LI))
      return nullptr;
  }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block. However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop). Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != nullptr) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return nullptr;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // If we are going to have to insert a new computation, do so right before the
  // predecessor's terminator.
  if (NonConstBB)
    Builder.SetInsertPoint(NonConstBB->getTerminator());

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = nullptr;
      // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
      // even if currently isNullValue gives false.
      Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
      // For vector constants, we cannot use isNullValue to fold into
      // FalseVInPred versus TrueVInPred. When we have individual nonzero
      // elements in the vector, we will incorrectly fold InC to
      // `TrueVInPred`.
      if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      else {
        // Generate the select in the same block as PN's current incoming block.
        // Note: ThisBB need not be the NonConstBB because vector constants
        // which are constants by definition are handled here.
        // FIXME: This can lead to an increase in IR generation because we might
        // generate selects for vector constant phi operand, that could not be
        // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
        // non-vector phis, this transformation was always profitable because
        // the select would be generated exactly once in the NonConstBB.
        Builder.SetInsertPoint(ThisBB->getTerminator());
        InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
                                   FalseVInPred, "phitmp");
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = nullptr;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
        InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
      else if (isa<ICmpInst>(CI))
        InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
                                 C, "phitmp");
      else
        InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
                                 C, "phitmp");
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
                                             Builder);
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      else
        InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
                                 I.getType(), "phitmp");
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }

  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    if (User == &I) continue;
    replaceInstUsesWith(*User, NewPN);
    eraseInstFromFunction(*User);
  }
  return replaceInstUsesWith(I, NewPN);
}

Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
  if (!isa<Constant>(I.getOperand(1)))
    return nullptr;

  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
    if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
      return NewSel;
  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
    if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
      return NewPhi;
  }
  return nullptr;
}

/// Given a pointer type and a constant offset, determine whether or not there
/// is a sequence of GEP indices into the pointed type that will land us at the
/// specified offset. If so, fill them into NewIndices and return the resultant
/// element type, otherwise return null.
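///
/// For example (illustrative, assuming naturally aligned fields): for a
/// pointer to {i32, [4 x i16]} and an Offset of 6, the indices are [0, 1, 1]
/// (field 1 of the struct, element 1 of the array) and the returned element
/// type is i16.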
Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
                                        SmallVectorImpl<Value *> &NewIndices) {
  Type *Ty = PtrTy->getElementType();
  if (!Ty->isSized())
    return nullptr;

  // Start with the index over the outer type. Note that the type size
  // might be zero (even if the offset isn't zero) if the indexed type
  // is something like [0 x {int, int}]
  Type *IndexTy = DL.getIndexType(PtrTy);
  int64_t FirstIdx = 0;
  if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
    FirstIdx = Offset/TySize;
    Offset -= FirstIdx*TySize;

    // Handle hosts where % returns negative instead of values [0..TySize).
    if (Offset < 0) {
      --FirstIdx;
      Offset += TySize;
      assert(Offset >= 0);
    }
    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
  }

  NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));

  // Index into the types. If we fail, set OrigBase to null.
  while (Offset) {
    // Indexing into tail padding between struct/array elements.
    if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
      return nullptr;

    if (StructType *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout *SL = DL.getStructLayout(STy);
      assert(Offset < (int64_t)SL->getSizeInBytes() &&
             "Offset must stay within the indexed type");

      unsigned Elt = SL->getElementContainingOffset(Offset);
      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
                                            Elt));

      Offset -= SL->getElementOffset(Elt);
      Ty = STy->getElementType(Elt);
    } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
      uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
      assert(EltSize && "Cannot index into a zero-sized array");
      NewIndices.push_back(ConstantInt::get(IndexTy, Offset/EltSize));
      Offset %= EltSize;
      Ty = AT->getElementType();
    } else {
      // Otherwise, we can't index into the middle of this atomic type, bail.
      return nullptr;
    }
  }

  return Ty;
}

static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  // If this GEP has only 0 indices, it is the same pointer as
  // Src. If Src is not a trivial GEP too, don't combine
  // the indices.
  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
      !Src.hasOneUse())
    return false;
  return true;
}

/// Return a value X such that Val = X * Scale, or null if none.
/// If the multiplication is known not to overflow, then NoSignedWrap is set.
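///
/// For example (illustrative): descaling "%v = mul i32 %x, 12" by a Scale of
/// 4 rewrites it to "mul i32 %x, 3", since (%x * 3) * 4 == %x * 12.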
Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
         Scale.getBitWidth() && "Scale not compatible with value!");

  // If Val is zero or Scale is one then Val = Val * Scale.
  if (match(Val, m_Zero()) || Scale == 1) {
    NoSignedWrap = true;
    return Val;
  }

  // If Scale is zero then it does not divide Val.
  if (Scale.isMinValue())
    return nullptr;

  // Look through chains of multiplications, searching for a constant that is
  // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
  // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
  // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
  // down from Val:
  //
  //     Val = M1 * X          ||   Analysis starts here and works down
  //     M1  = M2 * Y          ||   Doesn't descend into terms with more
  //     M2  = Z * 4           \/   than one use
  //
  // Then to modify a term at the bottom:
  //
  //     Val = M1 * X
  //     M1  = Z * Y           ||   Replaced M2 with Z
  //
  // Then to work back up correcting nsw flags.

  // Op - the term we are currently analyzing. Starts at Val then drills down.
  // Replaced with its descaled value before exiting from the drill down loop.
  Value *Op = Val;

  // Parent - initially null, but after drilling down notes where Op came from.
  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
  // 0'th operand of Val.
  std::pair<Instruction *, unsigned> Parent;

  // Set if the transform requires a descaling at deeper levels that doesn't
  // overflow.
  bool RequireNoSignedWrap = false;

  // Log base 2 of the scale. Negative if not a power of 2.
  int32_t logScale = Scale.exactLogBase2();

  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
      // If Op is a constant divisible by Scale then descale to the quotient.
      APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
      APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
      if (!Remainder.isMinValue())
        // Not divisible by Scale.
        return nullptr;
      // Replace with the quotient in the parent.
      Op = ConstantInt::get(CI->getType(), Quotient);
      NoSignedWrap = true;
      break;
    }

    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
      if (BO->getOpcode() == Instruction::Mul) {
        // Multiplication.
        NoSignedWrap = BO->hasNoSignedWrap();
        if (RequireNoSignedWrap && !NoSignedWrap)
          return nullptr;

        // There are three cases for multiplication: multiplication by exactly
        // the scale, multiplication by a constant different to the scale, and
        // multiplication by something else.
        Value *LHS = BO->getOperand(0);
        Value *RHS = BO->getOperand(1);

        if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
          // Multiplication by a constant.
          if (CI->getValue() == Scale) {
            // Multiplication by exactly the scale, replace the multiplication
            // by its left-hand side in the parent.
            Op = LHS;
            break;
          }

          // Otherwise drill down into the constant.
          if (!Op->hasOneUse())
            return nullptr;

          Parent = std::make_pair(BO, 1);
          continue;
        }

        // Multiplication by something else. Drill down into the left-hand side
        // since that's where the reassociate pass puts the good stuff.
        if (!Op->hasOneUse())
          return nullptr;

        Parent = std::make_pair(BO, 0);
        continue;
      }

      if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
          isa<ConstantInt>(BO->getOperand(1))) {
        // Multiplication by a power of 2.
        NoSignedWrap = BO->hasNoSignedWrap();
        if (RequireNoSignedWrap && !NoSignedWrap)
          return nullptr;

        Value *LHS = BO->getOperand(0);
        int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
          getLimitedValue(Scale.getBitWidth());
        // Op = LHS << Amt.

        if (Amt == logScale) {
          // Multiplication by exactly the scale, replace the multiplication
          // by its left-hand side in the parent.
          Op = LHS;
          break;
        }
        if (Amt < logScale || !Op->hasOneUse())
          return nullptr;

        // Multiplication by more than the scale. Reduce the multiplying amount
        // by the scale in the parent.
        Parent = std::make_pair(BO, 1);
        Op = ConstantInt::get(BO->getType(), Amt - logScale);
        break;
      }
    }

    if (!Op->hasOneUse())
      return nullptr;

    if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
      if (Cast->getOpcode() == Instruction::SExt) {
        // Op is sign-extended from a smaller type, descale in the smaller type.
        unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
        APInt SmallScale = Scale.trunc(SmallSize);
        // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
        // descale Op as (sext Y) * Scale. In order to have
        //   sext (Y * SmallScale) = (sext Y) * Scale
        // some conditions need to hold however: SmallScale must sign-extend to
        // Scale and the multiplication Y * SmallScale should not overflow.
        if (SmallScale.sext(Scale.getBitWidth()) != Scale)
          // SmallScale does not sign-extend to Scale.
          return nullptr;
        assert(SmallScale.exactLogBase2() == logScale);
        // Require that Y * SmallScale must not overflow.
        RequireNoSignedWrap = true;

        // Drill down through the cast.
        Parent = std::make_pair(Cast, 0);
        Scale = SmallScale;
        continue;
      }

      if (Cast->getOpcode() == Instruction::Trunc) {
        // Op is truncated from a larger type, descale in the larger type.
        // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
        //   trunc (Y * sext Scale) = (trunc Y) * Scale
        // always holds. However (trunc Y) * Scale may overflow even if
        // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
        // from this point up in the expression (see later).
        if (RequireNoSignedWrap)
          return nullptr;

        // Drill down through the cast.
        unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
        Parent = std::make_pair(Cast, 0);
        Scale = Scale.sext(LargeSize);
        if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
          logScale = -1;
        assert(Scale.exactLogBase2() == logScale);
        continue;
      }
    }

    // Unsupported expression, bail out.
    return nullptr;
  }

  // If Op is zero then Val = Op * Scale.
  if (match(Op, m_Zero())) {
    NoSignedWrap = true;
    return Op;
  }

  // We know that we can successfully descale, so from here on we can safely
  // modify the IR. Op holds the descaled version of the deepest term in the
  // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
  // not to overflow.

  if (!Parent.first)
    // The expression only had one term.
    return Op;

  // Rewrite the parent using the descaled version of its operand.
  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
  assert(Op != Parent.first->getOperand(Parent.second) &&
         "Descaling was a no-op?");
  Parent.first->setOperand(Parent.second, Op);
  Worklist.Add(Parent.first);

  // Now work back up the expression correcting nsw flags. The logic is based
  // on the following observation: if X * Y is known not to overflow as a signed
  // multiplication, and Y is replaced by a value Z with smaller absolute value,
  // then X * Z will not overflow as a signed multiplication either. As we work
  // our way up, having NoSignedWrap 'true' means that the descaled value at the
  // current level has strictly smaller absolute value than the original.
  Instruction *Ancestor = Parent.first;
  do {
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
      // If the multiplication wasn't nsw then we can't say anything about the
      // value of the descaled multiplication, and we have to clear nsw flags
      // from this point on up.
      bool OpNoSignedWrap = BO->hasNoSignedWrap();
      NoSignedWrap &= OpNoSignedWrap;
      if (NoSignedWrap != OpNoSignedWrap) {
        BO->setHasNoSignedWrap(NoSignedWrap);
        Worklist.Add(Ancestor);
      }
    } else if (Ancestor->getOpcode() == Instruction::Trunc) {
      // The fact that the descaled input to the trunc has smaller absolute
      // value than the original input doesn't tell us anything useful about
      // the absolute values of the truncations.
      NoSignedWrap = false;
    }
    assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
           "Failed to keep proper track of nsw flags while drilling down?");

    if (Ancestor == Val)
      // Got to the top, all done!
      return Val;

    // Move up one level in the expression.
    assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
    Ancestor = Ancestor->user_back();
  } while (true);
}
1363 
1364 Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
1365  if (!Inst.getType()->isVectorTy()) return nullptr;
1366 
1367  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
1368  unsigned NumElts = cast<VectorType>(Inst.getType())->getNumElements();
1369  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
1370  assert(cast<VectorType>(LHS->getType())->getNumElements() == NumElts);
1371  assert(cast<VectorType>(RHS->getType())->getNumElements() == NumElts);
1372 
1373  // If both operands of the binop are vector concatenations, then perform the
1374  // narrow binop on each pair of the source operands followed by concatenation
1375  // of the results.
1376  Value *L0, *L1, *R0, *R1;
1377  Constant *Mask;
1378  if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
1379  match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
1380  LHS->hasOneUse() && RHS->hasOneUse() &&
1381  cast<ShuffleVectorInst>(LHS)->isConcat() &&
1382  cast<ShuffleVectorInst>(RHS)->isConcat()) {
1383  // This transform does not have the speculative execution constraint as
1384  // below because the shuffle is a concatenation. The new binops are
1385  // operating on exactly the same elements as the existing binop.
1386  // TODO: We could ease the mask requirement to allow different undef lanes,
1387  // but that requires an analysis of the binop-with-undef output value.
1388  Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
1389  if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
1390  BO->copyIRFlags(&Inst);
1391  Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
1392  if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
1393  BO->copyIRFlags(&Inst);
1394  return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
1395  }
1396 
1397  // It may not be safe to reorder shuffles and things like div, urem, etc.
1398  // because we may trap when executing those ops on unknown vector elements.
1399  // See PR20059.
1400  if (!isSafeToSpeculativelyExecute(&Inst))
1401  return nullptr;
1402 
1403  auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
1404  Value *XY = Builder.CreateBinOp(Opcode, X, Y);
1405  if (auto *BO = dyn_cast<BinaryOperator>(XY))
1406  BO->copyIRFlags(&Inst);
1407  return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
1408  };
1409 
1410  // If both arguments of the binary operation are shuffles that use the same
1411  // mask and shuffle within a single vector, move the shuffle after the binop.
1412  Value *V1, *V2;
1413  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
1414  match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
1415  V1->getType() == V2->getType() &&
1416  (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
1417  // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
1418  return createBinOpShuffle(V1, V2, Mask);
1419  }
1420 
1421  // If both arguments of a commutative binop are select-shuffles that use the
1422  // same mask with commuted operands, the shuffles are unnecessary.
1423  if (Inst.isCommutative() &&
1424  match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
1426  m_Specific(Mask)))) {
1427  auto *LShuf = cast<ShuffleVectorInst>(LHS);
1428  auto *RShuf = cast<ShuffleVectorInst>(RHS);
1429  // TODO: Allow shuffles that contain undefs in the mask?
1430  // That is legal, but it reduces undef knowledge.
1431  // TODO: Allow arbitrary shuffles by shuffling after binop?
1432  // That might be legal, but we have to deal with poison.
1433  if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
1434  RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
1435  // Example:
1436  // LHS = shuffle V1, V2, <0, 5, 6, 3>
1437  // RHS = shuffle V2, V1, <0, 5, 6, 3>
1438  // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
1439  Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
1440  NewBO->copyIRFlags(&Inst);
1441  return NewBO;
1442  }
1443  }
1444 
1445  // If one argument is a shuffle within one vector and the other is a constant,
1446  // try moving the shuffle after the binary operation. This canonicalization
1447  // intends to move shuffles closer to other shuffles and binops closer to
1448  // other binops, so they can be folded. It may also enable demanded elements
1449  // transforms.
1450  Constant *C;
1451  if (match(&Inst, m_c_BinOp(
1453  m_Constant(C))) &&
1454  V1->getType()->getVectorNumElements() <= NumElts) {
1455  assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
1456  "Shuffle should not change scalar type");
1457 
1458  // Find constant NewC that has property:
1459  // shuffle(NewC, ShMask) = C
1460  // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
1461  // reorder is not possible. A 1-to-1 mapping is not required. Example:
1462  // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
1463  bool ConstOp1 = isa<Constant>(RHS);
1464  SmallVector<int, 16> ShMask;
1465  ShuffleVectorInst::getShuffleMask(Mask, ShMask);
1466  unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
1467  UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
1468  SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
1469  bool MayChange = true;
1470  for (unsigned I = 0; I < NumElts; ++I) {
1471  Constant *CElt = C->getAggregateElement(I);
1472  if (ShMask[I] >= 0) {
1473  assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
1474  Constant *NewCElt = NewVecC[ShMask[I]];
1475  // Bail out if:
1476  // 1. The constant vector contains a constant expression.
1477  // 2. The shuffle needs an element of the constant vector that can't
1478  // be mapped to a new constant vector.
1479  // 3. This is a widening shuffle that copies elements of V1 into the
1480  // extended elements (extending with undef is allowed).
1481  if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
1482  I >= SrcVecNumElts) {
1483  MayChange = false;
1484  break;
1485  }
1486  NewVecC[ShMask[I]] = CElt;
1487  }
1488  // If this is a widening shuffle, we must be able to extend with undef
1489  // elements. If the original binop does not produce an undef in the high
1490  // lanes, then this transform is not safe.
1491  // TODO: We could shuffle those non-undef constant values into the
1492  // result by using a constant vector (rather than an undef vector)
1493  // as operand 1 of the new binop, but that might be too aggressive
1494  // for target-independent shuffle creation.
1495  if (I >= SrcVecNumElts) {
1496  Constant *MaybeUndef =
1497  ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
1498  : ConstantExpr::get(Opcode, CElt, UndefScalar);
1499  if (!isa<UndefValue>(MaybeUndef)) {
1500  MayChange = false;
1501  break;
1502  }
1503  }
1504  }
1505  if (MayChange) {
1506  Constant *NewC = ConstantVector::get(NewVecC);
1507  // It may not be safe to execute a binop on a vector with undef elements
1508  // because the entire instruction can be folded to undef or create poison
1509  // that did not exist in the original code.
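  // E.g. (a sketch of the intent): for a udiv, an undef lane in NewC could be
  // chosen as 0, making the whole divide undefined, so such lanes must be
  // replaced with a safe constant (such as 1) before forming the new binop.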
1510  if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
1511  NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
1512 
1513  // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
1514  // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
1515  Value *NewLHS = ConstOp1 ? V1 : NewC;
1516  Value *NewRHS = ConstOp1 ? NewC : V1;
1517  return createBinOpShuffle(NewLHS, NewRHS, Mask);
1518  }
1519  }
1520 
1521  return nullptr;
1522 }
1523 
1524 /// Try to narrow the width of a binop if at least 1 operand is an extend of
1525 /// a value. This requires a potentially expensive known bits check to make
1526 /// sure the narrow op does not overflow.
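/// Illustrative example (a sketch; assumes known bits prove the 8-bit add
/// cannot wrap):
///   %zx = zext i8 %x to i32
///   %zy = zext i8 %y to i32
///   %r  = add i32 %zx, %zy
/// becomes
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32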
1527 Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
1528  // We need at least one extended operand.
1529  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
1530 
1531  // If this is a sub, we swap the operands since we always want an extension
1532  // on the RHS. The LHS can be an extension or a constant.
1533  if (BO.getOpcode() == Instruction::Sub)
1534  std::swap(Op0, Op1);
1535 
1536  Value *X;
1537  bool IsSext = match(Op0, m_SExt(m_Value(X)));
1538  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
1539  return nullptr;
1540 
1541  // If both operands are the same extension from the same source type and we
1542  // can eliminate at least one (hasOneUse), this might work.
1543  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
1544  Value *Y;
1545  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
1546  cast<Operator>(Op1)->getOpcode() == CastOpc &&
1547  (Op0->hasOneUse() || Op1->hasOneUse()))) {
1548  // If that did not match, see if we have a suitable constant operand.
1549  // Truncating and extending must produce the same constant.
1550  Constant *WideC;
1551  if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
1552  return nullptr;
1553  Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
1554  if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
1555  return nullptr;
1556  Y = NarrowC;
1557  }
1558 
1559  // Swap back now that we found our operands.
1560  if (BO.getOpcode() == Instruction::Sub)
1561  std::swap(X, Y);
1562 
1563  // Both operands have narrow versions. Last step: the math must not overflow
1564  // in the narrow width.
1565  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
1566  return nullptr;
1567 
1568  // bo (ext X), (ext Y) --> ext (bo X, Y)
1569  // bo (ext X), C --> ext (bo X, C')
1570  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
1571  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
1572  if (IsSext)
1573  NewBinOp->setHasNoSignedWrap();
1574  else
1575  NewBinOp->setHasNoUnsignedWrap();
1576  }
1577  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
1578 }
1579 
1580 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
1581  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
1582  Type *GEPType = GEP.getType();
1583  Type *GEPEltType = GEP.getSourceElementType();
1584  if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
1585  return replaceInstUsesWith(GEP, V);
1586 
1587  // For vector geps, use the generic demanded vector support.
1588  if (GEP.getType()->isVectorTy()) {
1589  auto VWidth = GEP.getType()->getVectorNumElements();
1590  APInt UndefElts(VWidth, 0);
1591  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1592  if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
1593  UndefElts)) {
1594  if (V != &GEP)
1595  return replaceInstUsesWith(GEP, V);
1596  return &GEP;
1597  }
1598 
1599  // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
1600  // possible (decide on canonical form for pointer broadcast), 3) exploit
1601  // undef elements to decrease demanded bits
1602  }
1603 
1604  Value *PtrOp = GEP.getOperand(0);
1605 
1606  // Eliminate unneeded casts for indices, and replace indices which displace
1607  // by multiples of a zero size type with zero.
1608  bool MadeChange = false;
1609 
1610  // Index width may not be the same width as pointer width.
1611  // Data layout chooses the right type based on supported integer types.
1612  Type *NewScalarIndexTy =
1613  DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
1614 
1615  gep_type_iterator GTI = gep_type_begin(GEP);
1616  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
1617  ++I, ++GTI) {
1618  // Skip indices into struct types.
1619  if (GTI.isStruct())
1620  continue;
1621 
1622  Type *IndexTy = (*I)->getType();
1623  Type *NewIndexType =
1624  IndexTy->isVectorTy()
1625  ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
1626  : NewScalarIndexTy;
1627 
1628  // If the element type has zero size then any index over it is equivalent
1629  // to an index of zero, so replace it with zero if it is not zero already.
1630  Type *EltTy = GTI.getIndexedType();
1631  if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
1632  if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
1633  *I = Constant::getNullValue(NewIndexType);
1634  MadeChange = true;
1635  }
1636 
1637  if (IndexTy != NewIndexType) {
1638  // If we are using a wider index than needed for this platform, shrink
1639  // it to what we need. If narrower, sign-extend it to what we need.
1640  // This explicit cast can make subsequent optimizations more obvious.
1641  *I = Builder.CreateIntCast(*I, NewIndexType, true);
1642  MadeChange = true;
1643  }
1644  }
1645  if (MadeChange)
1646  return &GEP;
1647 
1648  // Check to see if the inputs to the PHI node are getelementptr instructions.
1649  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
1650  auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
1651  if (!Op1)
1652  return nullptr;
1653 
1654  // Don't fold a GEP into itself through a PHI node. This can only happen
1655  // through the back-edge of a loop. Folding a GEP into itself means that
1656  // the value of the previous iteration needs to be stored in the meantime,
1657  // thus requiring an additional register variable to be live, but not
1658  // actually achieving anything (the GEP still needs to be executed once per
1659  // loop iteration).
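  // Illustrative sketch of the rejected pattern (names hypothetical):
  //   loop:
  //     %p   = phi i8* [ %base, %entry ], [ %gep, %loop ]
  //     %gep = getelementptr i8, i8* %p, i64 1
  // Folding %gep into itself through %p would still leave a GEP in every
  // iteration while keeping the previous iteration's value alive longer.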
1660  if (Op1 == &GEP)
1661  return nullptr;
1662 
1663  int DI = -1;
1664 
1665  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
1666  auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
1667  if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
1668  return nullptr;
1669 
1670  // As for Op1 above, don't try to fold a GEP into itself.
1671  if (Op2 == &GEP)
1672  return nullptr;
1673 
1674  // Keep track of the type as we walk the GEP.
1675  Type *CurTy = nullptr;
1676 
1677  for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
1678  if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
1679  return nullptr;
1680 
1681  if (Op1->getOperand(J) != Op2->getOperand(J)) {
1682  if (DI == -1) {
1683  // We have not seen any differences in the GEPs feeding the
1684  // PHI yet, so we record this one if it is allowed to be a
1685  // variable.
1686 
1687  // The first two arguments can vary for any GEP, the rest have to be
1688  // static for struct slots
1689  if (J > 1 && CurTy->isStructTy())
1690  return nullptr;
1691 
1692  DI = J;
1693  } else {
1694  // The GEP is different by more than one input. While this could be
1695  // extended to support GEPs that vary by more than one variable it
1696  // doesn't make sense since it greatly increases the complexity and
1697  // would result in an R+R+R addressing mode which no backend
1698  // directly supports and would need to be broken into several
1699  // simpler instructions anyway.
1700  return nullptr;
1701  }
1702  }
1703 
1704  // Sink down a layer of the type for the next iteration.
1705  if (J > 0) {
1706  if (J == 1) {
1707  CurTy = Op1->getSourceElementType();
1708  } else if (auto *CT = dyn_cast<CompositeType>(CurTy)) {
1709  CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
1710  } else {
1711  CurTy = nullptr;
1712  }
1713  }
1714  }
1715  }
1716 
1717  // If not all GEPs are identical we'll have to create a new PHI node.
1718  // Check that the old PHI node has only one use so that it will get
1719  // removed.
1720  if (DI != -1 && !PN->hasOneUse())
1721  return nullptr;
1722 
1723  auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
1724  if (DI == -1) {
1725  // All the GEPs feeding the PHI are identical. Clone one down into our
1726  // BB so that it can be merged with the current GEP.
1727  GEP.getParent()->getInstList().insert(
1728  GEP.getParent()->getFirstInsertionPt(), NewGEP);
1729  } else {
1730  // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
1731  // into the current block so it can be merged, and create a new PHI to
1732  // set that index.
1733  PHINode *NewPN;
1734  {
1735  IRBuilderBase::InsertPointGuard Guard(Builder);
1736  Builder.SetInsertPoint(PN);
1737  NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
1738  PN->getNumOperands());
1739  }
1740 
1741  for (auto &I : PN->operands())
1742  NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
1743  PN->getIncomingBlock(I));
1744 
1745  NewGEP->setOperand(DI, NewPN);
1746  GEP.getParent()->getInstList().insert(
1747  GEP.getParent()->getFirstInsertionPt(), NewGEP);
1748  NewGEP->setOperand(DI, NewPN);
1749  }
1750 
1751  GEP.setOperand(0, NewGEP);
1752  PtrOp = NewGEP;
1753  }
1754 
1755  // Combine Indices - If the source pointer to this getelementptr instruction
1756  // is a getelementptr instruction, combine the indices of the two
1757  // getelementptr instructions into a single instruction.
1758  if (auto *Src = dyn_cast<GEPOperator>(PtrOp)) {
1759  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
1760  return nullptr;
1761 
1762  // Try to reassociate loop invariant GEP chains to enable LICM.
1763  if (LI && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
1764  Src->hasOneUse()) {
1765  if (Loop *L = LI->getLoopFor(GEP.getParent())) {
1766  Value *GO1 = GEP.getOperand(1);
1767  Value *SO1 = Src->getOperand(1);
1768  // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
1769  // invariant: this breaks the dependence between GEPs and allows LICM
1770  // to hoist the invariant part out of the loop.
1771  if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
1772  // We have to be careful here.
1773  // We have something like:
1774  // %src = getelementptr <ty>, <ty>* %base, <ty> %idx
1775  // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
1776  // If we just swap idx & idx2 then we could inadvertently
1777  // change %src from a vector to a scalar, or vice versa.
1778  // Cases:
1779  // 1) %base a scalar & idx a scalar & idx2 a vector
1780  // => Swapping idx & idx2 turns %src into a vector type.
1781  // 2) %base a scalar & idx a vector & idx2 a scalar
1782  // => Swapping idx & idx2 turns %src into a scalar type.
1783  // 3) %base, %idx, and %idx2 are scalars
1784  // => %src & %gep are scalars
1785  // => swapping idx & idx2 is safe
1786  // 4) %base a vector
1787  // => %src is a vector
1788  // => swapping idx & idx2 is safe.
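  // A hedged sketch of the safe cases (names hypothetical):
  //   %src = getelementptr i32, i32* %base, i64 %variant
  //   %gep = getelementptr i32, i32* %src, i64 %invariant
  // becomes
  //   %src = getelementptr i32, i32* %base, i64 %invariant
  //   %gep = getelementptr i32, i32* %src, i64 %variant
  // after which LICM can hoist the new %src out of the loop.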
1789  auto *SO0 = Src->getOperand(0);
1790  auto *SO0Ty = SO0->getType();
1791  if (!isa<VectorType>(GEPType) || // case 3
1792  isa<VectorType>(SO0Ty)) { // case 4
1793  Src->setOperand(1, GO1);
1794  GEP.setOperand(1, SO1);
1795  return &GEP;
1796  } else {
1797  // Case 1 or 2
1798  // -- have to recreate %src & %gep
1799  // put NewSrc at same location as %src
1800  Builder.SetInsertPoint(cast<Instruction>(PtrOp));
1801  auto *NewSrc = cast<GetElementPtrInst>(
1802  Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
1803  NewSrc->setIsInBounds(Src->isInBounds());
1804  auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
1805  NewGEP->setIsInBounds(GEP.isInBounds());
1806  return NewGEP;
1807  }
1808  }
1809  }
1810  }
1811 
1812  // Note that if our source is a gep chain itself then we wait for that
1813  // chain to be resolved before we perform this transformation. This
1814  // avoids us creating a TON of code in some cases.
1815  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
1816  if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
1817  return nullptr; // Wait until our source is folded to completion.
1818 
1819  SmallVector<Value*, 8> Indices;
1820 
1821  // Find out whether the last index in the source GEP is a sequential idx.
1822  bool EndsWithSequential = false;
1823  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
1824  I != E; ++I)
1825  EndsWithSequential = I.isSequential();
1826 
1827  // Can we combine the two pointer arithmetic offsets?
1828  if (EndsWithSequential) {
1829  // Replace: gep (gep %P, long B), long A, ...
1830  // With: T = long A+B; gep %P, T, ...
1831  Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
1832  Value *GO1 = GEP.getOperand(1);
1833 
1834  // If they aren't the same type, then the input hasn't been processed
1835  // by the loop above yet (which canonicalizes sequential index types to
1836  // intptr_t). Just avoid transforming this until the input has been
1837  // normalized.
1838  if (SO1->getType() != GO1->getType())
1839  return nullptr;
1840 
1841  Value *Sum =
1842  SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
1843  // Only do the combine when we are sure the cost after the
1844  // merge is never more than that before the merge.
1845  if (Sum == nullptr)
1846  return nullptr;
1847 
1848  // Update the GEP in place if possible.
1849  if (Src->getNumOperands() == 2) {
1850  GEP.setOperand(0, Src->getOperand(0));
1851  GEP.setOperand(1, Sum);
1852  return &GEP;
1853  }
1854  Indices.append(Src->op_begin()+1, Src->op_end()-1);
1855  Indices.push_back(Sum);
1856  Indices.append(GEP.op_begin()+2, GEP.op_end());
1857  } else if (isa<Constant>(*GEP.idx_begin()) &&
1858  cast<Constant>(*GEP.idx_begin())->isNullValue() &&
1859  Src->getNumOperands() != 1) {
1860  // Otherwise we can do the fold if the first index of the GEP is a zero
1861  Indices.append(Src->op_begin()+1, Src->op_end());
1862  Indices.append(GEP.idx_begin()+1, GEP.idx_end());
1863  }
1864 
1865  if (!Indices.empty())
1866  return GEP.isInBounds() && Src->isInBounds()
1867  ? GetElementPtrInst::CreateInBounds(
1868  Src->getSourceElementType(), Src->getOperand(0), Indices,
1869  GEP.getName())
1870  : GetElementPtrInst::Create(Src->getSourceElementType(),
1871  Src->getOperand(0), Indices,
1872  GEP.getName());
1873  }
1874 
1875  if (GEP.getNumIndices() == 1) {
1876  unsigned AS = GEP.getPointerAddressSpace();
1877  if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
1878  DL.getIndexSizeInBits(AS)) {
1879  uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
1880 
1881  bool Matched = false;
1882  uint64_t C;
1883  Value *V = nullptr;
1884  if (TyAllocSize == 1) {
1885  V = GEP.getOperand(1);
1886  Matched = true;
1887  } else if (match(GEP.getOperand(1),
1888  m_AShr(m_Value(V), m_ConstantInt(C)))) {
1889  if (TyAllocSize == 1ULL << C)
1890  Matched = true;
1891  } else if (match(GEP.getOperand(1),
1892  m_SDiv(m_Value(V), m_ConstantInt(C)))) {
1893  if (TyAllocSize == C)
1894  Matched = true;
1895  }
1896 
1897  if (Matched) {
1898  // Canonicalize (gep i8* X, -(ptrtoint Y))
1899  // to (inttoptr (sub (ptrtoint X), (ptrtoint Y)))
1900  // The GEP pattern is emitted by the SCEV expander for certain kinds of
1901  // pointer arithmetic.
1902  if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
1903  Operator *Index = cast<Operator>(V);
1904  Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType());
1905  Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1));
1906  return CastInst::Create(Instruction::IntToPtr, NewSub, GEPType);
1907  }
1908  // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
1909  // to (bitcast Y)
1910  Value *Y;
1911  if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
1912  m_PtrToInt(m_Specific(GEP.getOperand(0))))))
1913  return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
1914  }
1915  }
1916  }
1917 
1918  // We do not handle pointer-vector geps here.
1919  if (GEPType->isVectorTy())
1920  return nullptr;
1921 
1922  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
1923  Value *StrippedPtr = PtrOp->stripPointerCasts();
1924  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
1925 
1926  if (StrippedPtr != PtrOp) {
1927  bool HasZeroPointerIndex = false;
1928  Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
1929 
1930  if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
1931  HasZeroPointerIndex = C->isZero();
1932 
1933  // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
1934  // into : GEP [10 x i8]* X, i32 0, ...
1935  //
1936  // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
1937  // into : GEP i8* X, ...
1938  //
1939  // This occurs when the program declares an array extern like "int X[];"
1940  if (HasZeroPointerIndex) {
1941  if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
1942  // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
1943  if (CATy->getElementType() == StrippedPtrEltTy) {
1944  // -> GEP i8* X, ...
1945  SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
1946  GetElementPtrInst *Res = GetElementPtrInst::Create(
1947  StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
1948  Res->setIsInBounds(GEP.isInBounds());
1949  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
1950  return Res;
1951  // Insert Res, and create an addrspacecast.
1952  // e.g.,
1953  // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
1954  // ->
1955  // %0 = GEP i8 addrspace(1)* X, ...
1956  // addrspacecast i8 addrspace(1)* %0 to i8*
1957  return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
1958  }
1959 
1960  if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
1961  // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
1962  if (CATy->getElementType() == XATy->getElementType()) {
1963  // -> GEP [10 x i8]* X, i32 0, ...
1964  // At this point, we know that the cast source type is a pointer
1965  // to an array of the same type as the destination pointer
1966  // array. Because the array type is never stepped over (there
1967  // is a leading zero) we can fold the cast into this GEP.
1968  if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
1969  GEP.setOperand(0, StrippedPtr);
1970  GEP.setSourceElementType(XATy);
1971  return &GEP;
1972  }
1973  // Cannot replace the base pointer directly because StrippedPtr's
1974  // address space is different. Instead, create a new GEP followed by
1975  // an addrspacecast.
1976  // e.g.,
1977  // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
1978  // i32 0, ...
1979  // ->
1980  // %0 = GEP [10 x i8] addrspace(1)* X, ...
1981  // addrspacecast i8 addrspace(1)* %0 to i8*
1982  SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
1983  Value *NewGEP =
1984  GEP.isInBounds()
1985  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
1986  Idx, GEP.getName())
1987  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
1988  GEP.getName());
1989  return new AddrSpaceCastInst(NewGEP, GEPType);
1990  }
1991  }
1992  }
1993  } else if (GEP.getNumOperands() == 2) {
1994  // Transform things like:
1995  // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
1996  // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
1997  if (StrippedPtrEltTy->isArrayTy() &&
1998  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
1999  DL.getTypeAllocSize(GEPEltType)) {
2000  Type *IdxType = DL.getIndexType(GEPType);
2001  Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
2002  Value *NewGEP =
2003  GEP.isInBounds()
2004  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2005  GEP.getName())
2006  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
2007  GEP.getName());
2008 
2009  // V and GEP are both pointer types --> BitCast
2010  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
2011  }
2012 
2013  // Transform things like:
2014  // %V = mul i64 %N, 4
2015  // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
2016  // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
2017  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
2018  // Check that changing the type amounts to dividing the index by a scale
2019  // factor.
2020  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
2021  uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
2022  if (ResSize && SrcSize % ResSize == 0) {
2023  Value *Idx = GEP.getOperand(1);
2024  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2025  uint64_t Scale = SrcSize / ResSize;
2026 
2027  // Earlier transforms ensure that the index has the right type
2028  // according to Data Layout, which considerably simplifies the
2029  // logic by eliminating implicit casts.
2030  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2031  "Index type does not match the Data Layout preferences");
2032 
2033  bool NSW;
2034  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2035  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2036  // If the multiplication NewIdx * Scale may overflow then the new
2037  // GEP may not be "inbounds".
2038  Value *NewGEP =
2039  GEP.isInBounds() && NSW
2040  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
2041  NewIdx, GEP.getName())
2042  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
2043  GEP.getName());
2044 
2045  // The NewGEP must be pointer typed, so must the old one -> BitCast
2046  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2047  GEPType);
2048  }
2049  }
2050  }
2051 
2052  // Similarly, transform things like:
2053  // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
2054  // (where tmp = 8*tmp2) into:
2055  // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
2056  if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
2057  StrippedPtrEltTy->isArrayTy()) {
2058  // Check that changing to the array element type amounts to dividing the
2059  // index by a scale factor.
2060  uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
2061  uint64_t ArrayEltSize =
2062  DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
2063  if (ResSize && ArrayEltSize % ResSize == 0) {
2064  Value *Idx = GEP.getOperand(1);
2065  unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
2066  uint64_t Scale = ArrayEltSize / ResSize;
2067 
2068  // Earlier transforms ensure that the index has the right type
2069  // according to the Data Layout, which considerably simplifies
2070  // the logic by eliminating implicit casts.
2071  assert(Idx->getType() == DL.getIndexType(GEPType) &&
2072  "Index type does not match the Data Layout preferences");
2073 
2074  bool NSW;
2075  if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
2076  // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
2077  // If the multiplication NewIdx * Scale may overflow then the new
2078  // GEP may not be "inbounds".
2079  Type *IndTy = DL.getIndexType(GEPType);
2080  Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
2081 
2082  Value *NewGEP =
2083  GEP.isInBounds() && NSW
2084  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
2085  Off, GEP.getName())
2086  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
2087  GEP.getName());
2088  // The NewGEP must be pointer typed, so must the old one -> BitCast
2089  return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
2090  GEPType);
2091  }
2092  }
2093  }
2094  }
2095  }
2096 
2097  // addrspacecast between types is canonicalized as a bitcast, then an
2098  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
2099  // through the addrspacecast.
2100  Value *ASCStrippedPtrOp = PtrOp;
2101  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
2102  // X = bitcast A addrspace(1)* to B addrspace(1)*
2103  // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
2104  // Z = gep Y, <...constant indices...>
2105  // Into an addrspacecasted GEP of the struct.
2106  if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
2107  ASCStrippedPtrOp = BC;
2108  }
2109 
2110  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
2111  Value *SrcOp = BCI->getOperand(0);
2112  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
2113  Type *SrcEltType = SrcType->getElementType();
2114 
2115  // GEP directly using the source operand if this GEP is accessing an element
2116  // of a bitcasted pointer to vector or array of the same dimensions:
2117  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
2118  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
2119  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) {
2120  return ArrTy->getArrayElementType() == VecTy->getVectorElementType() &&
2121  ArrTy->getArrayNumElements() == VecTy->getVectorNumElements();
2122  };
2123  if (GEP.getNumOperands() == 3 &&
2124  ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
2125  areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
2126  (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
2127  areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
2128 
2129  // Create a new GEP here, as using `setOperand()` followed by
2130  // `setSourceElementType()` won't actually update the type of the
2131  // existing GEP Value, causing issues if this Value is accessed when
2132  // constructing an AddrSpaceCastInst.
2133  Value *NGEP =
2134  GEP.isInBounds()
2135  ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
2136  : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
2137  NGEP->takeName(&GEP);
2138 
2139  // Preserve GEP address space to satisfy users
2140  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2141  return new AddrSpaceCastInst(NGEP, GEPType);
2142 
2143  return replaceInstUsesWith(GEP, NGEP);
2144  }
2145 
2146  // See if we can simplify:
2147  // X = bitcast A* to B*
2148  // Y = gep X, <...constant indices...>
2149  // into a gep of the original struct. This is important for SROA and alias
2150  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
2151  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
2152  APInt Offset(OffsetBits, 0);
2153  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) {
2154  // If this GEP instruction doesn't move the pointer, just replace the GEP
2155  // with a bitcast of the real input to the dest type.
2156  if (!Offset) {
2157  // If the bitcast is of an allocation, and the allocation will be
2158  // converted to match the type of the cast, don't touch this.
2159  if (isa<AllocaInst>(SrcOp) || isAllocationFn(SrcOp, &TLI)) {
2160  // See if the bitcast simplifies, if so, don't nuke this GEP yet.
2161  if (Instruction *I = visitBitCast(*BCI)) {
2162  if (I != BCI) {
2163  I->takeName(BCI);
2164  BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
2165  replaceInstUsesWith(*BCI, I);
2166  }
2167  return &GEP;
2168  }
2169  }
2170 
2171  if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
2172  return new AddrSpaceCastInst(SrcOp, GEPType);
2173  return new BitCastInst(SrcOp, GEPType);
2174  }
2175 
2176  // Otherwise, if the offset is non-zero, we need to find out if there is a
2177  // field at Offset in 'A's type. If so, we can pull the cast through the
2178  // GEP.
2179  SmallVector<Value*, 8> NewIndices;
2180  if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
2181  Value *NGEP =
2182  GEP.isInBounds()
2183  ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
2184  : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
2185 
2186  if (NGEP->getType() == GEPType)
2187  return replaceInstUsesWith(GEP, NGEP);
2188  NGEP->takeName(&GEP);
2189 
2190  if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
2191  return new AddrSpaceCastInst(NGEP, GEPType);
2192  return new BitCastInst(NGEP, GEPType);
2193  }
2194  }
2195  }
2196 
2197  if (!GEP.isInBounds()) {
2198  unsigned IdxWidth =
2199  DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
2200  APInt BasePtrOffset(IdxWidth, 0);
2201  Value *UnderlyingPtrOp =
2202  PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
2203  BasePtrOffset);
2204  if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
2205  if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
2206  BasePtrOffset.isNonNegative()) {
2207  APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
2208  if (BasePtrOffset.ule(AllocSize)) {
2209  return GetElementPtrInst::CreateInBounds(
2210  GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
2211  GEP.getName());
2212  }
2213  }
2214  }
2215  }
2216 
2217  return nullptr;
2218 }
2219 
2220 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
2221  Instruction *AI) {
2222  if (isa<ConstantPointerNull>(V))
2223  return true;
2224  if (auto *LI = dyn_cast<LoadInst>(V))
2225  return isa<GlobalVariable>(LI->getPointerOperand());
2226  // Two distinct allocations will never be equal.
2227  // We rely on LookThroughBitCast in isAllocLikeFn being false, since looking
2228  // through bitcasts of V can cause the result statement below to be true,
2229  // even when AI and V (e.g., an i8* -> i32* -> i8* chain of casts of AI)
2230  // are the same allocation.
2231  return isAllocLikeFn(V, TLI) && V != AI;
2232 }
2233 
2234 static bool isAllocSiteRemovable(Instruction *AI,
2235  SmallVectorImpl<WeakTrackingVH> &Users,
2236  const TargetLibraryInfo *TLI) {
2237  SmallVector<Instruction*, 4> Worklist;
2238  Worklist.push_back(AI);
2239 
2240  do {
2241  Instruction *PI = Worklist.pop_back_val();
2242  for (User *U : PI->users()) {
2243  Instruction *I = cast<Instruction>(U);
2244  switch (I->getOpcode()) {
2245  default:
2246  // Give up the moment we see something we can't handle.
2247  return false;
2248 
2249  case Instruction::AddrSpaceCast:
2250  case Instruction::BitCast:
2251  case Instruction::GetElementPtr:
2252  Users.emplace_back(I);
2253  Worklist.push_back(I);
2254  continue;
2255 
2256  case Instruction::ICmp: {
2257  ICmpInst *ICI = cast<ICmpInst>(I);
2258  // We can fold eq/ne comparisons with null to false/true, respectively.
2259  // We also fold comparisons in some conditions provided the alloc has
2260  // not escaped (see isNeverEqualToUnescapedAlloc).
2261  if (!ICI->isEquality())
2262  return false;
2263  unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
2264  if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
2265  return false;
2266  Users.emplace_back(I);
2267  continue;
2268  }
2269 
2270  case Instruction::Call:
2271  // Ignore no-op and store intrinsics.
2272  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2273  switch (II->getIntrinsicID()) {
2274  default:
2275  return false;
2276 
2277  case Intrinsic::memmove:
2278  case Intrinsic::memcpy:
2279  case Intrinsic::memset: {
2280  MemIntrinsic *MI = cast<MemIntrinsic>(II);
2281  if (MI->isVolatile() || MI->getRawDest() != PI)
2282  return false;
2283  LLVM_FALLTHROUGH;
2284  }
2285  case Intrinsic::invariant_start:
2286  case Intrinsic::invariant_end:
2287  case Intrinsic::lifetime_start:
2288  case Intrinsic::lifetime_end:
2289  case Intrinsic::objectsize:
2290  Users.emplace_back(I);
2291  continue;
2292  }
2293  }
2294 
2295  if (isFreeCall(I, TLI)) {
2296  Users.emplace_back(I);
2297  continue;
2298  }
2299  return false;
2300 
2301  case Instruction::Store: {
2302  StoreInst *SI = cast<StoreInst>(I);
2303  if (SI->isVolatile() || SI->getPointerOperand() != PI)
2304  return false;
2305  Users.emplace_back(I);
2306  continue;
2307  }
2308  }
2309  llvm_unreachable("missing a return?");
2310  }
2311  } while (!Worklist.empty());
2312  return true;
2313 }
2314 
2315 Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
2316  // If we have a malloc call whose only uses are comparisons to null and free
2317  // calls, delete the calls and replace the comparisons with true or false as
2318  // appropriate.
2319 
2320  // This is based on the principle that we can substitute our own allocation
2321  // function (which will never return null) rather than knowledge of the
2322  // specific function being called. In some sense this can change the permitted
2323  // outputs of a program (when we convert a malloc to an alloca, the fact that
2324  // the allocation is now on the stack is potentially visible, for example),
2325  // but we believe it does so in a permissible manner.
2326  SmallVector<WeakTrackingVH, 64> Users;
2327 
2328  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
2329  // before each store.
2330  TinyPtrVector<DbgVariableIntrinsic *> DIIs;
2331  std::unique_ptr<DIBuilder> DIB;
2332  if (isa<AllocaInst>(MI)) {
2333  DIIs = FindDbgAddrUses(&MI);
2334  DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
2335  }
2336 
2337  if (isAllocSiteRemovable(&MI, Users, &TLI)) {
2338  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2339  // Lower all @llvm.objectsize calls first because they may
2340  // use a bitcast/GEP of the alloca we are removing.
2341  if (!Users[i])
2342  continue;
2343 
2344  Instruction *I = cast<Instruction>(&*Users[i]);
2345 
2346  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
2347  if (II->getIntrinsicID() == Intrinsic::objectsize) {
2348  Value *Result =
2349  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
2350  replaceInstUsesWith(*I, Result);
2351  eraseInstFromFunction(*I);
2352  Users[i] = nullptr; // Skip examining in the next loop.
2353  }
2354  }
2355  }
2356  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2357  if (!Users[i])
2358  continue;
2359 
2360  Instruction *I = cast<Instruction>(&*Users[i]);
2361 
2362  if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
2363  replaceInstUsesWith(*C,
2364  ConstantInt::get(Type::getInt1Ty(C->getContext()),
2365  C->isFalseWhenEqual()));
2366  } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
2367  isa<AddrSpaceCastInst>(I)) {
2368  replaceInstUsesWith(*I, UndefValue::get(I->getType()));
2369  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
2370  for (auto *DII : DIIs)
2371  ConvertDebugDeclareToDebugValue(DII, SI, *DIB);
2372  }
2373  eraseInstFromFunction(*I);
2374  }
2375 
2376  if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
2377  // Replace invoke with a NOP intrinsic to maintain the original CFG
2378  Module *M = II->getModule();
2379  Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
2380  InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
2381  None, "", II->getParent());
2382  }
2383 
2384  for (auto *DII : DIIs)
2385  eraseInstFromFunction(*DII);
2386 
2387  return eraseInstFromFunction(MI);
2388  }
2389  return nullptr;
2390 }
2391 
2392 /// Move the call to free before a NULL test.
2393 ///
2394 /// Check if this free is accessed after its argument has been tested
2395 /// against NULL (property 0).
2396 /// If yes, it is legal to move this call in its predecessor block.
2397 ///
2398 /// The move is performed only if the block containing the call to free
2399 /// will be removed, i.e.:
2400 /// 1. it has only one predecessor P, and P has two successors
2401 /// 2. it contains the call, noops, and an unconditional branch
2402 /// 3. its successor is the same as its predecessor's successor
2403 ///
2404 /// Profitability is of no concern here and this function should
2405 /// be called only if the caller knows this transformation would be
2406 /// profitable (e.g., for code size).
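/// Illustrative CFG sketch (hypothetical labels):
///   pred:                                  ; one predecessor, two successors
///     %c = icmp eq i8* %p, null
///     br i1 %c, label %succ, label %free_bb
///   free_bb:                               ; free, noops, unconditional br
///     call void @free(i8* %p)
///     br label %succ
///   succ:                                  ; shared with pred
/// After the move, the call to free sits in pred and free_bb becomes empty.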
2407 static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
2408  const DataLayout &DL) {
2409  Value *Op = FI.getArgOperand(0);
2410  BasicBlock *FreeInstrBB = FI.getParent();
2411  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
2412 
2413  // Validate part of constraint #1: Only one predecessor
2414  // FIXME: We could extend this to multiple predecessors, but in that case, we
2415  // would duplicate the call to free in each predecessor and it may
2416  // not be profitable even for code size.
2417  if (!PredBB)
2418  return nullptr;
2419 
2420  // Validate constraint #2: Does this block contain only the call to
2421  // free, noops, and an unconditional branch?
2422  BasicBlock *SuccBB;
2423  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
2424  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
2425  return nullptr;
2426 
2427  // If there are only 2 instructions in the block, at this point,
2428  // they are the call to free and the unconditional branch.
2429  // If there are more than 2 instructions, check that they are noops
2430  // i.e., they won't hurt the performance of the generated code.
2431  if (FreeInstrBB->size() != 2) {
2432  for (const Instruction &Inst : *FreeInstrBB) {
2433  if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
2434  continue;
2435  auto *Cast = dyn_cast<CastInst>(&Inst);
2436  if (!Cast || !Cast->isNoopCast(DL))
2437  return nullptr;
2438  }
2439  }
2440  // Validate the rest of constraint #1 by matching on the pred branch.
2441  Instruction *TI = PredBB->getTerminator();
2442  BasicBlock *TrueBB, *FalseBB;
2443  ICmpInst::Predicate Pred;
2444  if (!match(TI, m_Br(m_ICmp(Pred,
2445  m_CombineOr(m_Specific(Op),
2446  m_Specific(Op->stripPointerCasts())),
2447  m_Zero()),
2448  TrueBB, FalseBB)))
2449  return nullptr;
2450  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
2451  return nullptr;
2452 
2453  // Validate constraint #3: Ensure the null case just falls through.
2454  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
2455  return nullptr;
2456  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
2457  "Broken CFG: missing edge from predecessor to successor");
2458 
2459  // At this point, we know that everything in FreeInstrBB can be moved
2460  // before TI.
2461  for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
2462  It != End;) {
2463  Instruction &Instr = *It++;
2464  if (&Instr == FreeInstrBBTerminator)
2465  break;
2466  Instr.moveBefore(TI);
2467  }
2468  assert(FreeInstrBB->size() == 1 &&
2469  "Only the branch instruction should remain");
2470  return &FI;
2471 }
2472 
2473 Instruction *InstCombiner::visitFree(CallInst &FI) {
2474  Value *Op = FI.getArgOperand(0);
2475 
2476  // free undef -> unreachable.
2477  if (isa<UndefValue>(Op)) {
2478  // Leave a marker since we can't modify the CFG here.
2479  CreateNonTerminatorUnreachable(&FI);
2480  return eraseInstFromFunction(FI);
2481  }
2482 
2483  // If we have 'free null' delete the instruction. This can happen in stl code
2484  // when lots of inlining happens.
2485  if (isa<ConstantPointerNull>(Op))
2486  return eraseInstFromFunction(FI);
2487 
2488  // If we optimize for code size, try to move the call to free before the null
2489  // test so that SimplifyCFG can remove the empty block and dead code
2490  // elimination can remove the branch. I.e., it helps to turn something like:
2491  // if (foo) free(foo);
2492  // into
2493  // free(foo);
2494  if (MinimizeSize)
2495  if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
2496  return I;
2497 
2498  return nullptr;
2499 }
2500 
2501 Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
2502  if (RI.getNumOperands() == 0) // ret void
2503  return nullptr;
2504 
2505  Value *ResultOp = RI.getOperand(0);
2506  Type *VTy = ResultOp->getType();
2507  if (!VTy->isIntegerTy())
2508  return nullptr;
2509 
2510  // There might be assume intrinsics dominating this return that completely
2511  // determine the value. If so, constant fold it.
2512  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
2513  if (Known.isConstant())
2514  RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant()));
2515 
2516  return nullptr;
2517 }
2518 
2519 Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
2520  // Change br (not X), label True, label False to: br X, label False, True
2521  Value *X = nullptr;
2522  BasicBlock *TrueDest;
2523  BasicBlock *FalseDest;
2524  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
2525  !isa<Constant>(X)) {
2526  // Swap Destinations and condition...
2527  BI.setCondition(X);
2528  BI.swapSuccessors();
2529  return &BI;
2530  }
2531 
2532  // If the condition is irrelevant, remove the use so that other
2533  // transforms on the condition become more effective.
2534  if (BI.isConditional() && !isa<ConstantInt>(BI.getCondition()) &&
2535  BI.getSuccessor(0) == BI.getSuccessor(1)) {
2536  BI.setCondition(ConstantInt::getFalse(BI.getCondition()->getType()));
2537  return &BI;
2538  }
2539 
2540  // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
2541  CmpInst::Predicate Pred;
2542  if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
2543  FalseDest)) &&
2544  !isCanonicalPredicate(Pred)) {
2545  // Swap destinations and condition.
2546  CmpInst *Cond = cast<CmpInst>(BI.getCondition());
2547  Cond->setPredicate(CmpInst::getInversePredicate(Pred));
2548  BI.swapSuccessors();
2549  Worklist.Add(Cond);
2550  return &BI;
2551  }
2552 
2553  return nullptr;
2554 }
2555 
2556 Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
2557  Value *Cond = SI.getCondition();
2558  Value *Op0;
2559  ConstantInt *AddRHS;
2560  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
2561  // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
2562  for (auto Case : SI.cases()) {
2563  Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
2564  assert(isa<ConstantInt>(NewCase) &&
2565  "Result of expression should be constant");
2566  Case.setValue(cast<ConstantInt>(NewCase));
2567  }
2568  SI.setCondition(Op0);
2569  return &SI;
2570  }
2571 
2572  KnownBits Known = computeKnownBits(Cond, 0, &SI);
2573  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
2574  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
2575 
2576  // Compute the number of leading bits we can ignore.
2577  // TODO: A better way to determine this would use ComputeNumSignBits().
2578  for (auto &C : SI.cases()) {
2579  LeadingKnownZeros = std::min(
2580  LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
2581  LeadingKnownOnes = std::min(
2582  LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
2583  }
2584 
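  // Worked example (hypothetical numbers): for an i32 condition whose top 24
  // bits are known zero, and whose case values all have at least 24 leading
  // zeros, NewWidth = 32 - 24 = 8, so the switch can be rewritten over an i8
  // truncation of the condition.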
2585  unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
2586 
2587  // Shrink the condition operand if the new type is smaller than the old type.
2588  // But do not shrink to a non-standard type, because the backend can't
2589  // generate good code for that yet.
2590  // TODO: We can make it aggressive again after fixing PR39569.
2591  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
2592  shouldChangeType(Known.getBitWidth(), NewWidth)) {
2593  IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
2594  Builder.SetInsertPoint(&SI);
2595  Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
2596  SI.setCondition(NewCond);
2597 
2598  for (auto Case : SI.cases()) {
2599  APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
2600  Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
2601  }
2602  return &SI;
2603  }
2604 
2605  return nullptr;
2606 }
2607 
2608 Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
2609  Value *Agg = EV.getAggregateOperand();
2610 
2611  if (!EV.hasIndices())
2612  return replaceInstUsesWith(EV, Agg);
2613 
2614  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
2615  SQ.getWithInstruction(&EV)))
2616  return replaceInstUsesWith(EV, V);
2617 
2618  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
2619  // We're extracting from an insertvalue instruction; compare the indices.
2620  const unsigned *exti, *exte, *insi, *inse;
2621  for (exti = EV.idx_begin(), insi = IV->idx_begin(),
2622  exte = EV.idx_end(), inse = IV->idx_end();
2623  exti != exte && insi != inse;
2624  ++exti, ++insi) {
2625  if (*insi != *exti)
2626  // The insert and extract both reference distinctly different elements.
2627  // This means the extract is not influenced by the insert, and we can
2628  // replace the aggregate operand of the extract with the aggregate
2629  // operand of the insert. i.e., replace
2630  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2631  // %E = extractvalue { i32, { i32 } } %I, 0
2632  // with
2633  // %E = extractvalue { i32, { i32 } } %A, 0
2634  return ExtractValueInst::Create(IV->getAggregateOperand(),
2635  EV.getIndices());
2636  }
2637  if (exti == exte && insi == inse)
2638  // Both iterators are at the end: Index lists are identical. Replace
2639  // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2640  // %C = extractvalue { i32, { i32 } } %B, 1, 0
2641  // with "i32 42"
2642  return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
2643  if (exti == exte) {
2644  // The extract list is a prefix of the insert list. i.e. replace
2645  // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
2646  // %E = extractvalue { i32, { i32 } } %I, 1
2647  // with
2648  // %X = extractvalue { i32, { i32 } } %A, 1
2649  // %E = insertvalue { i32 } %X, i32 42, 0
2650  // by switching the order of the insert and extract (though the
2651  // insertvalue should be left in, since it may have other uses).
2652  Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
2653  EV.getIndices());
2654  return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
2655  makeArrayRef(insi, inse));
2656  }
2657  if (insi == inse)
2658  // The insert list is a prefix of the extract list
2659  // We can simply remove the common indices from the extract and make it
2660  // operate on the inserted value instead of the insertvalue result.
2661  // i.e., replace
2662  // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
2663  // %E = extractvalue { i32, { i32 } } %I, 1, 0
2664  // with
2665  // %E = extractvalue { i32 } { i32 42 }, 0
2666  return ExtractValueInst::Create(IV->getInsertedValueOperand(),
2667  makeArrayRef(exti, exte));
2668  }
2669  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
2670  // We're extracting from an overflow intrinsic. See if we're the only user,
2671  // which allows us to simplify multiple result intrinsics to simpler
2672  // things that just get one value.
2673  if (WO->hasOneUse()) {
2674  // Check if we're grabbing only the result of a 'with overflow' intrinsic
2675  // and replace it with a traditional binary instruction.
2676  if (*EV.idx_begin() == 0) {
2677  Instruction::BinaryOps BinOp = WO->getBinaryOp();
2678  Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
2679  replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
2680  eraseInstFromFunction(*WO);
2681  return BinaryOperator::Create(BinOp, LHS, RHS);
2682  }
2683 
2684  // If the normal result of the add is dead, and the RHS is a constant,
2685  // we can transform this into a range comparison.
2686  // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
2687  if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
2688  if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
2689  return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
2690  ConstantExpr::getNot(CI));
2691  }
2692  }
2693  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
2694  // If the (non-volatile) load only has one use, we can rewrite this to a
2695  // load from a GEP. This reduces the size of the load. If a load is used
2696  // only by extractvalue instructions then this either must have been
2697  // optimized before, or it is a struct with padding, in which case we
2698  // don't want to do the transformation as it loses padding knowledge.
2699  if (L->isSimple() && L->hasOneUse()) {
2700  // extractvalue has integer indices, getelementptr has Value*s. Convert.
2701  SmallVector<Value*, 4> Indices;
2702  // Prefix an i32 0 since we need the first element.
2703  Indices.push_back(Builder.getInt32(0));
2704  for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
2705  I != E; ++I)
2706  Indices.push_back(Builder.getInt32(*I));
2707 
2708  // We need to insert these at the location of the old load, not at that of
2709  // the extractvalue.
2710  Builder.SetInsertPoint(L);
2711  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
2712  L->getPointerOperand(), Indices);
2713  Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
2714  // Whatever aliasing information we had for the original load must also
2715  // hold for the smaller load, so propagate the annotations.
2716  AAMDNodes Nodes;
2717  L->getAAMetadata(Nodes);
2718  NL->setAAMetadata(Nodes);
2719  // Returning the load directly will cause the main loop to insert it in
2720  // the wrong spot, so use replaceInstUsesWith().
2721  return replaceInstUsesWith(EV, NL);
2722  }
2723  // We could simplify extracts from other values. Note that nested extracts may
2724  // already be simplified implicitly by the above: extract (extract (insert) )
2725  // will be translated into extract ( insert ( extract ) ) first and then just
2726  // the value inserted, if appropriate. Similarly for extracts from single-use
2727  // loads: extract (extract (load)) will be translated to extract (load (gep))
2728  // and if again single-use then via load (gep (gep)) to load (gep).
2729  // However, double extracts from e.g. function arguments or return values
2730  // aren't handled yet.
2731  return nullptr;
2732 }
2733 
2734 /// Return 'true' if the given typeinfo will match anything.
2735 static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
2736  switch (Personality) {
2737  case EHPersonality::GNU_C:
2738  case EHPersonality::GNU_C_SjLj:
2739  case EHPersonality::Rust:
2740  // The GCC C EH and Rust personalities exist only to support cleanups, so
2741  // it's not clear what the semantics of catch clauses are.
2742  return false;
2743  case EHPersonality::Unknown:
2744  return false;
2745  case EHPersonality::GNU_Ada:
2746  // While __gnat_all_others_value will match any Ada exception, it doesn't
2747  // match foreign exceptions (or didn't, before gcc-4.7).
2748  return false;
2749  case EHPersonality::GNU_CXX:
2750  case EHPersonality::GNU_CXX_SjLj:
2751  case EHPersonality::GNU_ObjC:
2752  case EHPersonality::MSVC_X86SEH:
2753  case EHPersonality::MSVC_Win64SEH:
2754  case EHPersonality::MSVC_CXX:
2755  case EHPersonality::CoreCLR:
2756  case EHPersonality::Wasm_CXX:
2757  return TypeInfo->isNullValue();
2758  }
2759  llvm_unreachable("invalid enum");
2760 }
2761 
2762 static bool shorter_filter(const Value *LHS, const Value *RHS) {
2763  return
2764  cast<ArrayType>(LHS->getType())->getNumElements()
2765  <
2766  cast<ArrayType>(RHS->getType())->getNumElements();
2767 }
2768 
2769 Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
2770  // The logic here should be correct for any real-world personality function.
2771  // However if that turns out not to be true, the offending logic can always
2772  // be conditioned on the personality function, like the catch-all logic is.
2773  EHPersonality Personality =
2774  classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
2775 
2776  // Simplify the list of clauses, e.g. by removing repeated catch clauses
2777  // (these are often created by inlining).
2778  bool MakeNewInstruction = false; // If true, recreate using the following:
2779  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
2780  bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
2781 
2782  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
2783  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
2784  bool isLastClause = i + 1 == e;
2785  if (LI.isCatch(i)) {
2786  // A catch clause.
2787  Constant *CatchClause = LI.getClause(i);
2788  Constant *TypeInfo = CatchClause->stripPointerCasts();
2789 
2790  // If we already saw this clause, there is no point in having a second
2791  // copy of it.
2792  if (AlreadyCaught.insert(TypeInfo).second) {
2793  // This catch clause was not already seen.
2794  NewClauses.push_back(CatchClause);
2795  } else {
2796  // Repeated catch clause - drop the redundant copy.
2797  MakeNewInstruction = true;
2798  }
2799 
2800  // If this is a catch-all then there is no point in keeping any following
2801  // clauses or marking the landingpad as having a cleanup.
2802  if (isCatchAll(Personality, TypeInfo)) {
2803  if (!isLastClause)
2804  MakeNewInstruction = true;
2805  CleanupFlag = false;
2806  break;
2807  }
2808  } else {
2809  // A filter clause. If any of the filter elements were already caught
2810  // then they can be dropped from the filter. It is tempting to try to
2811  // exploit the filter further by saying that any typeinfo that does not
2812  // occur in the filter can't be caught later (and thus can be dropped).
2813  // However this would be wrong, since typeinfos can match without being
2814  // equal (for example if one represents a C++ class, and the other some
2815  // class derived from it).
2816  assert(LI.isFilter(i) && "Unsupported landingpad clause!");
2817  Constant *FilterClause = LI.getClause(i);
2818  ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
2819  unsigned NumTypeInfos = FilterType->getNumElements();
2820 
2821  // An empty filter catches everything, so there is no point in keeping any
2822  // following clauses or marking the landingpad as having a cleanup. By
2823  // dealing with this case here the following code is made a bit simpler.
2824  if (!NumTypeInfos) {
2825  NewClauses.push_back(FilterClause);
2826  if (!isLastClause)
2827  MakeNewInstruction = true;
2828  CleanupFlag = false;
2829  break;
2830  }
2831 
2832  bool MakeNewFilter = false; // If true, make a new filter.
2833  SmallVector<Constant *, 16> NewFilterElts; // New elements.
2834  if (isa<ConstantAggregateZero>(FilterClause)) {
2835  // Not an empty filter - it contains at least one null typeinfo.
2836  assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
2837  Constant *TypeInfo =
2838  Constant::getNullValue(FilterType->getElementType());
2839  // If this typeinfo is a catch-all then the filter can never match.
2840  if (isCatchAll(Personality, TypeInfo)) {
2841  // Throw the filter away.
2842  MakeNewInstruction = true;
2843  continue;
2844  }
2845 
2846  // There is no point in having multiple copies of this typeinfo, so
2847  // discard all but the first copy if there is more than one.
2848  NewFilterElts.push_back(TypeInfo);
2849  if (NumTypeInfos > 1)
2850  MakeNewFilter = true;
2851  } else {
2852  ConstantArray *Filter = cast<ConstantArray>(FilterClause);
2853  SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
2854  NewFilterElts.reserve(NumTypeInfos);
2855 
2856  // Remove any filter elements that were already caught or that already
2857  // occurred in the filter. While there, see if any of the elements are
2858  // catch-alls. If so, the filter can be discarded.
2859  bool SawCatchAll = false;
2860  for (unsigned j = 0; j != NumTypeInfos; ++j) {
2861  Constant *Elt = Filter->getOperand(j);
2862  Constant *TypeInfo = Elt->stripPointerCasts();
2863  if (isCatchAll(Personality, TypeInfo)) {
2864  // This element is a catch-all. Bail out, noting this fact.
2865  SawCatchAll = true;
2866  break;
2867  }
2868 
2869  // Even if we've seen a type in a catch clause, we don't want to
2870  // remove it from the filter. An unexpected type handler may be
2871  // set up for a call site which throws an exception of the same
2872  // type caught. In order for the exception thrown by the unexpected
2873  // handler to propagate correctly, the filter must be correctly
2874  // described for the call site.
2875  //
2876  // Example:
2877  //
2878  // void unexpected() { throw 1;}
2879  // void foo() throw (int) {
2880  // std::set_unexpected(unexpected);
2881  // try {
2882  // throw 2.0;
2883  // } catch (int i) {}
2884  // }
2885 
2886  // There is no point in having multiple copies of the same typeinfo in
2887  // a filter, so only add it if we didn't already.
2888  if (SeenInFilter.insert(TypeInfo).second)
2889  NewFilterElts.push_back(cast<Constant>(Elt));
2890  }
2891  // A filter containing a catch-all cannot match anything by definition.
2892  if (SawCatchAll) {
2893  // Throw the filter away.
2894  MakeNewInstruction = true;
2895  continue;
2896  }
2897 
2898  // If we dropped something from the filter, make a new one.
2899  if (NewFilterElts.size() < NumTypeInfos)
2900  MakeNewFilter = true;
2901  }
2902  if (MakeNewFilter) {
2903  FilterType = ArrayType::get(FilterType->getElementType(),
2904  NewFilterElts.size());
2905  FilterClause = ConstantArray::get(FilterType, NewFilterElts);
2906  MakeNewInstruction = true;
2907  }
2908 
2909  NewClauses.push_back(FilterClause);
2910 
2911  // If the new filter is empty then it will catch everything so there is
2912  // no point in keeping any following clauses or marking the landingpad
2913  // as having a cleanup. The case of the original filter being empty was
2914  // already handled above.
2915  if (MakeNewFilter && !NewFilterElts.size()) {
2916  assert(MakeNewInstruction && "New filter but not a new instruction!");
2917  CleanupFlag = false;
2918  break;
2919  }
2920  }
2921  }
2922 
2923  // If several filters occur in a row then reorder them so that the shortest
2924  // filters come first (those with the smallest number of elements). This is
2925  // advantageous because shorter filters are more likely to match, speeding up
2926  // unwinding, but mostly because it increases the effectiveness of the other
2927  // filter optimizations below.
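 // For instance, [filter(3 elts), filter(1 elt), catch, filter(2 elts)]
 // becomes [filter(1 elt), filter(3 elts), catch, filter(2 elts)]: each run
 // of consecutive filters is sorted on its own, and the catch clause ends
 // the first run.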
2928  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
2929  unsigned j;
2930  // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
2931  for (j = i; j != e; ++j)
2932  if (!isa<ArrayType>(NewClauses[j]->getType()))
2933  break;
2934 
2935  // Check whether the filters are already sorted by length. We need to know
2936  // if sorting them is actually going to do anything so that we only make a
2937  // new landingpad instruction if it does.
2938  for (unsigned k = i; k + 1 < j; ++k)
2939  if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
2940  // Not sorted, so sort the filters now. Doing an unstable sort would be
2941  // correct too but reordering filters pointlessly might confuse users.
2942  std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
2943  shorter_filter);
2944  MakeNewInstruction = true;
2945  break;
2946  }
2947 
2948  // Look for the next batch of filters.
2949  i = j + 1;
2950  }
2951 
2952  // If typeinfos matched if and only if equal, then the elements of a filter L
2953  // that occurs later than a filter F could be replaced by the intersection of
2954  // the elements of F and L. In reality two typeinfos can match without being
2955  // equal (for example if one represents a C++ class, and the other some class
2956  // derived from it) so it would be wrong to perform this transform in general.
2957  // However the transform is correct and useful if F is a subset of L. In that
2958  // case L can be replaced by F, and thus removed altogether since repeating a
2959  // filter is pointless. So here we look at all pairs of filters F and L where
2960  // L follows F in the list of clauses, and remove L if every element of F is
2961  // an element of L. This can occur when inlining C++ functions with exception
2962  // specifications.
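 // For example, after inlining one may be left with (using the usual C++
 // typeinfo names for int and double; illustrative only):
 //   filter [1 x i8*] [i8* @_ZTIi]                ; F
 //   filter [2 x i8*] [i8* @_ZTIi, i8* @_ZTId]    ; L
 // Every element of F is an element of L, so L is deleted below.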
2963  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
2964  // Examine each filter in turn.
2965  Value *Filter = NewClauses[i];
2966  ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
2967  if (!FTy)
2968  // Not a filter - skip it.
2969  continue;
2970  unsigned FElts = FTy->getNumElements();
2971  // Examine each filter following this one. Doing this backwards means that
2972  // we don't have to worry about filters disappearing under us when removed.
2973  for (unsigned j = NewClauses.size() - 1; j != i; --j) {
2974  Value *LFilter = NewClauses[j];
2975  ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
2976  if (!LTy)
2977  // Not a filter - skip it.
2978  continue;
2979  // If Filter is a subset of LFilter, i.e. every element of Filter is also
2980  // an element of LFilter, then discard LFilter.
2981  SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
2982  // If Filter is empty then it is a subset of LFilter.
2983  if (!FElts) {
2984  // Discard LFilter.
2985  NewClauses.erase(J);
2986  MakeNewInstruction = true;
2987  // Move on to the next filter.
2988  continue;
2989  }
2990  unsigned LElts = LTy->getNumElements();
2991  // If Filter is longer than LFilter then it cannot be a subset of it.
2992  if (FElts > LElts)
2993  // Move on to the next filter.
2994  continue;
2995  // At this point we know that LFilter has at least one element.
2996  if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
2997  // Filter is a subset of LFilter iff Filter contains only zeros (as we
2998  // already know that Filter is not longer than LFilter).
2999  if (isa<ConstantAggregateZero>(Filter)) {
3000  assert(FElts <= LElts && "Should have handled this case earlier!");
3001  // Discard LFilter.
3002  NewClauses.erase(J);
3003  MakeNewInstruction = true;
3004  }
3005  // Move on to the next filter.
3006  continue;
3007  }
3008  ConstantArray *LArray = cast<ConstantArray>(LFilter);
3009  if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
3010  // Since Filter is non-empty and contains only zeros, it is a subset of
3011  // LFilter iff LFilter contains a zero.
3012  assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
3013  for (unsigned l = 0; l != LElts; ++l)
3014  if (LArray->getOperand(l)->isNullValue()) {
3015  // LFilter contains a zero - discard it.
3016  NewClauses.erase(J);
3017  MakeNewInstruction = true;
3018  break;
3019  }
3020  // Move on to the next filter.
3021  continue;
3022  }
3023  // At this point we know that both filters are ConstantArrays. Loop over
3024  // operands to see whether every element of Filter is also an element of
3025  // LFilter. Since filters tend to be short this is probably faster than
3026  // using a method that scales nicely.
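 // (A sketch of the cost: the nested scan below does O(FElts * LElts)
 // pointer comparisons; a set-based membership test would scale better but
 // costs an allocation per pair of filters.)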
3027  ConstantArray *FArray = cast<ConstantArray>(Filter);
3028  bool AllFound = true;
3029  for (unsigned f = 0; f != FElts; ++f) {
3030  Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
3031  AllFound = false;
3032  for (unsigned l = 0; l != LElts; ++l) {
3033  Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
3034  if (LTypeInfo == FTypeInfo) {
3035  AllFound = true;
3036  break;
3037  }
3038  }
3039  if (!AllFound)
3040  break;
3041  }
3042  if (AllFound) {
3043  // Discard LFilter.
3044  NewClauses.erase(J);
3045  MakeNewInstruction = true;
3046  }
3047  // Move on to the next filter.
3048  }
3049  }
3050 
3051  // If we changed any of the clauses, replace the old landingpad instruction
3052  // with a new one.
3053  if (MakeNewInstruction) {
3054  LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
3055  NewClauses.size());
3056  for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
3057  NLI->addClause(NewClauses[i]);
3058  // A landing pad with no clauses must have the cleanup flag set. It is
3059  // theoretically possible, though highly unlikely, that we eliminated all
3060  // clauses. If so, force the cleanup flag to true.
3061  if (NewClauses.empty())
3062  CleanupFlag = true;
3063  NLI->setCleanup(CleanupFlag);
3064  return NLI;
3065  }
3066 
3067  // Even if none of the clauses changed, we may nonetheless have understood
3068  // that the cleanup flag is pointless. Clear it if so.
3069  if (LI.isCleanup() != CleanupFlag) {
3070  assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
3071  LI.setCleanup(CleanupFlag);
3072  return &LI;
3073  }
3074 
3075  return nullptr;
3076 }
3077 
3078 /// Try to move the specified instruction from its current block into the
3079 /// beginning of DestBlock, which can only happen if it's safe to move the
3080 /// instruction past all of the instructions between it and the end of its
3081 /// block.
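/// An illustrative sketch (invented IR, not from the source): in
///
///   bb0:
///     %v = load i32, i32* %p
///     %c = icmp eq i32 %x, 0
///     br i1 %c, label %use, label %exit
///
/// the load can be sunk to the start of %use, the block of its single user,
/// because nothing after it in %bb0 may write to memory.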
3082 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
3083  assert(I->hasOneUse() && "Invariants didn't hold!");
3084  BasicBlock *SrcBlock = I->getParent();
3085 
3086  // Cannot move control-flow-involving instructions, volatile loads, va_arg, etc.
3087  if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
3088  I->isTerminator())
3089  return false;
3090 
3091  // Do not sink static or dynamic alloca instructions. Static allocas must
3092  // remain in the entry block, and dynamic allocas must not be sunk in between
3093  // a stacksave / stackrestore pair, which would incorrectly shorten its
3094  // lifetime.
3095  if (isa<AllocaInst>(I))
3096  return false;
3097 
3098  // Do not sink into catchswitch blocks.
3099  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
3100  return false;
3101 
3102  // Do not sink convergent call instructions.
3103  if (auto *CI = dyn_cast<CallInst>(I)) {
3104  if (CI->isConvergent())
3105  return false;
3106  }
3107  // We can only sink load instructions if there is nothing between the load and
3108  // the end of the block that could change the value.
3109  if (I->mayReadFromMemory()) {
3110  for (BasicBlock::iterator Scan = I->getIterator(),
3111  E = I->getParent()->end();
3112  Scan != E; ++Scan)
3113  if (Scan->mayWriteToMemory())
3114  return false;
3115  }
3116  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
3117  I->moveBefore(&*InsertPos);
3118  ++NumSunkInst;
3119 
3120  // Also sink all related debug uses from the source basic block. Otherwise
3121  // we get a debug use before the def. Attempt to salvage debug uses first,
3122  // to maximise the range over which variables have a location. If we cannot
3123  // salvage, then mark the location undef: we know it was supposed to receive
3124  // a new location here, but that computation has been sunk.
3125  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
3126  findDbgUsers(DbgUsers, I);
3127  for (auto *DII : reverse(DbgUsers)) {
3128  if (DII->getParent() == SrcBlock) {
3129  // dbg.value is in the same basic block as the sunk inst, see if we can
3130  // salvage it. Clone a new copy of the instruction: on success we need
3131  // both salvaged and unsalvaged copies.
3132  SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
3133  cast<DbgVariableIntrinsic>(DII->clone())};
3134 
3135  if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
3136  // We are unable to salvage: sink the cloned dbg.value, and mark the
3137  // original as undef, terminating any earlier variable location.
3138  LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
3139  TmpUser[0]->insertBefore(&*InsertPos);
3140  Value *Undef = UndefValue::get(I->getType());
3141  DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
3142  ValueAsMetadata::get(Undef)));
3143  } else {
3144  // We successfully salvaged: place the salvaged dbg.value in the
3145  // original location, and move the unmodified dbg.value to sink with
3146  // the sunk inst.
3147  TmpUser[0]->insertBefore(DII);
3148  DII->moveBefore(&*InsertPos);
3149  }
3150  }
3151  }
3152  return true;
3153 }
3154 
3155 bool InstCombiner::run() {
3156  while (!Worklist.isEmpty()) {
3157  Instruction *I = Worklist.RemoveOne();
3158  if (I == nullptr) continue; // skip null values.
3159 
3160  // Check to see if we can DCE the instruction.
3161  if (isInstructionTriviallyDead(I, &TLI)) {
3162  LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
3163  eraseInstFromFunction(*I);
3164  ++NumDeadInst;
3165  MadeIRChange = true;
3166  continue;
3167  }
3168 
3169  if (!DebugCounter::shouldExecute(VisitCounter))
3170  continue;
3171 
3172  // Instruction isn't dead, see if we can constant propagate it.
3173  if (!I->use_empty() &&
3174  (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
3175  if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
3176  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
3177  << '\n');
3178 
3179  // Add operands to the worklist.
3180  replaceInstUsesWith(*I, C);
3181  ++NumConstProp;
3182  if (isInstructionTriviallyDead(I, &TLI))
3183  eraseInstFromFunction(*I);
3184  MadeIRChange = true;
3185  continue;
3186  }
3187  }
3188 
3189  // In general, it is possible for computeKnownBits to determine all bits in
3190  // a value even when the operands are not all constants.
3191  Type *Ty = I->getType();
3192  if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
3193  KnownBits Known = computeKnownBits(I, /*Depth*/0, I);
3194  if (Known.isConstant()) {
3195  Constant *C = ConstantInt::get(Ty, Known.getConstant());
3196  LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C
3197  << " from: " << *I << '\n');
3198 
3199  // Add operands to the worklist.
3200  replaceInstUsesWith(*I, C);
3201  ++NumConstProp;
3202  if (isInstructionTriviallyDead(I, &TLI))
3203  eraseInstFromFunction(*I);
3204  MadeIRChange = true;
3205  continue;
3206  }
3207  }
3208 
3209  // See if we can trivially sink this instruction to a successor basic block.
3210  if (EnableCodeSinking && I->hasOneUse()) {
3211  BasicBlock *BB = I->getParent();
3212  Instruction *UserInst = cast<Instruction>(*I->user_begin());
3213  BasicBlock *UserParent;
3214 
3215  // Get the block the use occurs in.
3216  if (PHINode *PN = dyn_cast<PHINode>(UserInst))
3217  UserParent = PN->getIncomingBlock(*I->use_begin());
3218  else
3219  UserParent = UserInst->getParent();
3220 
3221  if (UserParent != BB) {
3222  bool UserIsSuccessor = false;
3223  // See if the user is one of our successors.
3224  for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
3225  if (*SI == UserParent) {
3226  UserIsSuccessor = true;
3227  break;
3228  }
3229 
3230  // If the user is one of our immediate successors, and if that successor
3231  // only has us as a predecessor (we'd have to split the critical edge
3232  // otherwise), we can keep going.
3233  if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
3234  // Okay, the CFG is simple enough, try to sink this instruction.
3235  if (TryToSinkInstruction(I, UserParent)) {
3236  LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
3237  MadeIRChange = true;
3238  // We'll add uses of the sunk instruction below, but since sinking
3239  // can expose opportunities for its *operands*, add them to the
3240  // worklist.
3241  for (Use &U : I->operands())
3242  if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
3243  Worklist.Add(OpI);
3244  }
3245  }
3246  }
3247  }
3248 
3249  // Now that we have an instruction, try combining it to simplify it.
3250  Builder.SetInsertPoint(I);
3251  Builder.SetCurrentDebugLocation(I->getDebugLoc());
3252 
3253 #ifndef NDEBUG
3254  std::string OrigI;
3255 #endif
3256  LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
3257  LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
3258 
3259  if (Instruction *Result = visit(*I)) {
3260  ++NumCombined;
3261  // Should we replace the old instruction with a new one?
3262  if (Result != I) {
3263  LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
3264  << " New = " << *Result << '\n');
3265 
3266  if (I->getDebugLoc())
3267  Result->setDebugLoc(I->getDebugLoc());
3268  // Everything uses the new instruction now.
3269  I->replaceAllUsesWith(Result);
3270 
3271  // Move the name to the new instruction first.
3272  Result->takeName(I);
3273 
3274  // Push the new instruction and any users onto the worklist.
3275  Worklist.AddUsersToWorkList(*Result);
3276  Worklist.Add(Result);
3277 
3278  // Insert the new instruction into the basic block...
3279  BasicBlock *InstParent = I->getParent();
3280  BasicBlock::iterator InsertPos = I->getIterator();
3281 
3282  // If we replace a PHI with something that isn't a PHI, fix up the
3283  // insertion point.
3284  if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos))
3285  InsertPos = InstParent->getFirstInsertionPt();
3286 
3287  InstParent->getInstList().insert(InsertPos, Result);
3288 
3289  eraseInstFromFunction(*I);
3290  } else {
3291  LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
3292  << " New = " << *I << '\n');
3293 
3294  // If the instruction was modified, it's possible that it is now dead.
3295  // If so, remove it.
3296  if (isInstructionTriviallyDead(I, &TLI)) {
3297  eraseInstFromFunction(*I);
3298  } else {
3299  Worklist.AddUsersToWorkList(*I);
3300  Worklist.Add(I);
3301  }
3302  }
3303  MadeIRChange = true;
3304  }
3305  }
3306 
3307  Worklist.Zap();
3308  return MadeIRChange;
3309 }
3310 
3311 /// Walk the function in depth-first order, adding all reachable code to the
3312 /// worklist.
3313 ///
3314 /// This has a couple of tricks to make the code faster and more powerful. In
3315 /// particular, we constant fold and DCE instructions as we go, to avoid adding
3316 /// them to the worklist (this significantly speeds up instcombine on code where
3317 /// many instructions are dead or constant). Additionally, if we find a branch
3318 /// whose condition is a known constant, we only visit the reachable successors.
3319 static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
3320  SmallPtrSetImpl<BasicBlock *> &Visited,
3321  InstCombineWorklist &ICWorklist,
3322  const TargetLibraryInfo *TLI) {
3323  bool MadeIRChange = false;
3324  SmallVector<BasicBlock*, 256> Worklist;
3325  Worklist.push_back(BB);
3326 
3327  SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
3328  DenseMap<Constant *, Constant *> FoldedConstants;
3329 
3330  do {
3331  BB = Worklist.pop_back_val();
3332 
3333  // We have now visited this block! If we've already been here, ignore it.
3334  if (!Visited.insert(BB).second)
3335  continue;
3336 
3337  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
3338  Instruction *Inst = &*BBI++;
3339 
3340  // DCE instruction if trivially dead.
3341  if (isInstructionTriviallyDead(Inst, TLI)) {
3342  ++NumDeadInst;
3343  LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
3344  if (!salvageDebugInfo(*Inst))
3345  replaceDbgUsesWithUndef(Inst);
3346  Inst->eraseFromParent();
3347  MadeIRChange = true;
3348  continue;
3349  }
3350 
3351  // ConstantProp instruction if trivially constant.
3352  if (!Inst->use_empty() &&
3353  (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
3354  if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
3355  LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
3356  << '\n');
3357  Inst->replaceAllUsesWith(C);
3358  ++NumConstProp;
3359  if (isInstructionTriviallyDead(Inst, TLI))
3360  Inst->eraseFromParent();
3361  MadeIRChange = true;
3362  continue;
3363  }
3364 
3365  // See if we can constant fold its operands.
3366  for (Use &U : Inst->operands()) {
3367  if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
3368  continue;
3369 
3370  auto *C = cast<Constant>(U);
3371  Constant *&FoldRes = FoldedConstants[C];
3372  if (!FoldRes)
3373  FoldRes = ConstantFoldConstant(C, DL, TLI);
3374  if (!FoldRes)
3375  FoldRes = C;
3376 
3377  if (FoldRes != C) {
3378  LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
3379  << "\n Old = " << *C
3380  << "\n New = " << *FoldRes << '\n');
3381  U = FoldRes;
3382  MadeIRChange = true;
3383  }
3384  }
3385 
3386  // Skip processing debug intrinsics in InstCombine. Processing these call instructions
3387  // consumes a non-trivial amount of time and provides no value for the optimization.
3388  if (!isa<DbgInfoIntrinsic>(Inst))
3389  InstrsForInstCombineWorklist.push_back(Inst);
3390  }
3391 
3392  // Recursively visit successors. If this is a branch or switch on a
3393  // constant, only visit the reachable successor.
3394  Instruction *TI = BB->getTerminator();
3395  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
3396  if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
3397  bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
3398  BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
3399  Worklist.push_back(ReachableBB);
3400  continue;
3401  }
3402  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3403  if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
3404  Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
3405  continue;
3406  }
3407  }
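 // For example, "br i1 true, label %live, label %dead" pushes only %live;
 // %dead is never visited here, and its instructions are removed later in
 // prepareICWorklistFromFunction.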
3408 
3409  for (BasicBlock *SuccBB : successors(TI))
3410  Worklist.push_back(SuccBB);
3411  } while (!Worklist.empty());
3412 
3413  // Once we've found all of the instructions to add to instcombine's worklist,
3414  // add them in reverse order. This way instcombine will visit from the top
3415  // of the function down. This jibes well with the way that it adds all uses
3416  // of instructions to the worklist after doing a transformation, thus avoiding
3417  // some N^2 behavior in pathological cases.
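 // For example, instructions collected as [a, b, c] in program order are
 // pushed so that "a" comes off the worklist first.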
3418  ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
3419 
3420  return MadeIRChange;
3421 }
3422 
3423 /// Populate the IC worklist from a function, and prune any dead basic
3424 /// blocks discovered in the process.
3425 ///
3426 /// This also does basic constant propagation and other forward fixing to make
3427 /// the combiner itself run much faster.
3428 static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
3429  TargetLibraryInfo *TLI,
3430  InstCombineWorklist &ICWorklist) {
3431  bool MadeIRChange = false;
3432 
3433  // Do a depth-first traversal of the function, populate the worklist with
3434  // the reachable instructions. Ignore blocks that are not reachable. Keep
3435  // track of which blocks we visit.
3436  SmallPtrSet<BasicBlock *, 32> Visited;
3437  MadeIRChange |=
3438  AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
3439 
3440  // Do a quick scan over the function. If we find any blocks that are
3441  // unreachable, remove any instructions inside of them. This prevents
3442  // the instcombine code from having to deal with some bad special cases.
3443  for (BasicBlock &BB : F) {
3444  if (Visited.count(&BB))
3445  continue;
3446 
3447  unsigned NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
3448  MadeIRChange |= NumDeadInstInBB > 0;
3449  NumDeadInst += NumDeadInstInBB;
3450  }
3451 
3452  return MadeIRChange;
3453 }
3454 
3455 static bool combineInstructionsOverFunction(
3456  Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
3457  AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
3458  OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
3459  ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
3460  LoopInfo *LI = nullptr) {
3461  auto &DL = F.getParent()->getDataLayout();
3462  ExpensiveCombines |= EnableExpensiveCombines;
3463 
3464  /// Builder - This is an IRBuilder that automatically inserts new
3465  /// instructions into the worklist when they are created.
3466  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
3467  F.getContext(), TargetFolder(DL),
3468  IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
3469  Worklist.Add(I);
3470  if (match(I, m_Intrinsic<Intrinsic::assume>()))
3471  AC.registerAssumption(cast<CallInst>(I));
3472  }));
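 // Consequently, every instruction the builder creates is queued for a
 // visit, and a newly created llvm.assume call is registered with the
 // AssumptionCache immediately.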
3473 
3474  // Lower dbg.declare intrinsics otherwise their value may be clobbered
3475  // by instcombiner.
3476  bool MadeIRChange = false;
3477  if (ShouldLowerDbgDeclare)
3478  MadeIRChange = LowerDbgDeclare(F);
3479 
3480  // Iterate while there is work to do.
3481  int Iteration = 0;
3482  while (true) {
3483  ++Iteration;
3484  LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
3485  << F.getName() << "\n");
3486 
3487  MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
3488 
3489  InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
3490  AC, TLI, DT, ORE, BFI, PSI, DL, LI);
3491  IC.MaxArraySizeForCombine = MaxArraySize;
3492 
3493  if (!IC.run())
3494  break;
3495  }
3496 
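 // Reaching a second iteration means the first IC.run() changed the IR, so
 // report a change even if the final iteration made none.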
3497  return MadeIRChange || Iteration > 1;
3498 }
3499 
3500 PreservedAnalyses InstCombinePass::run(Function &F,
3501  FunctionAnalysisManager &AM) {
3502  auto &AC = AM.getResult<AssumptionAnalysis>(F);
3503  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
3504  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
3505  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
3506 
3507  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
3508 
3509  auto *AA = &AM.getResult<AAManager>(F);
3510  const ModuleAnalysisManager &MAM =
3511  AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
3512  ProfileSummaryInfo *PSI =
3513  MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3514  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
3515  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
3516 
3517  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3518  BFI, PSI, ExpensiveCombines, LI))
3519  // No changes, all analyses are preserved.
3520  return PreservedAnalyses::all();
3521 
3522  // Mark all the analyses that instcombine updates as preserved.
3523  PreservedAnalyses PA;
3524  PA.preserveSet<CFGAnalyses>();
3525  PA.preserve<AAManager>();
3526  PA.preserve<BasicAA>();
3527  PA.preserve<GlobalsAA>();
3528  return PA;
3529 }
3530 
3531 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
3532  AU.setPreservesCFG();
3533  AU.addRequired<AAResultsWrapperPass>();
3534  AU.addRequired<AssumptionCacheTracker>();
3535  AU.addRequired<TargetLibraryInfoWrapperPass>();
3536  AU.addRequired<DominatorTreeWrapperPass>();
3537  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3538  AU.addPreserved<DominatorTreeWrapperPass>();
3539  AU.addPreserved<AAResultsWrapperPass>();
3540  AU.addPreserved<BasicAAWrapperPass>();
3541  AU.addPreserved<GlobalsAAWrapperPass>();
3542  AU.addRequired<ProfileSummaryInfoWrapperPass>();
3543  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
3544 }
3545 
3546 bool InstructionCombiningPass::runOnFunction(Function &F) {
3547  if (skipFunction(F))
3548  return false;
3549 
3550  // Required analyses.
3551  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3552  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3553  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
3554  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3555  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
3556 
3557  // Optional analyses.
3558  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
3559  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
3560  ProfileSummaryInfo *PSI =
3561  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3562  BlockFrequencyInfo *BFI =
3563  (PSI && PSI->hasProfileSummary()) ?
3564  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
3565  nullptr;
3566 
3567  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3568  BFI, PSI, ExpensiveCombines, LI);
3569 }
3570 
3571 char InstructionCombiningPass::ID = 0;
3572 
3573 INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
3574  "Combine redundant instructions", false, false)
3575 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3576 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3577 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3578 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
3579 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
3580 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3581 INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
3582 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
3583 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
3584  "Combine redundant instructions", false, false)
3585 
3586 // Initialization Routines
3587 void llvm::initializeInstCombine(PassRegistry &Registry) {
3588  initializeInstructionCombiningPassPass(Registry);
3589 }
3590 
3591 void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
3592  initializeInstructionCombiningPassPass(*unwrap(R));
3593 }
3594 
3595 FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
3596  return new InstructionCombiningPass(ExpensiveCombines);
3597 }
3598 
3599 void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
3600  unwrap(PM)->add(createInstructionCombiningPass());
3601 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Type * getVectorElementType() const
Definition: Type.h:370
const NoneType None
Definition: None.h:23
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e.
Definition: Constants.h:761
Value * EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.h:28
uint64_t CallInst * C
Return a value (possibly void), from a function.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (eith...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, InstCombineWorklist &ICWorklist)
Populate the IC worklist from a function, and prune any dead basic blocks discovered in the process...
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:594
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:645
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:699
class_match< UndefValue > m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:86
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:172
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:78
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This instruction extracts a struct member or array element value from an aggregate value...
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:833
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1333
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:653
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:375
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
void LLVMInitializeInstCombine(LLVMPassRegistryRef R)
static bool willNotOverflow(WithOverflowInst *WO, LazyValueInfo *LVI)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:379
void swapSuccessors()
Swap the successors of this branch instruction.
static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo *TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
This class represents a op.with.overflow intrinsic.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isSized(SmallPtrSetImpl< Type *> *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
static Value * foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, InstCombiner::BuilderTy &Builder)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock *> *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction &#39;To&#39; is reachable from &#39;From&#39;, without passing through any blocks in Ex...
Definition: CFG.cpp:211
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
void push_back(const T &Elt)
Definition: SmallVector.h:211
Analysis providing profile information.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:899
br_match m_UnconditionalBr(BasicBlock *&Succ)
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:44
void findDbgUsers(SmallVectorImpl< DbgVariableIntrinsic *> &DbgInsts, Value *V)
Finds the debug info intrinsics describing a value.
Definition: Local.cpp:1540
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
Value * getCondition() const
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:89
static bool isCanonicalPredicate(CmpInst::Predicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
gep_type_iterator gep_type_end(const User *GEP)
const Value * getTrueValue() const
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:786
A cache of @llvm.assume calls within a function.
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1624
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1836
This instruction constructs a fixed permutation of two input vectors.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
bool isTerminator() const
Definition: Instruction.h:128
struct LLVMOpaquePassRegistry * LLVMPassRegistryRef
Definition: Types.h:131
bool replaceDbgUsesWithUndef(Instruction *I)
Replace all the uses of an SSA value in .dbg intrinsics with undef.
Definition: Local.cpp:483
void Add(Instruction *I)
Add - Add the specified instruction to the worklist if it isn&#39;t already in it.
BasicBlock * getSuccessor(unsigned i) const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:810
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
ThreeOps_match< V1_t, V2_t, Mask_t, Instruction::ShuffleVector > m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m)
Matches ShuffleVectorInst.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:534
An instruction for reading from memory.
Definition: Instructions.h:167
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1946
Hexagon Common GEP
Value * getCondition() const
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2239
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
iv Induction Variable Users
Definition: IVUsers.cpp:51
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:369
idx_iterator idx_end() const
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:39
static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:701
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:175
op_iterator op_begin()
Definition: User.h:229
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:992
unsigned getElementContainingOffset(uint64_t Offset) const
Given a valid byte offset into the structure, returns the structure index that contains it...
Definition: DataLayout.cpp:83
static LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:274
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
bool swapOperands()
Exchange the two operands to this instruction.
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1218
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
AnalysisUsage & addRequired()
ArrayRef< unsigned > getIndices() const
Value * SimplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:554
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
This class represents a conversion between pointers from one address space to another.
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
This class represents the LLVM &#39;select&#39; instruction.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:808
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Attribute unwrap(LLVMAttributeRef Attr)
Definition: Attributes.h:194
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:368
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:416
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1285
uint64_t getArrayNumElements() const
Definition: DerivedTypes.h:419
Class to represent struct types.
Definition: DerivedTypes.h:232
Value * SimplifyGEPInst(Type *SrcTy, ArrayRef< Value *> Ops, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
The core instruction combiner logic.
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM)
See llvm::createInstructionCombiningPass function.
static cl::opt< bool > EnableExpensiveCombines("expensive-combines", cl::desc("Enable expensive instruction combines"))
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:952
This file provides an implementation of debug counters.
static bool shorter_filter(const Value *LHS, const Value *RHS)
uint64_t getNumElements() const
Definition: DerivedTypes.h:390
Type * getSourceElementType() const
Definition: Instructions.h:970
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
This file implements a class to represent arbitrary precision integral constant values and operations...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:641
Instruction * visitReturnInst(ReturnInst &RI)
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
Instruction * visitBranchInst(BranchInst &BI)
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
Definition: Instruction.h:180
unsigned getNumIndices() const
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:84
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1574
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:669
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, Instruction *AI)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:123
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Class to represent array types.
Definition: DerivedTypes.h:400
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
int32_t exactLogBase2() const
Definition: APInt.h:1787
This class represents a no-op cast from one type to another.
op_iterator idx_begin()
Definition: Instructions.h:998
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:81
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:31
An instruction for storing to memory.
Definition: Instructions.h:320
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:202
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
FunctionPass * createInstructionCombiningPass(bool ExpensiveCombines=true)
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1022
Value * getOperand(unsigned i) const
Definition: User.h:169
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS)
This function predicates factorization using distributive laws.
Class to represent pointers.
Definition: DerivedTypes.h:498
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
unsigned getAddressSpace() const
Returns the address space of this instruction&#39;s pointer type.
Definition: Instructions.h:982
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:344
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:105
bool hasProfileSummary()
Returns true if profile summary is available.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:61
bool hasAllZeroIndices() const
Return true if all of the indices of this GEP are zeros.
Definition: Operator.h:507
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
The landingpad instruction holds all of the information necessary to generate correct exception handl...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:175
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
Definition: PatternMatch.h:726
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:233
bool run()
Run the combiner over the entire worklist until it is empty.
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1261
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return &#39;true&#39; if the given typeinfo will match anything.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides the primary interface to the instcombine pass.
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1887
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
size_t size() const
Definition: BasicBlock.h:278
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:501
void AddInitialGroup(ArrayRef< Instruction *> List)
AddInitialGroup - Add the specified batch of stuff in reverse order.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Value * SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
brc_match< Cond_t > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:442
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:774
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:49
This instruction compares its operands according to the predicate given to the constructor.
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:709
bool isBinaryOp() const
Definition: Instruction.h:130
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl...
Definition: Operator.h:66
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:4212
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2316
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
op_range operands()
Definition: User.h:237
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
static CastInst * CreatePointerBitCastOrAddrSpaceCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast or an AddrSpaceCast cast instruction.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:526
self_iterator getIterator()
Definition: ilist_node.h:81
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:73
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:73
Class to represent integer types.
Definition: DerivedTypes.h:39
Constant Vector Declarations.
Definition: Constants.h:499
The legacy pass manager&#39;s instcombine pass.
Definition: InstCombine.h:42
static Constant * getNot(Constant *C)
Definition: Constants.cpp:2222
void setSourceElementType(Type *Ty)
Definition: Instructions.h:972
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
const Value * getCondition() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:196
Type * getPointerOperandType() const
Method to return the pointer operand as a PointerType.
InstCombineWorklist - This is the worklist management logic for InstCombine.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
const Constant * stripPointerCasts() const
Definition: Constant.h:177
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
iterator erase(const_iterator CI)
Definition: SmallVector.h:438
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
bool LowerDbgDeclare(Function &F)
Lowers llvm.dbg.declare intrinsics into appropriate set of llvm.dbg.value intrinsics.
Definition: Local.cpp:1405
bool isVolatile() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isFilter(unsigned Idx) const
Return &#39;true&#39; if the clause and index Idx is a filter clause.
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:311
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
A function analysis which provides an AssumptionCache.
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag...
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
TinyPtrVector< DbgVariableIntrinsic * > FindDbgAddrUses(Value *V)
Finds all intrinsics declaring local variables as living in the memory that &#39;V&#39; points to...
Definition: Local.cpp:1506
BlockVerifier::State From
static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool ExpensiveCombines=true, LoopInfo *LI=nullptr)
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:348
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:245
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:30
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
Provides information about what library functions are available for the current target.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
uint64_t getSizeInBytes() const
Definition: DataLayout.h:562
Instruction * visitFree(CallInst &FI)
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1646
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
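A sketch of the splat behavior described above ('makeSevens' is a hypothetical helper): a scalar type yields a plain integer constant, a vector type yields a splat.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Hypothetical helper: same entry point, scalar and splat results.
void makeSevens(LLVMContext &Ctx, Constant *&Scalar, Constant *&Splat) {
  Scalar = ConstantInt::get(Type::getInt32Ty(Ctx), 7);             // i32 7
  Splat = ConstantInt::get(VectorType::get(Type::getInt32Ty(Ctx), 4),
                           7);                   // <4 x i32> <7, 7, 7, 7>
}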
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
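A usage sketch ('makeMergePhi' is a hypothetical helper; the values and blocks are assumed to exist): reserve both edges up front, then fill them in with addIncoming.
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: merge two i32 values at the top of MergeBB.
PHINode *makeMergePhi(BasicBlock *MergeBB, Value *ValA, BasicBlock *BlockA,
                      Value *ValB, BasicBlock *BlockB) {
  PHINode *Phi = PHINode::Create(Type::getInt32Ty(MergeBB->getContext()),
                                 /*NumReservedValues=*/2, "merge",
                                 &MergeBB->front());
  Phi->addIncoming(ValA, BlockA);
  Phi->addIncoming(ValB, BlockB);
  return Phi;
}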
void setPreservesCFG()
This function should be called by the pass iff it does not:
Definition: Pass.cpp:301
void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instruct...
Definition: Local.cpp:1915
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:488
static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, SmallPtrSetImpl< BasicBlock *> &Visited, InstCombineWorklist &ICWorklist, const TargetLibraryInfo *TLI)
Walk the function in depth-first order, adding all reachable code to the worklist.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:789
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
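A matcher sketch ('getNegatedOperand' is a hypothetical helper): only the canonical 'sub 0, X' form fires.
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: return X when V is 'sub 0, X', else null.
Value *getNegatedOperand(Value *V) {
  Value *X;
  if (match(V, m_Neg(m_Value(X))))
    return X;
  return nullptr;
}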
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:493
struct LLVMOpaquePassManager * LLVMPassManagerRef
Definition: Types.h:128
static Value * foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder)
Class to represent vector types.
Definition: DerivedTypes.h:424
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:547
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
ConstantArray - Constant Array Declarations.
Definition: Constants.h:413
Class for arbitrary precision integers.
Definition: APInt.h:69
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1222
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
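A construction sketch ('makeAdd' is a hypothetical helper): because the opcode is an ordinary value, one call site can emit any binary instruction.
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Hypothetical helper: build 'sum = add LHS, RHS' before InsertBefore.
BinaryOperator *makeAdd(Value *LHS, Value *RHS, Instruction *InsertBefore) {
  return BinaryOperator::Create(Instruction::Add, LHS, RHS, "sum",
                                InsertBefore);
}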
CastClass_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
iterator_range< user_iterator > users()
Definition: Value.h:399
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property...
Definition: Operator.h:95
Instruction * visitSwitchInst(SwitchInst &SI)
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock...
Instruction * visitExtractValueInst(ExtractValueInst &EV)
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1539
Represents analyses that only rely on functions&#39; control flow.
Definition: PassManager.h:114
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition: KnownBits.h:161
const Value * getFalseValue() const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Instruction * visitLandingPadInst(LandingPadInst &LI)
use_iterator use_begin()
Definition: Value.h:338
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2209
Analysis pass providing a never-invalidated alias analysis result.
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Provides an &#39;InsertHelper&#39; that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:72
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:353
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function&#39;s cache.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:576
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
void setCondition(Value *V)
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const
Accumulate the constant address offset of this GEP if possible.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:189
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool isCatch(unsigned Idx) const
Return 'true' if the clause at index Idx is a catch clause.
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:600
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:580
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
idx_iterator idx_begin() const
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2299
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited")
bool isUnconditional() const
void initializeInstructionCombiningPassPass(PassRegistry &)
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
bool isMinValue() const
Determine if this is the smallest unsigned value.
Definition: APInt.h:436
void setCondition(Value *V)
Analysis pass providing the TargetLibraryInfo.
Multiway switch.
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1804
Value * SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
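A usage sketch ('tryFoldAdd' is a hypothetical helper; the SimplifyQuery is assumed to be built from the caller's DataLayout/TLI/DT): a non-null result is a complete replacement value, and no instruction is created.
#include "llvm/Analysis/InstructionSimplify.h"
using namespace llvm;

// Hypothetical helper: null means 'no fold available'.
Value *tryFoldAdd(Value *L, Value *R, const SimplifyQuery &SQ) {
  return SimplifyBinOp(Instruction::Add, L, R, SQ);
}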
static GetElementPtrInst * CreateInBounds(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:933
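A usage sketch ('gepToElt2' is a hypothetical helper; Ptr is assumed to point at an aggregate such as [4 x i32]): the first index steps through the pointer, the second selects element 2, and the inbounds flag asserts the address stays inside the object.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: compute &Ptr[0][2], marked inbounds.
GetElementPtrInst *gepToElt2(Value *Ptr, Instruction *InsertBefore) {
  LLVMContext &Ctx = InsertBefore->getContext();
  Value *Idxs[] = {ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                   ConstantInt::get(Type::getInt32Ty(Ctx), 2)};
  return GetElementPtrInst::CreateInBounds(Ptr, Idxs, "elt", InsertBefore);
}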
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_END(InstructionCombiningPass
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
const BasicBlock & front() const
Definition: Function.h:668
SmallVector< int, 16 > getShuffleMask() const
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
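A usage sketch ('tryHoist' is a hypothetical helper): a typical guard before hoisting code above a branch, where executing the instruction early must not trap, fault, or write memory.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: hoist I to InsertPt only when that is safe.
bool tryHoist(Instruction &I, Instruction *InsertPt) {
  if (!isSafeToSpeculativelyExecute(&I))
    return false;
  I.moveBefore(InsertPt);
  return true;
}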
void stable_sort(R &&Range)
Definition: STLExtras.h:1309
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1874
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:114
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:353
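A usage sketch ('zapIfDead' is a hypothetical helper; TLI may be null): the classic dead-code sweep step.
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Hypothetical helper: erase an unused, side-effect-free instruction.
bool zapIfDead(Instruction *I, const TargetLibraryInfo *TLI) {
  if (!isInstructionTriviallyDead(I, TLI))
    return false;
  I->eraseFromParent();
  return true;
}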
LLVM Value Representation.
Definition: Value.h:72
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1306
This file provides internal interfaces used to implement the InstCombine.
void clearSubclassOptionalData()
Clear the optional flags contained in this value.
Definition: Value.h:475
succ_range successors(Instruction *I)
Definition: CFG.h:259
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:605
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
OptimizationRemarkEmitter legacy analysis pass.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, StoreInst *SI, DIBuilder &Builder)
Dbg Intrinsic utilities ...
Definition: Local.cpp:1306
Invoke instruction.
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:156
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:583
Type * getElementType() const
Definition: DerivedTypes.h:391
IRTranslator LLVM IR MI
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
unsigned greater than
Definition: InstrTypes.h:732
bool isIntDivRem() const
Definition: Instruction.h:131
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
inst_range instructions(Function *F)
Definition: InstIterator.h:133
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
A container for analyses that lazily runs them and caches their results.
Type * getArrayElementType() const
Definition: Type.h:364
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
This header defines various interfaces for pass management in LLVM.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Value * getPointerOperand()
Definition: Instructions.h:412
Instruction * visitAllocSite(Instruction &FI)
The optimization diagnostic interface.
Value * getRawDest() const
bool use_empty() const
Definition: Value.h:322
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1088
Type * getElementType() const
Definition: DerivedTypes.h:517
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp)
Return whether "(X LOp Y) ROp Z" is always equal to "(X ROp Z) LOp (Y ROp Z)".
BinaryOp_match< ValTy, cst_pred_ty< is_all_ones >, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
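A matcher sketch ('getInvertedOperand' is a hypothetical helper): the pattern is commutative, so both operand orders of the xor are recognized.
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: return X when V is 'xor X, -1' (either order).
Value *getInvertedOperand(Value *V) {
  Value *X;
  if (match(V, m_Not(m_Value(X))))
    return X;
  return nullptr;
}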
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:217
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:220
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
This instruction inserts a struct field of array element value into an aggregate value.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
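A matcher sketch ('isEqualityCmp' is a hypothetical helper): the predicate is captured by reference alongside both operands.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: recognize 'icmp eq A, B' and bind its operands.
bool isEqualityCmp(Value *V, Value *&A, Value *&B) {
  ICmpInst::Predicate Pred;
  return match(V, m_ICmp(Pred, m_Value(A), m_Value(B))) &&
         Pred == ICmpInst::ICMP_EQ;
}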
Legacy wrapper pass to provide the BasicAAResult object.
gep_type_iterator gep_type_begin(const User *GEP)
bool salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableIntrinsic *> Insns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition: Local.cpp:1633
static Constant * get(unsigned Opcode, Constant *C1, unsigned Flags=0, Type *OnlyIfReducedTy=nullptr)
get - Return a unary operator constant expression, folding if possible.
Definition: Constants.cpp:1815
user_iterator user_end()
Definition: Value.h:383