//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
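/// Illustrative examples (added for exposition; hypothetical values):
///   decomposing 'shl %X, 2' yields X = %X, Scale = 4, Offset = 0;
///   decomposing 'add (mul %X, 4), 8' yields X = %X, Scale = 4, Offset = 8;
///   a lone constant 8 yields X = 0, Scale = 0, Offset = 8.
///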
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
          decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
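/// Illustrative sketch (hypothetical IR; sizes assume a 4-byte i32 and an
/// 8-byte i64):
///   %a = alloca i32, i32 8
///   %b = bitcast i32* %a to i64*
/// --->
///   %a = alloca i64, i32 4
///   %b = bitcast i64* %a to i32*   ; only if the old alloca had other uses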
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
    decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
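/// Illustrative example (hypothetical IR): asked to evaluate this tree at
/// type i32, where %x is an i32:
///   %w = zext i32 %x to i64
///   %s = add i64 %w, 7
/// the add is rebuilt as 'add i32 %x, 7' -- the zext leaf is free because its
/// source already has the target type, and the constant is simply recast.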
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
        EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
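/// An example of the cast-of-cast case handled here (hypothetical IR):
///   %m = zext i16 %x to i64
///   %t = trunc i64 %m to i32
/// is eliminable to a single 'zext i16 %x to i32'.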
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands whose size differs from its condition
    // may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction, which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // inrange amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       similar to the sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
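/// Illustrative sketch (hypothetical pattern; an 8-bit rotate widened to i32,
/// with %v known to be zero in its high 24 bits):
///   trunc (or (shl %v, %k), (lshr %v, (8 - %k))) to i8
/// --->
///   call i8 @llvm.fshl.i8(i8 (trunc %v), i8 (trunc %v), i8 (trunc %k))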
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
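/// Illustrative example (hypothetical IR, assuming i32 is a desirable type):
///   %a = and i64 %x, 15
///   %t = trunc i64 %a to i32
/// --->
///   %t = and i32 (trunc i64 %x to i32), 15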
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }

  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
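/// Illustrative example (hypothetical types):
///   trunc (shufflevector <4 x i32> %x, undef, <splat of 0>) to <4 x i8>
/// --->
///   shufflevector (trunc <4 x i32> %x to <4 x i8>), poison, <splat of 0>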
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      is_splat(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
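/// Illustrative example (hypothetical IR):
///   %v = insertelement <4 x i32> undef, i32 %x, i32 1
///   %t = trunc <4 x i32> %v to <4 x i8>
/// --->
///   %nx = trunc i32 %x to i8
///   %t = insertelement <4 x i8> undef, i8 %nx, i32 1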
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.  Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
  if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
    unsigned MaxShiftAmt = SrcWidth - DestWidth;

    // If the shift is small enough, all zero/sign bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto *OldShift = cast<Instruction>(Src);
      bool IsExact = OldShift->isExact();
      auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
      ShAmt = Constant::mergeUndefsWith(ShAmt, C);
      Value *Shift =
          OldShift->getOpcode() == Instruction::AShr
              ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
              : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
      return CastInst::CreateTruncOrBitCast(Shift, DestTy);
    }
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale(DL))) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(MaxVScale.getValue()) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
  // If we are just checking for a icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
    if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
        (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
        Constant *One = ConstantInt::get(In->getType(), 1);
        In = Builder.CreateXor(In, One, In->getName() + ".not");
      }

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
        // This only works for EQ and NE
        Cmp->isEquality()) {
      // If Op1C is some other power of two, we can convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
        bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
        if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
          // (X&4) == 2 --> false
          // (X&4) != 2 --> true
          Constant *Res = ConstantInt::get(Zext.getType(), isNE);
          return replaceInstUsesWith(Zext, Res);
        }

        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
          Constant *One = ConstantInt::get(In->getType(), 1);
          In = Builder.CreateXor(In, One);
        }

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }

    // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
    // It is also profitable to transform icmp eq into not(xor(A, B)) because
    // that may lead to additional simplifications.
    if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
      Value *LHS = Cmp->getOperand(0);
      Value *RHS = Cmp->getOperand(1);

      KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
      KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);

      if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
        APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
        APInt UnknownBit = ~KnownBits;
        if (UnknownBit.countPopulation() == 1) {
          Value *Result = Builder.CreateXor(LHS, RHS);

          // Mask off any bits that are set and won't be shifted away.
          if (KnownLHS.One.uge(UnknownBit))
            Result = Builder.CreateAnd(Result,
                                       ConstantInt::get(ITy, UnknownBit));

          // Shift the bit we're testing down to the lsb.
          Result = Builder.CreateLShr(
              Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));

          if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
            Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
          Result->takeName(Cmp);
          return replaceInstUsesWith(Zext, Result);
        }
      }
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out.  For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63.  Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x.  Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x.  This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = CI.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this!  Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << CI << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &CI))
      return replaceInstUsesWith(CI, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                               APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved.  We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = CI.getType()->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, CI.getType());
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, CI.getType());
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, CI);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == CI.getType())
    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == CI.getType()) {
    Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  if (match(Src, m_VScale(DL))) {
    if (CI.getFunction() &&
        CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(MaxVScale.getValue()) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(CI, VScale);
        }
      }
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
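/// Illustrative example (hypothetical IR):
///   %s = sext (icmp slt i32 %x, 0) to i32
/// --->
///   %s = ashr i32 %x, 31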
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
                                                 Instruction &CI) {
  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
  ICmpInst::Predicate Pred = ICI->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
      (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
    // (x <s 0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
    // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != CI.getType())
      In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);

    if (Pred == ICmpInst::ICMP_SGT)
      In = Builder.CreateNot(In, In->getName() + ".not");
    return replaceInstUsesWith(CI, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (ICI->hasOneUse() &&
        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &CI);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = ICI->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(CI.getType()) :
                       ConstantInt::getNullValue(CI.getType());
          return replaceInstUsesWith(CI, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (CI.getType() == In->getType())
          return replaceInstUsesWith(CI, In);
        return CastInst::CreateIntegerCast(In, CI.getType(), true /*SExt*/);
      }
    }
  }

  return nullptr;
}

1437 /// Return true if we can take the specified value and return it as type Ty
1438 /// without inserting any new casts and without changing the value of the common
1439 /// low bits. This is used by code that tries to promote integer operations to
1440 /// a wider type, which will allow us to eliminate the extension.
1441 ///
1442 /// This function works on both vectors and scalars.
1443 ///
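/// For example (an illustrative sketch; the IR names are hypothetical):
///   %t = trunc i32 %x to i16
///   %a = add i16 %t, 42
///   %s = sext i16 %a to i32
/// Here the add can be evaluated directly in i32, so the caller (visitSExt)
/// can absorb the sext, re-establishing the sign bits with a shl/ashr pair
/// only if needed.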
1444 static bool canEvaluateSExtd(Value *V, Type *Ty) {
1446  "Can't sign extend type to a smaller type");
1447  if (canAlwaysEvaluateInType(V, Ty))
1448  return true;
1449  if (canNotEvaluateInType(V, Ty))
1450  return false;
1451 
1452  auto *I = cast<Instruction>(V);
1453  switch (I->getOpcode()) {
1454  case Instruction::SExt: // sext(sext(x)) -> sext(x)
1455  case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1456  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1457  return true;
1458  case Instruction::And:
1459  case Instruction::Or:
1460  case Instruction::Xor:
1461  case Instruction::Add:
1462  case Instruction::Sub:
1463  case Instruction::Mul:
1464  // These operators can all arbitrarily be extended if their inputs can.
1465  return canEvaluateSExtd(I->getOperand(0), Ty) &&
1466  canEvaluateSExtd(I->getOperand(1), Ty);
1467 
1468  //case Instruction::Shl: TODO
1469  //case Instruction::LShr: TODO
1470 
1471  case Instruction::Select:
1472  return canEvaluateSExtd(I->getOperand(1), Ty) &&
1473  canEvaluateSExtd(I->getOperand(2), Ty);
1474 
1475  case Instruction::PHI: {
1476  // We can change a phi if we can change all operands. Note that we never
1477  // get into trouble with cyclic PHIs here because we only consider
1478  // instructions with a single use.
1479  PHINode *PN = cast<PHINode>(I);
1480  for (Value *IncValue : PN->incoming_values())
1481  if (!canEvaluateSExtd(IncValue, Ty)) return false;
1482  return true;
1483  }
1484  default:
1485  // TODO: Can handle more cases here.
1486  break;
1487  }
1488 
1489  return false;
1490 }
1491 
1492 Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
1493  // If this sign extend is only used by a truncate, let the truncate be
1494  // eliminated before we try to optimize this sext.
1495  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
1496  return nullptr;
1497 
1498  if (Instruction *I = commonCastTransforms(CI))
1499  return I;
1500 
1501  Value *Src = CI.getOperand(0);
1502  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
1503  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1504  unsigned DestBitSize = DestTy->getScalarSizeInBits();
1505 
1506  // If we know that the value being extended is positive, we can use a zext
1507  // instead.
1508  KnownBits Known = computeKnownBits(Src, 0, &CI);
1509  if (Known.isNonNegative())
1510  return CastInst::Create(Instruction::ZExt, Src, DestTy);
1511 
1512  // Try to extend the entire expression tree to the wide destination type.
1513  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1514  // Okay, we can transform this! Insert the new expression now.
1515  LLVM_DEBUG(
1516  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1517  " to avoid sign extend: "
1518  << CI << '\n');
1519  Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1520  assert(Res->getType() == DestTy);
1521 
1522  // If the high bits are already filled with sign bit, just replace this
1523  // cast with the result.
1524  if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
1525  return replaceInstUsesWith(CI, Res);
1526 
1527  // We need to emit a shl + ashr to do the sign extend.
1528  Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1529  return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1530  ShAmt);
1531  }
1532 
1533  Value *X;
1534  if (match(Src, m_Trunc(m_Value(X)))) {
1535  // If the input has more sign bits than bits truncated, then convert
1536  // directly to final type.
1537  unsigned XBitSize = X->getType()->getScalarSizeInBits();
1538  if (ComputeNumSignBits(X, 0, &CI) > XBitSize - SrcBitSize)
1539  return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1540 
1541  // If input is a trunc from the destination type, then convert into shifts.
1542  if (Src->hasOneUse() && X->getType() == DestTy) {
1543  // sext (trunc X) --> ashr (shl X, C), C
1544  Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1545  return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1546  }
1547 
1548  // If we are replacing shifted-in high zero bits with sign bits, convert
1549  // the logic shift to arithmetic shift and eliminate the cast to
1550  // intermediate type:
1551  // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1552  Value *Y;
1553  if (Src->hasOneUse() &&
1554  match(X, m_LShr(m_Value(Y),
1555  m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1556  Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1557  return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1558  }
1559  }
1560 
1561  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
1562  return transformSExtICmp(ICI, CI);
1563 
1564  // If the input is a shl/ashr pair with the same constant, then this is a sign
1565  // extension from a smaller value. If we could trust arbitrary bitwidth
1566  // integers, we could turn this into a truncate to the smaller bit and then
1567  // use a sext for the whole extension. Since we don't, look deeper and check
1568  // for a truncate. If the source and dest are the same type, eliminate the
1569  // trunc and extend and just do shifts. For example, turn:
1570  // %a = trunc i32 %i to i8
1571  // %b = shl i8 %a, C
1572  // %c = ashr i8 %b, C
1573  // %d = sext i8 %c to i32
1574  // into:
1575  // %a = shl i32 %i, 32-(8-C)
1576  // %d = ashr i32 %a, 32-(8-C)
1577  Value *A = nullptr;
1578  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1579  Constant *BA = nullptr, *CA = nullptr;
1580  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1581  m_Constant(CA))) &&
1582  BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1583  Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1584  Constant *NumLowbitsLeft = ConstantExpr::getSub(
1585  ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1586  Constant *NewShAmt = ConstantExpr::getSub(
1587  ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1588  NumLowbitsLeft);
1589  NewShAmt =
1590  Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1591  A = Builder.CreateShl(A, NewShAmt, CI.getName());
1592  return BinaryOperator::CreateAShr(A, NewShAmt);
1593  }
1594 
1595  // Splatting a bit of constant-index across a value:
1596  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1597  // TODO: If the dest type is different, use a cast (adjust use check).
1598  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1599  m_SpecificInt(SrcBitSize - 1)))) &&
1600  X->getType() == DestTy) {
1601  Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1602  Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
1603  Value *Shl = Builder.CreateShl(X, ShlAmtC);
1604  return BinaryOperator::CreateAShr(Shl, AshrAmtC);
1605  }
1606 
1607  if (match(Src, m_VScale(DL))) {
1608  if (CI.getFunction() &&
1609  CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1610  Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
1611  if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1612  if (Log2_32(MaxVScale.getValue()) < (SrcBitSize - 1)) {
1613  Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1614  return replaceInstUsesWith(CI, VScale);
1615  }
1616  }
1617  }
1618  }
1619 
1620  return nullptr;
1621 }
1622 
1623 /// Return true if the specified floating-point constant fits in the given
1624 /// FP type without changing its value.
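/// For example (illustrative values): 0.5 fits in IEEEhalf because the
/// conversion round-trips exactly, while 1.0e10 does not -- it overflows
/// half's exponent range and converts to infinity.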
1625 static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1626  bool losesInfo;
1627  APFloat F = CFP->getValueAPF();
1628  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1629  return !losesInfo;
1630 }
1631 
1632 static Type *shrinkFPConstant(ConstantFP *CFP) {
1633  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1634  return nullptr; // No constant folding of this.
1635  // See if the value can be truncated to half and then reextended.
1636  if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1637  return Type::getHalfTy(CFP->getContext());
1638  // See if the value can be truncated to float and then reextended.
1639  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1640  return Type::getFloatTy(CFP->getContext());
1641  if (CFP->getType()->isDoubleTy())
1642  return nullptr; // Won't shrink.
1643  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1644  return Type::getDoubleTy(CFP->getContext());
1645  // Don't try to shrink to various long double types.
1646  return nullptr;
1647 }
1648 
1649 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1650 // type we can safely truncate all elements to.
1651 // TODO: Make these support undef elements.
1652 static Type *shrinkFPConstantVector(Value *V) {
1653  auto *CV = dyn_cast<Constant>(V);
1654  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1655  if (!CV || !CVVTy)
1656  return nullptr;
1657 
1658  Type *MinType = nullptr;
1659 
1660  unsigned NumElts = CVVTy->getNumElements();
1661 
1662  // For fixed-width vectors we find the minimal type by looking
1663  // through the constant values of the vector.
1664  for (unsigned i = 0; i != NumElts; ++i) {
1665  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1666  if (!CFP)
1667  return nullptr;
1668 
1669  Type *T = shrinkFPConstant(CFP);
1670  if (!T)
1671  return nullptr;
1672 
1673  // If we haven't found a type yet or this type has a larger mantissa than
1674  // our previous type, this is our new minimal type.
1675  if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1676  MinType = T;
1677  }
1678 
1679  // Make a vector type from the minimal type.
1680  return FixedVectorType::get(MinType, NumElts);
1681 }
1682 
1683 /// Find the minimum FP type we can safely truncate to.
1684 static Type *getMinimumFPType(Value *V) {
1685  if (auto *FPExt = dyn_cast<FPExtInst>(V))
1686  return FPExt->getOperand(0)->getType();
1687 
1688  // If this value is a constant, return the constant in the smallest FP type
1689  // that can accurately represent it. This allows us to turn
1690  // (float)((double)X+2.0) into x+2.0f.
1691  if (auto *CFP = dyn_cast<ConstantFP>(V))
1692  if (Type *T = shrinkFPConstant(CFP))
1693  return T;
1694 
1695  // We can only correctly find a minimum type for a scalable vector when it is
1696  // a splat. For splats of constant values the fpext is wrapped up as a
1697  // ConstantExpr.
1698  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1699  if (FPCExt->getOpcode() == Instruction::FPExt)
1700  return FPCExt->getOperand(0)->getType();
1701 
1702  // Try to shrink a vector of FP constants. This returns nullptr on scalable
1703 // vectors.
1704  if (Type *T = shrinkFPConstantVector(V))
1705  return T;
1706 
1707  return V->getType();
1708 }
1709 
1710 /// Return true if the cast from integer to FP can be proven to be exact for all
1711 /// possible inputs (the conversion does not lose any precision).
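/// For example (illustrative): sitofp i16 %x to double is always exact, since
/// its 15 value bits fit in double's 53-bit significand, whereas uitofp i64 %x
/// to float is not, since 64 bits exceed float's 24-bit significand.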
1712 static bool isKnownExactCastIntToFP(CastInst &I) {
1713  CastInst::CastOps Opcode = I.getOpcode();
1714  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1715  "Unexpected cast");
1716  Value *Src = I.getOperand(0);
1717  Type *SrcTy = Src->getType();
1718  Type *FPTy = I.getType();
1719  bool IsSigned = Opcode == Instruction::SIToFP;
1720  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1721 
1722  // Easy case - if the source integer type has fewer bits than the FP mantissa,
1723  // then the cast must be exact.
1724  int DestNumSigBits = FPTy->getFPMantissaWidth();
1725  if (SrcSize <= DestNumSigBits)
1726  return true;
1727 
1728  // Cast from FP to integer and back to FP is independent of the intermediate
1729  // integer width because of poison on overflow.
1730  Value *F;
1731  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1732  // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1733  // potential rounding of negative FP input values.
1734  int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1735  if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1736  SrcNumSigBits++;
1737 
1738  // [su]itofp (fpto[su]i F) --> exact if the source type has no more
1739  // significant bits than the destination (and make sure neither type is
1740  // weird -- ppc_fp128).
1741  if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1742  SrcNumSigBits <= DestNumSigBits)
1743  return true;
1744  }
1745 
1746  // TODO:
1747  // Try harder to find if the source integer type has less significant bits.
1748  // For example, compute number of sign bits or compute low bit mask.
1749  return false;
1750 }
1751 
1752 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1753  if (Instruction *I = commonCastTransforms(FPT))
1754  return I;
1755 
1756  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1757  // simplify this expression to avoid one or more of the trunc/extend
1758  // operations if we can do so without changing the numerical results.
1759  //
1760  // The exact manner in which the widths of the operands interact to limit
1761  // what we can and cannot do safely varies from operation to operation, and
1762  // is explained below in the various case statements.
1763  Type *Ty = FPT.getType();
1764  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1765  if (BO && BO->hasOneUse()) {
1766  Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1767  Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1768  unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1769  unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1770  unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1771  unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1772  unsigned DstWidth = Ty->getFPMantissaWidth();
1773  switch (BO->getOpcode()) {
1774  default: break;
1775  case Instruction::FAdd:
1776  case Instruction::FSub:
1777  // For addition and subtraction, the infinitely precise result can
1778  // essentially be arbitrarily wide; proving that double rounding
1779  // will not occur because the result of OpI is exact (as we will for
1780  // FMul, for example) is hopeless. However, we *can* nonetheless
1781  // frequently know that double rounding cannot occur (or that it is
1782  // innocuous) by taking advantage of the specific structure of
1783  // infinitely-precise results that admit double rounding.
1784  //
1785  // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1786  // to represent both sources, we can guarantee that the double
1787  // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1788  // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1789  // for proof of this fact).
1790  //
1791  // Note: Figueroa does not consider the case where DstFormat !=
1792  // SrcFormat. It's possible (likely even!) that this analysis
1793  // could be tightened for those cases, but they are rare (the main
1794  // case of interest here is (float)((double)float + float)).
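  // For example (an illustrative case, with hypothetical IR names):
  //   fptrunc (fadd (fpext half %a to float), (fpext half %b to float)) to half
  // can be rewritten as fadd half %a, %b, since OpWidth(float) = 24 >=
  // 2*DstWidth(half)+1 = 23 and half already represents both sources.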
1795  if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1796  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1797  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1798  Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1799  RI->copyFastMathFlags(BO);
1800  return RI;
1801  }
1802  break;
1803  case Instruction::FMul:
1804  // For multiplication, the infinitely precise result has at most
1805  // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1806  // that such a value can be exactly represented, then no double
1807  // rounding can possibly occur; we can safely perform the operation
1808  // in the destination format if it can represent both sources.
1809  if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1810  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1811  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1812  return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1813  }
1814  break;
1815  case Instruction::FDiv:
1816  // For division, we again use the bound from Figueroa's
1817  // dissertation. I am entirely certain that this bound can be
1818  // tightened in the unbalanced operand case by an analysis based on
1819  // the diophantine rational approximation bound, but the well-known
1820  // condition used here is a good conservative first pass.
1821  // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1822  if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1823  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1824  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1825  return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1826  }
1827  break;
1828  case Instruction::FRem: {
1829  // Remainder is straightforward: it is always exact, so the
1830  // type of OpI doesn't enter into things at all. We simply evaluate
1831  // in whichever source type is larger, then convert to the
1832  // destination type.
1833  if (SrcWidth == OpWidth)
1834  break;
1835  Value *LHS, *RHS;
1836  if (LHSWidth == SrcWidth) {
1837  LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1838  RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1839  } else {
1840  LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1841  RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1842  }
1843 
1844  Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1845  return CastInst::CreateFPCast(ExactResult, Ty);
1846  }
1847  }
1848  }
1849 
1850  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1851  Value *X;
1852  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1853  if (Op && Op->hasOneUse()) {
1854  // FIXME: The FMF should propagate from the fptrunc, not the source op.
1855  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1856  if (isa<FPMathOperator>(Op))
1857  Builder.setFastMathFlags(Op->getFastMathFlags());
1858 
1859  if (match(Op, m_FNeg(m_Value(X)))) {
1860  Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1861 
1862  return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1863  }
1864 
1865  // If we are truncating a select that has an extended operand, we can
1866  // narrow the other operand and do the select as a narrow op.
1867  Value *Cond, *X, *Y;
1868  if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1869  X->getType() == Ty) {
1870  // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1871  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1872  Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1873  return replaceInstUsesWith(FPT, Sel);
1874  }
1875  if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1876  X->getType() == Ty) {
1877  // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1878  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1879  Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1880  return replaceInstUsesWith(FPT, Sel);
1881  }
1882  }
1883 
1884  if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1885  switch (II->getIntrinsicID()) {
1886  default: break;
1887  case Intrinsic::ceil:
1888  case Intrinsic::fabs:
1889  case Intrinsic::floor:
1890  case Intrinsic::nearbyint:
1891  case Intrinsic::rint:
1892  case Intrinsic::round:
1893  case Intrinsic::roundeven:
1894  case Intrinsic::trunc: {
1895  Value *Src = II->getArgOperand(0);
1896  if (!Src->hasOneUse())
1897  break;
1898 
1899  // Except for fabs, this transformation requires the input of the unary FP
1900  // operation to be itself an fpext from the type to which we're
1901  // truncating.
1902  if (II->getIntrinsicID() != Intrinsic::fabs) {
1903  FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1904  if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1905  break;
1906  }
1907 
1908  // Do unary FP operation on smaller type.
1909  // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1910  Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1911  Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1912  II->getIntrinsicID(), Ty);
1913  SmallVector<OperandBundleDef, 1> OpBundles;
1914  II->getOperandBundlesAsDefs(OpBundles);
1915  CallInst *NewCI =
1916  CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1917  NewCI->copyFastMathFlags(II);
1918  return NewCI;
1919  }
1920  }
1921  }
1922 
1923  if (Instruction *I = shrinkInsertElt(FPT, Builder))
1924  return I;
1925 
1926  Value *Src = FPT.getOperand(0);
1927  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1928  auto *FPCast = cast<CastInst>(Src);
1929  if (isKnownExactCastIntToFP(*FPCast))
1930  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1931  }
1932 
1933  return nullptr;
1934 }
1935 
1936 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1937  // If the source operand is a cast from integer to FP and known exact, then
1938  // cast the integer operand directly to the destination type.
1939  Type *Ty = FPExt.getType();
1940  Value *Src = FPExt.getOperand(0);
1941  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1942  auto *FPCast = cast<CastInst>(Src);
1943  if (isKnownExactCastIntToFP(*FPCast))
1944  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1945  }
1946 
1947  return commonCastTransforms(FPExt);
1948 }
1949 
1950 /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1951 /// This is safe if the intermediate type has enough bits in its mantissa to
1952 /// accurately represent all values of X. For example, this won't work with
1953 /// i64 -> float -> i64.
1954 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1955  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1956  return nullptr;
1957 
1958  auto *OpI = cast<CastInst>(FI.getOperand(0));
1959  Value *X = OpI->getOperand(0);
1960  Type *XType = X->getType();
1961  Type *DestType = FI.getType();
1962  bool IsOutputSigned = isa<FPToSIInst>(FI);
1963 
1964  // Since we can assume the conversion won't overflow, our decision as to
1965  // whether the input will fit in the float should depend on the minimum
1966  // of the input range and output range.
1967 
1968  // This means this is also safe for a signed input and unsigned output, since
1969  // a negative input would lead to undefined behavior.
1970  if (!isKnownExactCastIntToFP(*OpI)) {
1971  // The first cast may not round exactly based on the source integer width
1972  // and FP width, but the overflow UB rules can still allow this to fold.
1973  // If the destination type is narrow, that means the intermediate FP value
1974  // must be large enough to hold the source value exactly.
1975  // For example, (uint8_t)((float)(uint32_t)16777217) is undefined behavior.
1976  int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
1977  if (OutputSize > OpI->getType()->getFPMantissaWidth())
1978  return nullptr;
1979  }
1980 
1981  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1982  bool IsInputSigned = isa<SIToFPInst>(OpI);
1983  if (IsInputSigned && IsOutputSigned)
1984  return new SExtInst(X, DestType);
1985  return new ZExtInst(X, DestType);
1986  }
1987  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1988  return new TruncInst(X, DestType);
1989 
1990  assert(XType == DestType && "Unexpected types for int to FP to int casts");
1991  return replaceInstUsesWith(FI, X);
1992 }
1993 
1994 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1995  if (Instruction *I = foldItoFPtoI(FI))
1996  return I;
1997 
1998  return commonCastTransforms(FI);
1999 }
2000 
2001 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2002  if (Instruction *I = foldItoFPtoI(FI))
2003  return I;
2004 
2005  return commonCastTransforms(FI);
2006 }
2007 
2008 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2009  return commonCastTransforms(CI);
2010 }
2011 
2012 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2013  return commonCastTransforms(CI);
2014 }
2015 
2016 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2017  // If the source integer type is not the intptr_t type for this target, do a
2018  // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2019  // cast to be exposed to other transforms.
2020  unsigned AS = CI.getAddressSpace();
2021  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2022  DL.getPointerSizeInBits(AS)) {
2023  Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2024  DL.getIntPtrType(CI.getContext(), AS));
2025  Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2026  return new IntToPtrInst(P, CI.getType());
2027  }
2028 
2029  if (Instruction *I = commonCastTransforms(CI))
2030  return I;
2031 
2032  return nullptr;
2033 }
2034 
2035 /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
2036 Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
2037  Value *Src = CI.getOperand(0);
2038 
2039  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
2040  // If casting the result of a getelementptr instruction with no offset, turn
2041  // this into a cast of the original pointer!
2042  if (GEP->hasAllZeroIndices() &&
2043  // If CI is an addrspacecast and GEP changes the pointer type, merging
2044  // GEP into CI would undo canonicalizing addrspacecast with different
2045  // pointer types, causing infinite loops.
2046  (!isa<AddrSpaceCastInst>(CI) ||
2047  GEP->getType() == GEP->getPointerOperandType())) {
2048  // Changing the cast operand is usually not a good idea but it is safe
2049  // here because the pointer operand is being replaced with another
2050  // pointer operand so the opcode doesn't need to change.
2051  return replaceOperand(CI, 0, GEP->getOperand(0));
2052  }
2053  }
2054 
2055  return commonCastTransforms(CI);
2056 }
2057 
2058 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2059  // If the destination integer type is not the intptr_t type for this target,
2060  // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2061  // to be exposed to other transforms.
2062  Value *SrcOp = CI.getPointerOperand();
2063  Type *SrcTy = SrcOp->getType();
2064  Type *Ty = CI.getType();
2065  unsigned AS = CI.getPointerAddressSpace();
2066  unsigned TySize = Ty->getScalarSizeInBits();
2067  unsigned PtrSize = DL.getPointerSizeInBits(AS);
2068  if (TySize != PtrSize) {
2069  Type *IntPtrTy =
2070  SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2071  Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2072  return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2073  }
2074 
2075  if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
2076  // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2077  // While this can increase the number of instructions it doesn't actually
2078  // increase the overall complexity since the arithmetic is just part of
2079  // the GEP otherwise.
2080  if (GEP->hasOneUse() &&
2081  isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2082  return replaceInstUsesWith(CI,
2083  Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
2084  /*isSigned=*/false));
2085  }
2086  }
2087 
2088  Value *Vec, *Scalar, *Index;
2089  if (match(SrcOp, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(Vec))),
2090  m_Value(Scalar), m_Value(Index)))) &&
2091  Vec->getType() == Ty) {
2092  assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2093  // Convert the scalar to int followed by insert to eliminate one cast:
2094  // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2095  Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2096  return InsertElementInst::Create(Vec, NewCast, Index);
2097  }
2098 
2099  return commonPointerCastTransforms(CI);
2100 }
2101 
2102 /// This input value (which is known to have vector type) is being zero extended
2103 /// or truncated to the specified vector type. Since the zext/trunc is done
2104 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
2105 /// endianness will impact which end of the vector is extended or
2106 /// truncated.
2107 ///
2108 /// A vector is always stored with index 0 at the lowest address, which
2109 /// corresponds to the most significant bits for a big endian stored integer and
2110 /// the least significant bits for little endian. A trunc/zext of an integer
2111 /// impacts the big end of the integer. Thus, we need to add/remove elements at
2112 /// the front of the vector for big endian targets, and the back of the vector
2113 /// for little endian targets.
2114 ///
2115 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2116 ///
2117 /// The source and destination vector types may have different element types.
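/// For example (illustrative, on a little-endian target):
///   %i = bitcast <4 x i16> %v to i64
///   %t = trunc i64 %i to i32
///   %r = bitcast i32 %t to <2 x i16>
/// keeps the two front elements, i.e. it becomes
///   shufflevector <4 x i16> %v, <4 x i16> poison, <2 x i32> <i32 0, i32 1>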
2118 static Instruction *
2119 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2120  InstCombinerImpl &IC) {
2121  // We can only do this optimization if the output is a multiple of the input
2122  // element size, or the input is a multiple of the output element size.
2123  // Convert the input type to have the same element type as the output.
2124  VectorType *SrcTy = cast<VectorType>(InVal->getType());
2125 
2126  if (SrcTy->getElementType() != DestTy->getElementType()) {
2127  // The input types don't need to be identical, but for now they must be the
2128  // same size. There is no specific reason we couldn't handle things like
2129  // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2130  // there yet.
2131  if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2132  DestTy->getElementType()->getPrimitiveSizeInBits())
2133  return nullptr;
2134 
2135  SrcTy =
2136  FixedVectorType::get(DestTy->getElementType(),
2137  cast<FixedVectorType>(SrcTy)->getNumElements());
2138  InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2139  }
2140 
2141  bool IsBigEndian = IC.getDataLayout().isBigEndian();
2142  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2143  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2144 
2145  assert(SrcElts != DestElts && "Element counts should be different.");
2146 
2147  // Now that the element types match, get the shuffle mask and RHS of the
2148  // shuffle to use, which depends on whether we're increasing or decreasing the
2149  // size of the input.
2150  SmallVector<int, 16> ShuffleMaskStorage;
2151  ArrayRef<int> ShuffleMask;
2152  Value *V2;
2153 
2154  // Produce an identity shuffle mask for the src vector.
2155  ShuffleMaskStorage.resize(SrcElts);
2156  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
2157 
2158  if (SrcElts > DestElts) {
2159  // If we're shrinking the number of elements (rewriting an integer
2160  // truncate), just shuffle in the elements corresponding to the least
2161  // significant bits from the input and use poison as the second shuffle
2162  // input.
2163  V2 = PoisonValue::get(SrcTy);
2164  // Make sure the shuffle mask selects the "least significant bits" by
2165  // keeping elements from the back of the src vector for big endian, and from
2166  // the front for little endian.
2167  ShuffleMask = ShuffleMaskStorage;
2168  if (IsBigEndian)
2169  ShuffleMask = ShuffleMask.take_back(DestElts);
2170  else
2171  ShuffleMask = ShuffleMask.take_front(DestElts);
2172  } else {
2173  // If we're increasing the number of elements (rewriting an integer zext),
2174  // shuffle in all of the elements from InVal. Fill the rest of the result
2175  // elements with zeros from a constant zero.
2176  V2 = Constant::getNullValue(SrcTy);
2177  // Use first elt from V2 when indicating zero in the shuffle mask.
2178  uint32_t NullElt = SrcElts;
2179  // Extend with null values in the "most significant bits" by adding elements
2180  // in front of the src vector for big endian, and at the back for little
2181  // endian.
2182  unsigned DeltaElts = DestElts - SrcElts;
2183  if (IsBigEndian)
2184  ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2185  else
2186  ShuffleMaskStorage.append(DeltaElts, NullElt);
2187  ShuffleMask = ShuffleMaskStorage;
2188  }
2189 
2190  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2191 }
2192 
2193 static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2194  return Value % Ty->getPrimitiveSizeInBits() == 0;
2195 }
2196 
2197 static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2198  return Value / Ty->getPrimitiveSizeInBits();
2199 }
2200 
2201 /// V is a value which is inserted into a vector of VecEltTy.
2202 /// Look through the value to see if we can decompose it into
2203 /// insertions into the vector. See the example in the comment for
2204 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
2205 /// The type of V is always a non-zero multiple of VecEltTy's size.
2206 /// Shift is the number of bits between the lsb of V and the lsb of
2207 /// the vector.
2208 ///
2209 /// This returns false if the pattern can't be matched or true if it can,
2210 /// filling in Elements with the elements found here.
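/// For example (illustrative): with i32 vector elements, a value inserted at
/// Shift=32 lands at element index 1 on a little-endian target, and at index
/// Elements.size()-2 on a big-endian target.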
2211 static bool collectInsertionElements(Value *V, unsigned Shift,
2212  SmallVectorImpl<Value *> &Elements,
2213  Type *VecEltTy, bool isBigEndian) {
2214  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2215  "Shift should be a multiple of the element type size");
2216 
2217  // Undef values never contribute useful bits to the result.
2218  if (isa<UndefValue>(V)) return true;
2219 
2220  // If we got down to a value of the right type, we win; try inserting it into
2221  // the right element.
2222  if (V->getType() == VecEltTy) {
2223  // Inserting null doesn't actually insert any elements.
2224  if (Constant *C = dyn_cast<Constant>(V))
2225  if (C->isNullValue())
2226  return true;
2227 
2228  unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2229  if (isBigEndian)
2230  ElementIndex = Elements.size() - ElementIndex - 1;
2231 
2232  // Fail if multiple elements are inserted into this slot.
2233  if (Elements[ElementIndex])
2234  return false;
2235 
2236  Elements[ElementIndex] = V;
2237  return true;
2238  }
2239 
2240  if (Constant *C = dyn_cast<Constant>(V)) {
2241  // Figure out the # elements this provides, and bitcast it or slice it up
2242  // as required.
2243  unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2244  VecEltTy);
2245  // If the constant is the size of a vector element, we just need to bitcast
2246  // it to the right type so it gets properly inserted.
2247  if (NumElts == 1)
2248  return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2249  Shift, Elements, VecEltTy, isBigEndian);
2250 
2251  // Okay, this is a constant that covers multiple elements. Slice it up into
2252  // pieces and insert each element-sized piece into the vector.
2253  if (!isa<IntegerType>(C->getType()))
2254  C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2255  C->getType()->getPrimitiveSizeInBits()));
2256  unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2257  Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2258 
2259  for (unsigned i = 0; i != NumElts; ++i) {
2260  unsigned ShiftI = Shift+i*ElementSize;
2261  Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2262  ShiftI));
2263  Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2264  if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2265  isBigEndian))
2266  return false;
2267  }
2268  return true;
2269  }
2270 
2271  if (!V->hasOneUse()) return false;
2272 
2273  Instruction *I = dyn_cast<Instruction>(V);
2274  if (!I) return false;
2275  switch (I->getOpcode()) {
2276  default: return false; // Unhandled case.
2277  case Instruction::BitCast:
2278  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2279  isBigEndian);
2280  case Instruction::ZExt:
2281  if (!isMultipleOfTypeSize(
2282  I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2283  VecEltTy))
2284  return false;
2285  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2286  isBigEndian);
2287  case Instruction::Or:
2288  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2289  isBigEndian) &&
2290  collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2291  isBigEndian);
2292  case Instruction::Shl: {
2293  // Must be shifting by a constant that is a multiple of the element size.
2294  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2295  if (!CI) return false;
2296  Shift += CI->getZExtValue();
2297  if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2298  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2299  isBigEndian);
2300  }
2301 
2302  }
2303 }
2304 
2305 
2306 /// If the input is an 'or' instruction, we may be doing shifts and ors to
2307 /// assemble the elements of the vector manually.
2308 /// Try to rip the code out and replace it with insertelements. This is to
2309 /// optimize code like this:
2310 ///
2311 /// %tmp37 = bitcast float %inc to i32
2312 /// %tmp38 = zext i32 %tmp37 to i64
2313 /// %tmp31 = bitcast float %inc5 to i32
2314 /// %tmp32 = zext i32 %tmp31 to i64
2315 /// %tmp33 = shl i64 %tmp32, 32
2316 /// %ins35 = or i64 %tmp33, %tmp38
2317 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2318 ///
2319 /// Into two insertelements that do "buildvector{%inc, %inc5}".
2320 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2321  InstCombinerImpl &IC) {
2322  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2323  Value *IntInput = CI.getOperand(0);
2324 
2325  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2326  if (!collectInsertionElements(IntInput, 0, Elements,
2327  DestVecTy->getElementType(),
2328  IC.getDataLayout().isBigEndian()))
2329  return nullptr;
2330 
2331  // If we succeeded, we know that all of the elements are specified by Elements
2332  // or are zero if Elements has a null entry. Recast this as a set of
2333  // insertions.
2334  Value *Result = Constant::getNullValue(CI.getType());
2335  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2336  if (!Elements[i]) continue; // Unset element.
2337 
2338  Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2339  IC.Builder.getInt32(i));
2340  }
2341 
2342  return Result;
2343 }
2344 
2345 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2346 /// vector followed by extract element. The backend tends to handle bitcasts of
2347 /// vectors better than bitcasts of scalars because vector registers are
2348 /// usually not type-specific like scalar integer or scalar floating-point.
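/// For example (illustrative, hypothetical IR names):
///   %e = extractelement <2 x i64> %v, i32 0
///   %b = bitcast i64 %e to double
/// becomes:
///   %bc = bitcast <2 x i64> %v to <2 x double>
///   %b = extractelement <2 x double> %bc, i32 0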
2349 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2350  InstCombinerImpl &IC) {
2351  // TODO: Create and use a pattern matcher for ExtractElementInst.
2352  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
2353  if (!ExtElt || !ExtElt->hasOneUse())
2354  return nullptr;
2355 
2356  // The bitcast must be to a vectorizable type, otherwise we can't make a new
2357  // type to extract from.
2358  Type *DestType = BitCast.getType();
2359  if (!VectorType::isValidElementType(DestType))
2360  return nullptr;
2361 
2362  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
2363  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
2364  NewVecType, "bc");
2365  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
2366 }
2367 
2368 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
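/// For example (illustrative, hypothetical IR names):
///   %b = bitcast <4 x i16> %x to <2 x i32>
///   %a = and <2 x i32> %b, %y
///   %r = bitcast <2 x i32> %a to <4 x i16>
/// becomes an 'and' performed directly in the destination type:
///   %yc = bitcast <2 x i32> %y to <4 x i16>
///   %r = and <4 x i16> %x, %yc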
2369 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2370  InstCombiner::BuilderTy &Builder) {
2371  Type *DestTy = BitCast.getType();
2372  BinaryOperator *BO;
2373  if (!DestTy->isIntOrIntVectorTy() ||
2374  !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2375  !BO->isBitwiseLogicOp())
2376  return nullptr;
2377 
2378  // FIXME: This transform is restricted to vector types to avoid backend
2379  // problems caused by creating potentially illegal operations. If a fix-up is
2380  // added to handle that situation, we can remove this check.
2381  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2382  return nullptr;
2383 
2384  Value *X;
2385  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2386  X->getType() == DestTy && !isa<Constant>(X)) {
2387  // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2388  Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2389  return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2390  }
2391 
2392  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2393  X->getType() == DestTy && !isa<Constant>(X)) {
2394  // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2395  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2396  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2397  }
2398 
2399  // Canonicalize vector bitcasts to come before vector bitwise logic with a
2400  // constant. This eases recognition of special constants for later ops.
2401  // Example:
2402  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2403  Constant *C;
2404  if (match(BO->getOperand(1), m_Constant(C))) {
2405  // bitcast (logic X, C) --> logic (bitcast X, C')
2406  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2407  Value *CastedC = Builder.CreateBitCast(C, DestTy);
2408  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2409  }
2410 
2411  return nullptr;
2412 }
2413 
2414 /// Change the type of a select if we can eliminate a bitcast.
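/// For example (illustrative, hypothetical IR names):
///   %xc = bitcast <4 x i16> %x to <2 x i32>
///   %s = select i1 %c, <2 x i32> %xc, <2 x i32> %y
///   %r = bitcast <2 x i32> %s to <4 x i16>
/// becomes:
///   %yc = bitcast <2 x i32> %y to <4 x i16>
///   %r = select i1 %c, <4 x i16> %x, <4 x i16> %yc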
2415 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2416  InstCombiner::BuilderTy &Builder) {
2417  Value *Cond, *TVal, *FVal;
2418  if (!match(BitCast.getOperand(0),
2419  m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2420  return nullptr;
2421 
2422  // A vector select must maintain the same number of elements in its operands.
2423  Type *CondTy = Cond->getType();
2424  Type *DestTy = BitCast.getType();
2425  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2426  if (!DestTy->isVectorTy() ||
2427  CondVTy->getElementCount() !=
2428  cast<VectorType>(DestTy)->getElementCount())
2429  return nullptr;
2430 
2431  // FIXME: This transform is restricted from changing the select between
2432  // scalars and vectors to avoid backend problems caused by creating
2433  // potentially illegal operations. If a fix-up is added to handle that
2434  // situation, we can remove this check.
2435  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2436  return nullptr;
2437 
2438  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2439  Value *X;
2440  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2441  !isa<Constant>(X)) {
2442  // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2443  Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2444  return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2445  }
2446 
2447  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2448  !isa<Constant>(X)) {
2449  // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2450  Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2451  return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2452  }
2453 
2454  return nullptr;
2455 }
2456 
2457 /// Check if all users of CI are StoreInsts.
2458 static bool hasStoreUsersOnly(CastInst &CI) {
2459  for (User *U : CI.users()) {
2460  if (!isa<StoreInst>(U))
2461  return false;
2462  }
2463  return true;
2464 }
2465 
2466 /// This function handles following case
2467 ///
2468 /// A -> B cast
2469 /// PHI
2470 /// B -> A cast
2471 ///
2472 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2473 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
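/// For example (illustrative, hypothetical IR names), with A = double and
/// B = i64:
///   %ab = bitcast double %a to i64          ; A -> B
///   %phi = phi i64 [ %ab, %entry ], [ %phi2, %loop ]
///   %ba = bitcast i64 %phi to double        ; B -> A
/// the PHI web can be rewritten to carry double values directly, making both
/// bitcasts redundant.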
2474 Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2475  PHINode *PN) {
2476  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2477  if (hasStoreUsersOnly(CI))
2478  return nullptr;
2479 
2480  Value *Src = CI.getOperand(0);
2481  Type *SrcTy = Src->getType(); // Type B
2482  Type *DestTy = CI.getType(); // Type A
2483 
2484  SmallVector<PHINode *, 4> PhiWorklist;
2485  SmallSetVector<PHINode *, 4> OldPhiNodes;
2486 
2487  // Find all of the A->B casts and PHI nodes.
2488  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2489  // OldPhiNodes is used to track all known PHI nodes, before adding a new
2490  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2491  PhiWorklist.push_back(PN);
2492  OldPhiNodes.insert(PN);
2493  while (!PhiWorklist.empty()) {
2494  auto *OldPN = PhiWorklist.pop_back_val();
2495  for (Value *IncValue : OldPN->incoming_values()) {
2496  if (isa<Constant>(IncValue))
2497  continue;
2498 
2499  if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2500  // If there is a sequence of one or more load instructions, where each
2501  // loaded value is used as the address of a later load, a bitcast is
2502  // necessary to change the value type, so don't optimize it. For
2503  // simplicity we give up if the load address comes from another load.
2504  Value *Addr = LI->getOperand(0);
2505  if (Addr == &CI || isa<LoadInst>(Addr))
2506  return nullptr;
2507  // Don't transform "load <256 x i32>, <256 x i32>*" to
2508  // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2509  // TODO: Remove this check when bitcast between vector and x86_amx
2510  // is replaced with a specific intrinsic.
2511  if (DestTy->isX86_AMXTy())
2512  return nullptr;
2513  if (LI->hasOneUse() && LI->isSimple())
2514  continue;
2515  // If a LoadInst has more than one use, changing the type of loaded
2516  // value may create another bitcast.
2517  return nullptr;
2518  }
2519 
2520  if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2521  if (OldPhiNodes.insert(PNode))
2522  PhiWorklist.push_back(PNode);
2523  continue;
2524  }
2525 
2526  auto *BCI = dyn_cast<BitCastInst>(IncValue);
2527  // We can't handle other instructions.
2528  if (!BCI)
2529  return nullptr;
2530 
2531  // Verify it's a A->B cast.
2532  Type *TyA = BCI->getOperand(0)->getType();
2533  Type *TyB = BCI->getType();
2534  if (TyA != DestTy || TyB != SrcTy)
2535  return nullptr;
2536  }
2537  }
2538 
2539  // Check that each user of each old PHI node is something that we can
2540  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2541  for (auto *OldPN : OldPhiNodes) {
2542  for (User *V : OldPN->users()) {
2543  if (auto *SI = dyn_cast<StoreInst>(V)) {
2544  if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2545  return nullptr;
2546  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2547  // Verify it's a B->A cast.
2548  Type *TyB = BCI->getOperand(0)->getType();
2549  Type *TyA = BCI->getType();
2550  if (TyA != DestTy || TyB != SrcTy)
2551  return nullptr;
2552  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2553  // As long as the user is another old PHI node, then even if we don't
2554  // rewrite it, the PHI web we're considering won't have any users
2555  // outside itself, so it'll be dead.
2556  if (!OldPhiNodes.contains(PHI))
2557  return nullptr;
2558  } else {
2559  return nullptr;
2560  }
2561  }
2562  }
2563 
2564  // For each old PHI node, create a corresponding new PHI node with type A.
2565  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2566  for (auto *OldPN : OldPhiNodes) {
2567  Builder.SetInsertPoint(OldPN);
2568  PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2569  NewPNodes[OldPN] = NewPN;
2570  }
2571 
2572  // Fill in the operands of new PHI nodes.
2573  for (auto *OldPN : OldPhiNodes) {
2574  PHINode *NewPN = NewPNodes[OldPN];
2575  for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2576  Value *V = OldPN->getOperand(j);
2577  Value *NewV = nullptr;
2578  if (auto *C = dyn_cast<Constant>(V)) {
2579  NewV = ConstantExpr::getBitCast(C, DestTy);
2580  } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2581  // Explicitly perform load combine to make sure no opposing transform
2582  // can remove the bitcast in the meantime and trigger an infinite loop.
2583  Builder.SetInsertPoint(LI);
2584  NewV = combineLoadToNewType(*LI, DestTy);
2585  // Remove the old load and its use in the old phi, which itself becomes
2586  // dead once the whole transform finishes.
2587  replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2588  eraseInstFromFunction(*LI);
2589  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2590  NewV = BCI->getOperand(0);
2591  } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2592  NewV = NewPNodes[PrevPN];
2593  }
2594  assert(NewV);
2595  NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2596  }
2597  }
2598 
2599  // Traverse all accumulated PHI nodes and process their users,
2600  // which are Stores and BitCasts. Without this processing
2601  // NewPHI nodes could be replicated and could lead to extra
2602  // moves generated after DeSSA.
2603  // If there is a store with type B, change it to type A.
2604 
2605 
2606  // Replace users of BitCast B->A with NewPHI. These will help
2607  // later to get rid of a closure formed by OldPHI nodes.
2608  Instruction *RetVal = nullptr;
2609  for (auto *OldPN : OldPhiNodes) {
2610  PHINode *NewPN = NewPNodes[OldPN];
2611  for (User *V : make_early_inc_range(OldPN->users())) {
2612  if (auto *SI = dyn_cast<StoreInst>(V)) {
2613  assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2614  Builder.SetInsertPoint(SI);
2615  auto *NewBC =
2616  cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2617  SI->setOperand(0, NewBC);
2618  Worklist.push(SI);
2619  assert(hasStoreUsersOnly(*NewBC));
2620  }
2621  else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2622  Type *TyB = BCI->getOperand(0)->getType();
2623  Type *TyA = BCI->getType();
2624  assert(TyA == DestTy && TyB == SrcTy);
2625  (void) TyA;
2626  (void) TyB;
2627  Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2628  if (BCI == &CI)
2629  RetVal = I;
2630  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2631  assert(OldPhiNodes.contains(PHI));
2632  (void) PHI;
2633  } else {
2634  llvm_unreachable("all uses should be handled");
2635  }
2636  }
2637  }
2638 
2639  return RetVal;
2640 }
2641 
2642 static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
2643  const DataLayout &DL) {
2644  Value *Src = CI.getOperand(0);
2645  PointerType *SrcPTy = cast<PointerType>(Src->getType());
2646  PointerType *DstPTy = cast<PointerType>(CI.getType());
2647 
2648  // Bitcasts involving opaque pointers cannot be converted into a GEP.
2649  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
2650  return nullptr;
2651 
2652  Type *DstElTy = DstPTy->getElementType();
2653  Type *SrcElTy = SrcPTy->getElementType();
2654 
2655  // When the type pointed to is not sized, the cast cannot be
2656  // turned into a gep.
2657  if (!SrcElTy->isSized())
2658  return nullptr;
2659 
2660  // If the source and destination are pointers, and this cast is equivalent
2661  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2662  // This can enhance SROA and other transforms that want type-safe pointers.
2663  unsigned NumZeros = 0;
2664  while (SrcElTy && SrcElTy != DstElTy) {
2665  SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
2666  ++NumZeros;
2667  }
2668 
2669  // If we found a path from the src to dest, create the getelementptr now.
2670  if (SrcElTy == DstElTy) {
2671  SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
2672  GetElementPtrInst *GEP =
2673  GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
2674 
2675  // If the source pointer is dereferenceable, then assume it points to an
2676  // allocated object and apply "inbounds" to the GEP.
2677  bool CanBeNull, CanBeFreed;
2678  if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
2679  // In a non-default address space (not 0), a null pointer cannot be
2680  // assumed inbounds, so ignore that case (dereferenceable_or_null).
2681  // The reason is that 'null' is not treated differently in these address
2682  // spaces, and we consequently ignore the 'gep inbounds' special case
2683  // for 'null' which allows 'inbounds' on 'null' if the indices are
2684  // zeros.
2685  if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
2686  GEP->setIsInBounds();
2687  }
2688  return GEP;
2689  }
2690  return nullptr;
2691 }
2692 
2693 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2694  // If the operands are integer typed then apply the integer transforms,
2695  // otherwise just apply the common ones.
2696  Value *Src = CI.getOperand(0);
2697  Type *SrcTy = Src->getType();
2698  Type *DestTy = CI.getType();
2699 
2700  // Get rid of casts from one type to the same type. These are useless and can
2701  // be replaced by the operand.
2702  if (DestTy == Src->getType())
2703  return replaceInstUsesWith(CI, Src);
2704 
2705  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2706  // If we are casting an alloca to a pointer to a type of the same
2707  // size, rewrite the allocation instruction to allocate the "right" type.
2708  // There is no need to modify malloc calls because it is their bitcast that
2709  // needs to be cleaned up.
2710  if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2711  if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2712  return V;
2713 
2714  if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
2715  return I;
2716  }
2717 
2718  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2719  // Beware: messing with this target-specific oddity may cause trouble.
2720  if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2721  Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2722  return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2723  Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2724  }
2725 
2726  if (isa<IntegerType>(SrcTy)) {
2727  // If this is a cast from an integer to vector, check to see if the input
2728  // is a trunc or zext of a bitcast from vector. If so, we can replace all
2729  // the casts with a shuffle and (potentially) a bitcast.
2730  if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2731  CastInst *SrcCast = cast<CastInst>(Src);
2732  if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2733  if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2734  if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2735  BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2736  return I;
2737  }
2738 
2739  // If the input is an 'or' instruction, we may be doing shifts and ors to
2740  // assemble the elements of the vector manually. Try to rip the code out
2741  // and replace it with insertelements.
2742  if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2743  return replaceInstUsesWith(CI, V);
2744  }
2745  }
2746 
2747  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2748  if (SrcVTy->getNumElements() == 1) {
2749  // If our destination is not a vector, then make this a straight
2750  // scalar-scalar cast.
2751  if (!DestTy->isVectorTy()) {
2752  Value *Elem =
2753  Builder.CreateExtractElement(Src,
2754  Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2755  return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2756  }
2757 
2758  // Otherwise, see if our source is an insert. If so, then use the scalar
2759  // component directly:
2760  // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2761  if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2762  return new BitCastInst(InsElt->getOperand(1), DestTy);
2763  }
2764 
2765  // Convert an artificial vector insert into more analyzable bitwise logic.
2766  unsigned BitWidth = DestTy->getScalarSizeInBits();
2767  Value *X, *Y;
2768  uint64_t IndexC;
2769  if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2770  m_Value(Y), m_ConstantInt(IndexC)))) &&
2771  DestTy->isIntegerTy() && X->getType() == DestTy &&
2772  Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2773  // Adjust for big endian - the LSBs are at the high index.
2774  if (DL.isBigEndian())
2775  IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2776 
2777  // We only handle (endian-normalized) insert to index 0. Any other insert
2778  // would require a left-shift, so that is an extra instruction.
2779  if (IndexC == 0) {
2780  // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2781  unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2782  APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2783  Value *AndX = Builder.CreateAnd(X, MaskC);
2784  Value *ZextY = Builder.CreateZExt(Y, DestTy);
2785  return BinaryOperator::CreateOr(AndX, ZextY);
2786  }
2787  }
2788  }
2789 
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)). Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
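      // For example (illustrative):
      //   %bc0  = bitcast <2 x i64> %x to <2 x double>
      //   %shuf = shufflevector <2 x double> %bc0, <2 x double> undef,
      //                         <2 x i32> <i32 1, i32 0>
      //   %r    = bitcast <2 x double> %shuf to <2 x i64>
      // -->
      //   %r    = shufflevector <2 x i64> %x, <2 x i64> undef,
      //                         <2 x i32> <i32 1, i32 0>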
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector. Use the same element IDs, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
    // a byte-swap:
    // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
    // TODO: We should match the related pattern for bitreverse.
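    // For example (illustrative):
    //   %rev = shufflevector <4 x i8> %x, <4 x i8> undef,
    //                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    //   %r   = bitcast <4 x i8> %rev to i32
    // -->
    //   %bc  = bitcast <4 x i8> %x to i32
    //   %r   = call i32 @llvm.bswap.i32(i32 %bc)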
    if (DestTy->isIntegerTy() &&
        DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
        SrcTy->getScalarSizeInBits() == 8 &&
        ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
        Shuf->isReverse()) {
      assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
      assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
      Function *Bswap =
          Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
      Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
      return CallInst::Create(Bswap, { ScalarX });
    }
  }

  // Handle the A->B->A cast pattern where there is an intervening PHI node.
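  // For example (illustrative): if %p = phi double [ %b1, ... ], [ %b2, ... ]
  // where %b1 and %b2 are bitcasts from i64, then bitcast double %p to i64 can
  // be rewritten as a phi over the original i64 values.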
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  if (SrcTy->isPointerTy())
    return commonPointerCastTransforms(CI);
  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  // If the destination pointer element type is not the same as the source's,
  // first do a bitcast to the destination type, and then the addrspacecast.
  // This allows the cast to be exposed to other transforms.
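  // For example (illustrative, typed-pointer IR):
  //   %r  = addrspacecast i32 addrspace(1)* %p to i8*
  // -->
  //   %bc = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
  //   %r  = addrspacecast i8 addrspace(1)* %bc to i8*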
  Value *Src = CI.getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());

  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
    Type *MidTy =
        PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
    // Handle vectors of pointers.
    if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
      MidTy = VectorType::get(MidTy, VT->getElementCount());

    Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
    return new AddrSpaceCastInst(NewBitCast, CI.getType());
  }

  return commonPointerCastTransforms(CI);
}