// LLVM 16.0.0git — InstCombineVectorOps.cpp (extracted from the doxygen view).
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//
13 
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <utility>
44 
45 #define DEBUG_TYPE "instcombine"
46 
47 using namespace llvm;
48 using namespace PatternMatch;
49 
50 STATISTIC(NumAggregateReconstructionsSimplified,
51  "Number of aggregate reconstructions turned into reuse of the "
52  "original aggregate");
53 
54 /// Return true if the value is cheaper to scalarize than it is to leave as a
55 /// vector operation. If the extract index \p EI is a constant integer then
56 /// some operations may be cheap to scalarize.
57 ///
58 /// FIXME: It's possible to create more instructions than previously existed.
59 static bool cheapToScalarize(Value *V, Value *EI) {
60  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
61 
62  // If we can pick a scalar constant value out of a vector, that is free.
63  if (auto *C = dyn_cast<Constant>(V))
64  return CEI || C->getSplatValue();
65 
66  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
67  ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
68  // Index needs to be lower than the minimum size of the vector, because
69  // for scalable vector, the vector size is known at run time.
70  return CEI->getValue().ult(EC.getKnownMinValue());
71  }
72 
73  // An insertelement to the same constant index as our extract will simplify
74  // to the scalar inserted element. An insertelement to a different constant
75  // index is irrelevant to our extract.
77  return CEI;
78 
79  if (match(V, m_OneUse(m_Load(m_Value()))))
80  return true;
81 
82  if (match(V, m_OneUse(m_UnOp())))
83  return true;
84 
85  Value *V0, *V1;
86  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
87  if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
88  return true;
89 
90  CmpInst::Predicate UnusedPred;
91  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
92  if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
93  return true;
94 
95  return false;
96 }
97 
98 // If we have a PHI node with a vector type that is only used to feed
99 // itself and be an operand of extractelement at a constant location,
100 // try to replace the PHI of the vector type with a PHI of a scalar type.
101 Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
102  PHINode *PN) {
104  // The users we want the PHI to have are:
105  // 1) The EI ExtractElement (we already know this)
106  // 2) Possibly more ExtractElements with the same index.
107  // 3) Another operand, which will feed back into the PHI.
108  Instruction *PHIUser = nullptr;
109  for (auto *U : PN->users()) {
110  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
111  if (EI.getIndexOperand() == EU->getIndexOperand())
112  Extracts.push_back(EU);
113  else
114  return nullptr;
115  } else if (!PHIUser) {
116  PHIUser = cast<Instruction>(U);
117  } else {
118  return nullptr;
119  }
120  }
121 
122  if (!PHIUser)
123  return nullptr;
124 
125  // Verify that this PHI user has one use, which is the PHI itself,
126  // and that it is a binary operation which is cheap to scalarize.
127  // otherwise return nullptr.
128  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
129  !(isa<BinaryOperator>(PHIUser)) ||
130  !cheapToScalarize(PHIUser, EI.getIndexOperand()))
131  return nullptr;
132 
133  // Create a scalar PHI node that will replace the vector PHI node
134  // just before the current PHI node.
135  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
137  // Scalarize each PHI operand.
138  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
139  Value *PHIInVal = PN->getIncomingValue(i);
140  BasicBlock *inBB = PN->getIncomingBlock(i);
141  Value *Elt = EI.getIndexOperand();
142  // If the operand is the PHI induction variable:
143  if (PHIInVal == PHIUser) {
144  // Scalarize the binary operation. Its first operand is the
145  // scalar PHI, and the second operand is extracted from the other
146  // vector operand.
147  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
148  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
149  Value *Op = InsertNewInstWith(
150  ExtractElementInst::Create(B0->getOperand(opId), Elt,
151  B0->getOperand(opId)->getName() + ".Elt"),
152  *B0);
153  Value *newPHIUser = InsertNewInstWith(
155  scalarPHI, Op, B0), *B0);
156  scalarPHI->addIncoming(newPHIUser, inBB);
157  } else {
158  // Scalarize PHI input:
159  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
160  // Insert the new instruction into the predecessor basic block.
161  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
162  BasicBlock::iterator InsertPos;
163  if (pos && !isa<PHINode>(pos)) {
164  InsertPos = ++pos->getIterator();
165  } else {
166  InsertPos = inBB->getFirstInsertionPt();
167  }
168 
169  InsertNewInstWith(newEI, *InsertPos);
170 
171  scalarPHI->addIncoming(newEI, inBB);
172  }
173  }
174 
175  for (auto *E : Extracts)
176  replaceInstUsesWith(*E, scalarPHI);
177 
178  return &EI;
179 }
180 
181 Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
182  Value *X;
183  uint64_t ExtIndexC;
184  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
185  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
186  return nullptr;
187 
188  ElementCount NumElts =
189  cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
190  Type *DestTy = Ext.getType();
191  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
192  bool IsBigEndian = DL.isBigEndian();
193 
194  // If we are casting an integer to vector and extracting a portion, that is
195  // a shift-right and truncate.
196  if (X->getType()->isIntegerTy()) {
197  assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
198  "Expected fixed vector type for bitcast from scalar integer");
199 
200  // Big endian requires adjusting the extract index since MSB is at index 0.
201  // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
202  // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
203  if (IsBigEndian)
204  ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
205  unsigned ShiftAmountC = ExtIndexC * DestWidth;
206  if (!ShiftAmountC ||
207  (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
208  Ext.getVectorOperand()->hasOneUse())) {
209  if (ShiftAmountC)
210  X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
211  if (DestTy->isFloatingPointTy()) {
212  Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
213  Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
214  return new BitCastInst(Trunc, DestTy);
215  }
216  return new TruncInst(X, DestTy);
217  }
218  }
219 
220  if (!X->getType()->isVectorTy())
221  return nullptr;
222 
223  // If this extractelement is using a bitcast from a vector of the same number
224  // of elements, see if we can find the source element from the source vector:
225  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
226  auto *SrcTy = cast<VectorType>(X->getType());
227  ElementCount NumSrcElts = SrcTy->getElementCount();
228  if (NumSrcElts == NumElts)
229  if (Value *Elt = findScalarElement(X, ExtIndexC))
230  return new BitCastInst(Elt, DestTy);
231 
232  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
233  "Src and Dst must be the same sort of vector type");
234 
235  // If the source elements are wider than the destination, try to shift and
236  // truncate a subset of scalar bits of an insert op.
237  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
238  Value *Scalar;
239  Value *Vec;
240  uint64_t InsIndexC;
241  if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
242  m_ConstantInt(InsIndexC))))
243  return nullptr;
244 
245  // The extract must be from the subset of vector elements that we inserted
246  // into. Example: if we inserted element 1 of a <2 x i64> and we are
247  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
248  // of elements 4-7 of the bitcasted vector.
249  unsigned NarrowingRatio =
250  NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
251 
252  if (ExtIndexC / NarrowingRatio != InsIndexC) {
253  // Remove insertelement, if we don't use the inserted element.
254  // extractelement (bitcast (insertelement (Vec, b)), a) ->
255  // extractelement (bitcast (Vec), a)
256  // FIXME: this should be removed to SimplifyDemandedVectorElts,
257  // once scale vectors are supported.
258  if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
259  Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
260  return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
261  }
262  return nullptr;
263  }
264 
265  // We are extracting part of the original scalar. How that scalar is
266  // inserted into the vector depends on the endian-ness. Example:
267  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
268  // +--+--+--+--+--+--+--+--+
269  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
270  // extelt <4 x i16> V', 3: | |S2|S3|
271  // +--+--+--+--+--+--+--+--+
272  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
273  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
274  // In this example, we must right-shift little-endian. Big-endian is just a
275  // truncate.
276  unsigned Chunk = ExtIndexC % NarrowingRatio;
277  if (IsBigEndian)
278  Chunk = NarrowingRatio - 1 - Chunk;
279 
280  // Bail out if this is an FP vector to FP vector sequence. That would take
281  // more instructions than we started with unless there is no shift, and it
282  // may not be handled as well in the backend.
283  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
284  bool NeedDestBitcast = DestTy->isFloatingPointTy();
285  if (NeedSrcBitcast && NeedDestBitcast)
286  return nullptr;
287 
288  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
289  unsigned ShAmt = Chunk * DestWidth;
290 
291  // TODO: This limitation is more strict than necessary. We could sum the
292  // number of new instructions and subtract the number eliminated to know if
293  // we can proceed.
294  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
295  if (NeedSrcBitcast || NeedDestBitcast)
296  return nullptr;
297 
298  if (NeedSrcBitcast) {
299  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
300  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
301  }
302 
303  if (ShAmt) {
304  // Bail out if we could end with more instructions than we started with.
305  if (!Ext.getVectorOperand()->hasOneUse())
306  return nullptr;
307  Scalar = Builder.CreateLShr(Scalar, ShAmt);
308  }
309 
310  if (NeedDestBitcast) {
311  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
312  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
313  }
314  return new TruncInst(Scalar, DestTy);
315  }
316 
317  return nullptr;
318 }
319 
320 /// Find elements of V demanded by UserInstr.
322  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
323 
324  // Conservatively assume that all elements are needed.
325  APInt UsedElts(APInt::getAllOnes(VWidth));
326 
327  switch (UserInstr->getOpcode()) {
328  case Instruction::ExtractElement: {
329  ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
330  assert(EEI->getVectorOperand() == V);
331  ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
332  if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
333  UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
334  }
335  break;
336  }
337  case Instruction::ShuffleVector: {
338  ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
339  unsigned MaskNumElts =
340  cast<FixedVectorType>(UserInstr->getType())->getNumElements();
341 
342  UsedElts = APInt(VWidth, 0);
343  for (unsigned i = 0; i < MaskNumElts; i++) {
344  unsigned MaskVal = Shuffle->getMaskValue(i);
345  if (MaskVal == -1u || MaskVal >= 2 * VWidth)
346  continue;
347  if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
348  UsedElts.setBit(MaskVal);
349  if (Shuffle->getOperand(1) == V &&
350  ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
351  UsedElts.setBit(MaskVal - VWidth);
352  }
353  break;
354  }
355  default:
356  break;
357  }
358  return UsedElts;
359 }
360 
361 /// Find union of elements of V demanded by all its users.
362 /// If it is known by querying findDemandedEltsBySingleUser that
363 /// no user demands an element of V, then the corresponding bit
364 /// remains unset in the returned value.
366  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
367 
368  APInt UnionUsedElts(VWidth, 0);
369  for (const Use &U : V->uses()) {
370  if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
371  UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
372  } else {
373  UnionUsedElts = APInt::getAllOnes(VWidth);
374  break;
375  }
376 
377  if (UnionUsedElts.isAllOnes())
378  break;
379  }
380 
381  return UnionUsedElts;
382 }
383 
384 /// Given a constant index for a extractelement or insertelement instruction,
385 /// return it with the canonical type if it isn't already canonical. We
386 /// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
387 /// matter, we just want a consistent type to simplify CSE.
389  const unsigned IndexBW = IndexC->getType()->getBitWidth();
390  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
391  return nullptr;
392  return ConstantInt::get(IndexC->getContext(),
393  IndexC->getValue().zextOrTrunc(64));
394 }
395 
397  Value *SrcVec = EI.getVectorOperand();
398  Value *Index = EI.getIndexOperand();
399  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
400  SQ.getWithInstruction(&EI)))
401  return replaceInstUsesWith(EI, V);
402 
403  // extractelt (select %x, %vec1, %vec2), %const ->
404  // select %x, %vec1[%const], %vec2[%const]
405  // TODO: Support constant folding of multiple select operands:
406  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
407  // If the extractelement will for instance try to do out of bounds accesses
408  // because of the values of %c1 and/or %c2, the sequence could be optimized
409  // early. This is currently not possible because constant folding will reach
410  // an unreachable assertion if it doesn't find a constant operand.
411  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
412  if (SI->getCondition()->getType()->isIntegerTy() &&
413  isa<Constant>(EI.getIndexOperand()))
414  if (Instruction *R = FoldOpIntoSelect(EI, SI))
415  return R;
416 
417  // If extracting a specified index from the vector, see if we can recursively
418  // find a previously computed scalar that was inserted into the vector.
419  auto *IndexC = dyn_cast<ConstantInt>(Index);
420  if (IndexC) {
421  // Canonicalize type of constant indices to i64 to simplify CSE
422  if (auto *NewIdx = getPreferredVectorIndex(IndexC))
423  return replaceOperand(EI, 1, NewIdx);
424 
426  unsigned NumElts = EC.getKnownMinValue();
427 
428  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
429  Intrinsic::ID IID = II->getIntrinsicID();
430  // Index needs to be lower than the minimum size of the vector, because
431  // for scalable vector, the vector size is known at run time.
432  if (IID == Intrinsic::experimental_stepvector &&
433  IndexC->getValue().ult(NumElts)) {
434  Type *Ty = EI.getType();
435  unsigned BitWidth = Ty->getIntegerBitWidth();
436  Value *Idx;
437  // Return index when its value does not exceed the allowed limit
438  // for the element type of the vector, otherwise return undefined.
439  if (IndexC->getValue().getActiveBits() <= BitWidth)
440  Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
441  else
442  Idx = UndefValue::get(Ty);
443  return replaceInstUsesWith(EI, Idx);
444  }
445  }
446 
447  // InstSimplify should handle cases where the index is invalid.
448  // For fixed-length vector, it's invalid to extract out-of-range element.
449  if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
450  return nullptr;
451 
452  if (Instruction *I = foldBitcastExtElt(EI))
453  return I;
454 
455  // If there's a vector PHI feeding a scalar use through this extractelement
456  // instruction, try to scalarize the PHI.
457  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
458  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
459  return ScalarPHI;
460  }
461 
462  // TODO come up with a n-ary matcher that subsumes both unary and
463  // binary matchers.
464  UnaryOperator *UO;
465  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
466  // extelt (unop X), Index --> unop (extelt X, Index)
467  Value *X = UO->getOperand(0);
468  Value *E = Builder.CreateExtractElement(X, Index);
470  }
471 
472  BinaryOperator *BO;
473  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
474  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
475  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
476  Value *E0 = Builder.CreateExtractElement(X, Index);
477  Value *E1 = Builder.CreateExtractElement(Y, Index);
478  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
479  }
480 
481  Value *X, *Y;
482  CmpInst::Predicate Pred;
483  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
484  cheapToScalarize(SrcVec, Index)) {
485  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
486  Value *E0 = Builder.CreateExtractElement(X, Index);
487  Value *E1 = Builder.CreateExtractElement(Y, Index);
488  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
489  }
490 
491  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
492  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
493  // instsimplify already handled the case where the indices are constants
494  // and equal by value, if both are constants, they must not be the same
495  // value, extract from the pre-inserted value instead.
496  if (isa<Constant>(IE->getOperand(2)) && IndexC)
497  return replaceOperand(EI, 0, IE->getOperand(0));
498  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
499  auto *VecType = cast<VectorType>(GEP->getType());
500  ElementCount EC = VecType->getElementCount();
501  uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
502  if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
503  // Find out why we have a vector result - these are a few examples:
504  // 1. We have a scalar pointer and a vector of indices, or
505  // 2. We have a vector of pointers and a scalar index, or
506  // 3. We have a vector of pointers and a vector of indices, etc.
507  // Here we only consider combining when there is exactly one vector
508  // operand, since the optimization is less obviously a win due to
509  // needing more than one extractelements.
510 
511  unsigned VectorOps =
512  llvm::count_if(GEP->operands(), [](const Value *V) {
513  return isa<VectorType>(V->getType());
514  });
515  if (VectorOps == 1) {
516  Value *NewPtr = GEP->getPointerOperand();
517  if (isa<VectorType>(NewPtr->getType()))
518  NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
519 
520  SmallVector<Value *> NewOps;
521  for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
522  Value *Op = GEP->getOperand(I);
523  if (isa<VectorType>(Op->getType()))
524  NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
525  else
526  NewOps.push_back(Op);
527  }
528 
530  GEP->getSourceElementType(), NewPtr, NewOps);
531  NewGEP->setIsInBounds(GEP->isInBounds());
532  return NewGEP;
533  }
534  }
535  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
536  // If this is extracting an element from a shufflevector, figure out where
537  // it came from and extract from the appropriate input element instead.
538  // Restrict the following transformation to fixed-length vector.
539  if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
540  int SrcIdx =
541  SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
542  Value *Src;
543  unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
544  ->getNumElements();
545 
546  if (SrcIdx < 0)
547  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
548  if (SrcIdx < (int)LHSWidth)
549  Src = SVI->getOperand(0);
550  else {
551  SrcIdx -= LHSWidth;
552  Src = SVI->getOperand(1);
553  }
556  Src, ConstantInt::get(Int32Ty, SrcIdx, false));
557  }
558  } else if (auto *CI = dyn_cast<CastInst>(I)) {
559  // Canonicalize extractelement(cast) -> cast(extractelement).
560  // Bitcasts can change the number of vector elements, and they cost
561  // nothing.
562  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
563  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
564  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
565  }
566  }
567  }
568 
569  // Run demanded elements after other transforms as this can drop flags on
570  // binops. If there's two paths to the same final result, we prefer the
571  // one which doesn't force us to drop flags.
572  if (IndexC) {
574  unsigned NumElts = EC.getKnownMinValue();
575  // This instruction only demands the single element from the input vector.
576  // Skip for scalable type, the number of elements is unknown at
577  // compile-time.
578  if (!EC.isScalable() && NumElts != 1) {
579  // If the input vector has a single use, simplify it based on this use
580  // property.
581  if (SrcVec->hasOneUse()) {
582  APInt UndefElts(NumElts, 0);
583  APInt DemandedElts(NumElts, 0);
584  DemandedElts.setBit(IndexC->getZExtValue());
585  if (Value *V =
586  SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
587  return replaceOperand(EI, 0, V);
588  } else {
589  // If the input vector has multiple uses, simplify it based on a union
590  // of all elements used.
591  APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
592  if (!DemandedElts.isAllOnes()) {
593  APInt UndefElts(NumElts, 0);
594  if (Value *V = SimplifyDemandedVectorElts(
595  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
596  true /* AllowMultipleUsers */)) {
597  if (V != SrcVec) {
598  SrcVec->replaceAllUsesWith(V);
599  return &EI;
600  }
601  }
602  }
603  }
604  }
605  }
606  return nullptr;
607 }
608 
609 /// If V is a shuffle of values that ONLY returns elements from either LHS or
610 /// RHS, return the shuffle mask and true. Otherwise, return false.
613  assert(LHS->getType() == RHS->getType() &&
614  "Invalid CollectSingleShuffleElements");
615  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
616 
617  if (match(V, m_Undef())) {
618  Mask.assign(NumElts, -1);
619  return true;
620  }
621 
622  if (V == LHS) {
623  for (unsigned i = 0; i != NumElts; ++i)
624  Mask.push_back(i);
625  return true;
626  }
627 
628  if (V == RHS) {
629  for (unsigned i = 0; i != NumElts; ++i)
630  Mask.push_back(i + NumElts);
631  return true;
632  }
633 
634  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
635  // If this is an insert of an extract from some other vector, include it.
636  Value *VecOp = IEI->getOperand(0);
637  Value *ScalarOp = IEI->getOperand(1);
638  Value *IdxOp = IEI->getOperand(2);
639 
640  if (!isa<ConstantInt>(IdxOp))
641  return false;
642  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
643 
644  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
645  // We can handle this if the vector we are inserting into is
646  // transitively ok.
647  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
648  // If so, update the mask to reflect the inserted undef.
649  Mask[InsertedIdx] = -1;
650  return true;
651  }
652  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
653  if (isa<ConstantInt>(EI->getOperand(1))) {
654  unsigned ExtractedIdx =
655  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
656  unsigned NumLHSElts =
657  cast<FixedVectorType>(LHS->getType())->getNumElements();
658 
659  // This must be extracting from either LHS or RHS.
660  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
661  // We can handle this if the vector we are inserting into is
662  // transitively ok.
663  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
664  // If so, update the mask to reflect the inserted value.
665  if (EI->getOperand(0) == LHS) {
666  Mask[InsertedIdx % NumElts] = ExtractedIdx;
667  } else {
668  assert(EI->getOperand(0) == RHS);
669  Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
670  }
671  return true;
672  }
673  }
674  }
675  }
676  }
677 
678  return false;
679 }
680 
681 /// If we have insertion into a vector that is wider than the vector that we
682 /// are extracting from, try to widen the source vector to allow a single
683 /// shufflevector to replace one or more insert/extract pairs.
685  ExtractElementInst *ExtElt,
686  InstCombinerImpl &IC) {
687  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
688  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
689  unsigned NumInsElts = InsVecType->getNumElements();
690  unsigned NumExtElts = ExtVecType->getNumElements();
691 
692  // The inserted-to vector must be wider than the extracted-from vector.
693  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
694  NumExtElts >= NumInsElts)
695  return;
696 
697  // Create a shuffle mask to widen the extended-from vector using poison
698  // values. The mask selects all of the values of the original vector followed
699  // by as many poison values as needed to create a vector of the same length
700  // as the inserted-to vector.
701  SmallVector<int, 16> ExtendMask;
702  for (unsigned i = 0; i < NumExtElts; ++i)
703  ExtendMask.push_back(i);
704  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
705  ExtendMask.push_back(-1);
706 
707  Value *ExtVecOp = ExtElt->getVectorOperand();
708  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
709  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
710  ? ExtVecOpInst->getParent()
711  : ExtElt->getParent();
712 
713  // TODO: This restriction matches the basic block check below when creating
714  // new extractelement instructions. If that limitation is removed, this one
715  // could also be removed. But for now, we just bail out to ensure that we
716  // will replace the extractelement instruction that is feeding our
717  // insertelement instruction. This allows the insertelement to then be
718  // replaced by a shufflevector. If the insertelement is not replaced, we can
719  // induce infinite looping because there's an optimization for extractelement
720  // that will delete our widening shuffle. This would trigger another attempt
721  // here to create that shuffle, and we spin forever.
722  if (InsertionBlock != InsElt->getParent())
723  return;
724 
725  // TODO: This restriction matches the check in visitInsertElementInst() and
726  // prevents an infinite loop caused by not turning the extract/insert pair
727  // into a shuffle. We really should not need either check, but we're lacking
728  // folds for shufflevectors because we're afraid to generate shuffle masks
729  // that the backend can't handle.
730  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
731  return;
732 
733  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
734 
735  // Insert the new shuffle after the vector operand of the extract is defined
736  // (as long as it's not a PHI) or at the start of the basic block of the
737  // extract, so any subsequent extracts in the same basic block can use it.
738  // TODO: Insert before the earliest ExtractElementInst that is replaced.
739  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
740  WideVec->insertAfter(ExtVecOpInst);
741  else
742  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
743 
744  // Replace extracts from the original narrow vector with extracts from the new
745  // wide vector.
746  for (User *U : ExtVecOp->users()) {
747  ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
748  if (!OldExt || OldExt->getParent() != WideVec->getParent())
749  continue;
750  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
751  NewExt->insertAfter(OldExt);
752  IC.replaceInstUsesWith(*OldExt, NewExt);
753  }
754 }
755 
756 /// We are building a shuffle to create V, which is a sequence of insertelement,
757 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
758 /// not rely on the second vector source. Return a std::pair containing the
759 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
760 /// parameter as required.
761 ///
762 /// Note: we intentionally don't try to fold earlier shuffles since they have
763 /// often been chosen carefully to be efficiently implementable on the target.
764 using ShuffleOps = std::pair<Value *, Value *>;
765 
767  Value *PermittedRHS,
768  InstCombinerImpl &IC) {
769  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
770  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
771 
772  if (match(V, m_Undef())) {
773  Mask.assign(NumElts, -1);
774  return std::make_pair(
775  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
776  }
777 
778  if (isa<ConstantAggregateZero>(V)) {
779  Mask.assign(NumElts, 0);
780  return std::make_pair(V, nullptr);
781  }
782 
783  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
784  // If this is an insert of an extract from some other vector, include it.
785  Value *VecOp = IEI->getOperand(0);
786  Value *ScalarOp = IEI->getOperand(1);
787  Value *IdxOp = IEI->getOperand(2);
788 
789  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
790  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
791  unsigned ExtractedIdx =
792  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
793  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
794 
795  // Either the extracted from or inserted into vector must be RHSVec,
796  // otherwise we'd end up with a shuffle of three inputs.
797  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
798  Value *RHS = EI->getOperand(0);
799  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
800  assert(LR.second == nullptr || LR.second == RHS);
801 
802  if (LR.first->getType() != RHS->getType()) {
803  // Although we are giving up for now, see if we can create extracts
804  // that match the inserts for another round of combining.
805  replaceExtractElements(IEI, EI, IC);
806 
807  // We tried our best, but we can't find anything compatible with RHS
808  // further up the chain. Return a trivial shuffle.
809  for (unsigned i = 0; i < NumElts; ++i)
810  Mask[i] = i;
811  return std::make_pair(V, nullptr);
812  }
813 
814  unsigned NumLHSElts =
815  cast<FixedVectorType>(RHS->getType())->getNumElements();
816  Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
817  return std::make_pair(LR.first, RHS);
818  }
819 
820  if (VecOp == PermittedRHS) {
821  // We've gone as far as we can: anything on the other side of the
822  // extractelement will already have been converted into a shuffle.
823  unsigned NumLHSElts =
824  cast<FixedVectorType>(EI->getOperand(0)->getType())
825  ->getNumElements();
826  for (unsigned i = 0; i != NumElts; ++i)
827  Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
828  return std::make_pair(EI->getOperand(0), PermittedRHS);
829  }
830 
831  // If this insertelement is a chain that comes from exactly these two
832  // vectors, return the vector and the effective shuffle.
833  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
834  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
835  Mask))
836  return std::make_pair(EI->getOperand(0), PermittedRHS);
837  }
838  }
839  }
840 
841  // Otherwise, we can't do anything fancy. Return an identity vector.
842  for (unsigned i = 0; i != NumElts; ++i)
843  Mask.push_back(i);
844  return std::make_pair(V, nullptr);
845 }
846 
847 /// Look for chain of insertvalue's that fully define an aggregate, and trace
848 /// back the values inserted, see if they are all were extractvalue'd from
849 /// the same source aggregate from the exact same element indexes.
850 /// If they were, just reuse the source aggregate.
851 /// This potentially deals with PHI indirections.
853  InsertValueInst &OrigIVI) {
854  Type *AggTy = OrigIVI.getType();
855  unsigned NumAggElts;
856  switch (AggTy->getTypeID()) {
857  case Type::StructTyID:
858  NumAggElts = AggTy->getStructNumElements();
859  break;
860  case Type::ArrayTyID:
861  NumAggElts = AggTy->getArrayNumElements();
862  break;
863  default:
864  llvm_unreachable("Unhandled aggregate type?");
865  }
866 
867  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
868  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
869  // FIXME: any interesting patterns to be caught with larger limit?
870  assert(NumAggElts > 0 && "Aggregate should have elements.");
871  if (NumAggElts > 2)
872  return nullptr;
873 
874  static constexpr auto NotFound = None;
875  static constexpr auto FoundMismatch = nullptr;
876 
877  // Try to find a value of each element of an aggregate.
878  // FIXME: deal with more complex, not one-dimensional, aggregate types
879  SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
880 
881  // Do we know values for each element of the aggregate?
882  auto KnowAllElts = [&AggElts]() {
883  return !llvm::is_contained(AggElts, NotFound);
884  };
885 
886  int Depth = 0;
887 
888  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
889  // every element being overwritten twice, which should never happen.
890  static const int DepthLimit = 2 * NumAggElts;
891 
892  // Recurse up the chain of `insertvalue` aggregate operands until either we've
893  // reconstructed full initializer or can't visit any more `insertvalue`'s.
894  for (InsertValueInst *CurrIVI = &OrigIVI;
895  Depth < DepthLimit && CurrIVI && !KnowAllElts();
896  CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
897  ++Depth) {
898  auto *InsertedValue =
899  dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
900  if (!InsertedValue)
901  return nullptr; // Inserted value must be produced by an instruction.
902 
903  ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
904 
905  // Don't bother with more than single-level aggregates.
906  if (Indices.size() != 1)
907  return nullptr; // FIXME: deal with more complex aggregates?
908 
909  // Now, we may have already previously recorded the value for this element
910  // of an aggregate. If we did, that means the CurrIVI will later be
911  // overwritten with the already-recorded value. But if not, let's record it!
912  Optional<Instruction *> &Elt = AggElts[Indices.front()];
913  Elt = Elt.value_or(InsertedValue);
914 
915  // FIXME: should we handle chain-terminating undef base operand?
916  }
917 
918  // Was that sufficient to deduce the full initializer for the aggregate?
919  if (!KnowAllElts())
920  return nullptr; // Give up then.
921 
922  // We now want to find the source[s] of the aggregate elements we've found.
923  // And with "source" we mean the original aggregate[s] from which
924  // the inserted elements were extracted. This may require PHI translation.
925 
926  enum class AggregateDescription {
927  /// When analyzing the value that was inserted into an aggregate, we did
928  /// not manage to find defining `extractvalue` instruction to analyze.
929  NotFound,
930  /// When analyzing the value that was inserted into an aggregate, we did
931  /// manage to find defining `extractvalue` instruction[s], and everything
932  /// matched perfectly - aggregate type, element insertion/extraction index.
933  Found,
934  /// When analyzing the value that was inserted into an aggregate, we did
935  /// manage to find defining `extractvalue` instruction, but there was
936  /// a mismatch: either the source type from which the extraction was didn't
937  /// match the aggregate type into which the insertion was,
938  /// or the extraction/insertion channels mismatched,
939  /// or different elements had different source aggregates.
940  FoundMismatch
941  };
942  auto Describe = [](Optional<Value *> SourceAggregate) {
943  if (SourceAggregate == NotFound)
945  if (*SourceAggregate == FoundMismatch)
946  return AggregateDescription::FoundMismatch;
947  return AggregateDescription::Found;
948  };
949 
950  // Given the value \p Elt that was being inserted into element \p EltIdx of an
951  // aggregate AggTy, see if \p Elt was originally defined by an
952  // appropriate extractvalue (same element index, same aggregate type).
953  // If found, return the source aggregate from which the extraction was.
954  // If \p PredBB is provided, does PHI translation of an \p Elt first.
955  auto FindSourceAggregate =
956  [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
958  // For now(?), only deal with, at most, a single level of PHI indirection.
959  if (UseBB && PredBB)
960  Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
961  // FIXME: deal with multiple levels of PHI indirection?
962 
963  // Did we find an extraction?
964  auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
965  if (!EVI)
966  return NotFound;
967 
968  Value *SourceAggregate = EVI->getAggregateOperand();
969 
970  // Is the extraction from the same type into which the insertion was?
971  if (SourceAggregate->getType() != AggTy)
972  return FoundMismatch;
973  // And the element index doesn't change between extraction and insertion?
974  if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
975  return FoundMismatch;
976 
977  return SourceAggregate; // AggregateDescription::Found
978  };
979 
980  // Given elements AggElts that were constructing an aggregate OrigIVI,
981  // see if we can find appropriate source aggregate for each of the elements,
982  // and see it's the same aggregate for each element. If so, return it.
983  auto FindCommonSourceAggregate =
984  [&](Optional<BasicBlock *> UseBB,
986  Optional<Value *> SourceAggregate;
987 
988  for (auto I : enumerate(AggElts)) {
989  assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
990  "We don't store nullptr in SourceAggregate!");
991  assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
992  (I.index() != 0) &&
993  "SourceAggregate should be valid after the first element,");
994 
995  // For this element, is there a plausible source aggregate?
996  // FIXME: we could special-case undef element, IFF we know that in the
997  // source aggregate said element isn't poison.
998  Optional<Value *> SourceAggregateForElement =
999  FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1000 
1001  // Okay, what have we found? Does that correlate with previous findings?
1002 
1003  // Regardless of whether or not we have previously found source
1004  // aggregate for previous elements (if any), if we didn't find one for
1005  // this element, passthrough whatever we have just found.
1006  if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1007  return SourceAggregateForElement;
1008 
1009  // Okay, we have found source aggregate for this element.
1010  // Let's see what we already know from previous elements, if any.
1011  switch (Describe(SourceAggregate)) {
1013  // This is apparently the first element that we have examined.
1014  SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1015  continue; // Great, now look at next element.
1016  case AggregateDescription::Found:
1017  // We have previously already successfully examined other elements.
1018  // Is this the same source aggregate we've found for other elements?
1019  if (*SourceAggregateForElement != *SourceAggregate)
1020  return FoundMismatch;
1021  continue; // Still the same aggregate, look at next element.
1022  case AggregateDescription::FoundMismatch:
1023  llvm_unreachable("Can't happen. We would have early-exited then.");
1024  };
1025  }
1026 
1027  assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1028  "Must be a valid Value");
1029  return *SourceAggregate;
1030  };
1031 
1032  Optional<Value *> SourceAggregate;
1033 
1034  // Can we find the source aggregate without looking at predecessors?
1035  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
1036  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1037  if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1038  return nullptr; // Conflicting source aggregates!
1039  ++NumAggregateReconstructionsSimplified;
1040  return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1041  }
1042 
1043  // Okay, apparently we need to look at predecessors.
1044 
1045  // We should be smart about picking the "use" basic block, which will be the
1046  // merge point for aggregate, where we'll insert the final PHI that will be
1047  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1048  // We should look in which blocks each of the AggElts is being defined,
1049  // they all should be defined in the same basic block.
1050  BasicBlock *UseBB = nullptr;
1051 
1052  for (const Optional<Instruction *> &I : AggElts) {
1053  BasicBlock *BB = (*I)->getParent();
1054  // If it's the first instruction we've encountered, record the basic block.
1055  if (!UseBB) {
1056  UseBB = BB;
1057  continue;
1058  }
1059  // Otherwise, this must be the same basic block we've seen previously.
1060  if (UseBB != BB)
1061  return nullptr;
1062  }
1063 
1064  // If *all* of the elements are basic-block-independent, meaning they are
1065  // either function arguments, or constant expressions, then if we didn't
1066  // handle them without predecessor-aware handling, we won't handle them now.
1067  if (!UseBB)
1068  return nullptr;
1069 
1070  // If we didn't manage to find source aggregate without looking at
1071  // predecessors, and there are no predecessors to look at, then we're done.
1072  if (pred_empty(UseBB))
1073  return nullptr;
1074 
1075  // Arbitrary predecessor count limit.
1076  static const int PredCountLimit = 64;
1077 
1078  // Cache the (non-uniqified!) list of predecessors in a vector,
1079  // checking the limit at the same time for efficiency.
1080  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1081  for (BasicBlock *Pred : predecessors(UseBB)) {
1082  // Don't bother if there are too many predecessors.
1083  if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1084  return nullptr;
1085  Preds.emplace_back(Pred);
1086  }
1087 
1088  // For each predecessor, what is the source aggregate,
1089  // from which all the elements were originally extracted from?
1090  // Note that we want for the map to have stable iteration order!
1091  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
1092  for (BasicBlock *Pred : Preds) {
1093  std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1094  SourceAggregates.insert({Pred, nullptr});
1095  // Did we already evaluate this predecessor?
1096  if (!IV.second)
1097  continue;
1098 
1099  // Let's hope that when coming from predecessor Pred, all elements of the
1100  // aggregate produced by OrigIVI must have been originally extracted from
1101  // the same aggregate. Is that so? Can we find said original aggregate?
1102  SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1103  if (Describe(SourceAggregate) != AggregateDescription::Found)
1104  return nullptr; // Give up.
1105  IV.first->second = *SourceAggregate;
1106  }
1107 
1108  // All good! Now we just need to thread the source aggregates here.
1109  // Note that we have to insert the new PHI here, ourselves, because we can't
1110  // rely on InstCombinerImpl::run() inserting it into the right basic block.
1111  // Note that the same block can be a predecessor more than once,
1112  // and we need to preserve that invariant for the PHI node.
1114  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
1115  auto *PHI =
1116  Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1117  for (BasicBlock *Pred : Preds)
1118  PHI->addIncoming(SourceAggregates[Pred], Pred);
1119 
1120  ++NumAggregateReconstructionsSimplified;
1121  return replaceInstUsesWith(OrigIVI, PHI);
1122 }
1123 
1124 /// Try to find redundant insertvalue instructions, like the following ones:
1125 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1126 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1127 /// Here the second instruction inserts values at the same indices, as the
1128 /// first one, making the first one redundant.
1129 /// It should be transformed to:
1130 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1132  bool IsRedundant = false;
1133  ArrayRef<unsigned int> FirstIndices = I.getIndices();
1134 
1135  // If there is a chain of insertvalue instructions (each of them except the
1136  // last one has only one use and it's another insertvalue insn from this
1137  // chain), check if any of the 'children' uses the same indices as the first
1138  // instruction. In this case, the first one is redundant.
1139  Value *V = &I;
1140  unsigned Depth = 0;
1141  while (V->hasOneUse() && Depth < 10) {
1142  User *U = V->user_back();
1143  auto UserInsInst = dyn_cast<InsertValueInst>(U);
1144  if (!UserInsInst || U->getOperand(0) != V)
1145  break;
1146  if (UserInsInst->getIndices() == FirstIndices) {
1147  IsRedundant = true;
1148  break;
1149  }
1150  V = UserInsInst;
1151  Depth++;
1152  }
1153 
1154  if (IsRedundant)
1155  return replaceInstUsesWith(I, I.getOperand(0));
1156 
1157  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1158  return NewI;
1159 
1160  return nullptr;
1161 }
1162 
1164  // Can not analyze scalable type, the number of elements is not a compile-time
1165  // constant.
1166  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1167  return false;
1168 
1169  int MaskSize = Shuf.getShuffleMask().size();
1170  int VecSize =
1171  cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1172 
1173  // A vector select does not change the size of the operands.
1174  if (MaskSize != VecSize)
1175  return false;
1176 
1177  // Each mask element must be undefined or choose a vector element from one of
1178  // the source operands without crossing vector lanes.
1179  for (int i = 0; i != MaskSize; ++i) {
1180  int Elt = Shuf.getMaskValue(i);
1181  if (Elt != -1 && Elt != i && Elt != i + VecSize)
1182  return false;
1183  }
1184 
1185  return true;
1186 }
1187 
1188 /// Turn a chain of inserts that splats a value into an insert + shuffle:
1189 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1190 /// shufflevector(insertelt(X, %k, 0), poison, zero)
1192  // We are interested in the last insert in a chain. So if this insert has a
1193  // single user and that user is an insert, bail.
1194  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1195  return nullptr;
1196 
1197  VectorType *VecTy = InsElt.getType();
1198  // Can not handle scalable type, the number of elements is not a compile-time
1199  // constant.
1200  if (isa<ScalableVectorType>(VecTy))
1201  return nullptr;
1202  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1203 
1204  // Do not try to do this for a one-element vector, since that's a nop,
1205  // and will cause an inf-loop.
1206  if (NumElements == 1)
1207  return nullptr;
1208 
1209  Value *SplatVal = InsElt.getOperand(1);
1210  InsertElementInst *CurrIE = &InsElt;
1211  SmallBitVector ElementPresent(NumElements, false);
1212  InsertElementInst *FirstIE = nullptr;
1213 
1214  // Walk the chain backwards, keeping track of which indices we inserted into,
1215  // until we hit something that isn't an insert of the splatted value.
1216  while (CurrIE) {
1217  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1218  if (!Idx || CurrIE->getOperand(1) != SplatVal)
1219  return nullptr;
1220 
1221  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1222  // Check none of the intermediate steps have any additional uses, except
1223  // for the root insertelement instruction, which can be re-used, if it
1224  // inserts at position 0.
1225  if (CurrIE != &InsElt &&
1226  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1227  return nullptr;
1228 
1229  ElementPresent[Idx->getZExtValue()] = true;
1230  FirstIE = CurrIE;
1231  CurrIE = NextIE;
1232  }
1233 
1234  // If this is just a single insertelement (not a sequence), we are done.
1235  if (FirstIE == &InsElt)
1236  return nullptr;
1237 
1238  // If we are not inserting into an undef vector, make sure we've seen an
1239  // insert into every element.
1240  // TODO: If the base vector is not undef, it might be better to create a splat
1241  // and then a select-shuffle (blend) with the base vector.
1242  if (!match(FirstIE->getOperand(0), m_Undef()))
1243  if (!ElementPresent.all())
1244  return nullptr;
1245 
1246  // Create the insert + shuffle.
1247  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
1248  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1249  Constant *Zero = ConstantInt::get(Int32Ty, 0);
1250  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1251  FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
1252 
1253  // Splat from element 0, but replace absent elements with undef in the mask.
1254  SmallVector<int, 16> Mask(NumElements, 0);
1255  for (unsigned i = 0; i != NumElements; ++i)
1256  if (!ElementPresent[i])
1257  Mask[i] = -1;
1258 
1259  return new ShuffleVectorInst(FirstIE, Mask);
1260 }
1261 
1262 /// Try to fold an insert element into an existing splat shuffle by changing
1263 /// the shuffle's mask to include the index of this insert element.
1265  // Check if the vector operand of this insert is a canonical splat shuffle.
1266  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1267  if (!Shuf || !Shuf->isZeroEltSplat())
1268  return nullptr;
1269 
1270  // Bail out early if shuffle is scalable type. The number of elements in
1271  // shuffle mask is unknown at compile-time.
1272  if (isa<ScalableVectorType>(Shuf->getType()))
1273  return nullptr;
1274 
1275  // Check for a constant insertion index.
1276  uint64_t IdxC;
1277  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1278  return nullptr;
1279 
1280  // Check if the splat shuffle's input is the same as this insert's scalar op.
1281  Value *X = InsElt.getOperand(1);
1282  Value *Op0 = Shuf->getOperand(0);
1283  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1284  return nullptr;
1285 
1286  // Replace the shuffle mask element at the index of this insert with a zero.
1287  // For example:
1288  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1289  // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1290  unsigned NumMaskElts =
1291  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1292  SmallVector<int, 16> NewMask(NumMaskElts);
1293  for (unsigned i = 0; i != NumMaskElts; ++i)
1294  NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1295 
1296  return new ShuffleVectorInst(Op0, NewMask);
1297 }
1298 
1299 /// Try to fold an extract+insert element into an existing identity shuffle by
1300 /// changing the shuffle's mask to include the index of this insert element.
1302  // Check if the vector operand of this insert is an identity shuffle.
1303  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1304  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
1305  !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1306  return nullptr;
1307 
1308  // Bail out early if shuffle is scalable type. The number of elements in
1309  // shuffle mask is unknown at compile-time.
1310  if (isa<ScalableVectorType>(Shuf->getType()))
1311  return nullptr;
1312 
1313  // Check for a constant insertion index.
1314  uint64_t IdxC;
1315  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1316  return nullptr;
1317 
1318  // Check if this insert's scalar op is extracted from the identity shuffle's
1319  // input vector.
1320  Value *Scalar = InsElt.getOperand(1);
1321  Value *X = Shuf->getOperand(0);
1322  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1323  return nullptr;
1324 
1325  // Replace the shuffle mask element at the index of this extract+insert with
1326  // that same index value.
1327  // For example:
1328  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1329  unsigned NumMaskElts =
1330  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1331  SmallVector<int, 16> NewMask(NumMaskElts);
1332  ArrayRef<int> OldMask = Shuf->getShuffleMask();
1333  for (unsigned i = 0; i != NumMaskElts; ++i) {
1334  if (i != IdxC) {
1335  // All mask elements besides the inserted element remain the same.
1336  NewMask[i] = OldMask[i];
1337  } else if (OldMask[i] == (int)IdxC) {
1338  // If the mask element was already set, there's nothing to do
1339  // (demanded elements analysis may unset it later).
1340  return nullptr;
1341  } else {
1342  assert(OldMask[i] == UndefMaskElem &&
1343  "Unexpected shuffle mask element for identity shuffle");
1344  NewMask[i] = IdxC;
1345  }
1346  }
1347 
1348  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1349 }
1350 
1351 /// If we have an insertelement instruction feeding into another insertelement
1352 /// and the 2nd is inserting a constant into the vector, canonicalize that
1353 /// constant insertion before the insertion of a variable:
1354 ///
1355 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1356 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1357 ///
1358 /// This has the potential of eliminating the 2nd insertelement instruction
1359 /// via constant folding of the scalar constant into a vector constant.
1362  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1363  if (!InsElt1 || !InsElt1->hasOneUse())
1364  return nullptr;
1365 
1366  Value *X, *Y;
1367  Constant *ScalarC;
1368  ConstantInt *IdxC1, *IdxC2;
1369  if (match(InsElt1->getOperand(0), m_Value(X)) &&
1370  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1371  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1372  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1373  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1374  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1375  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1376  }
1377 
1378  return nullptr;
1379 }
1380 
1381 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1382 /// --> shufflevector X, CVec', Mask'
1384  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1385  // Bail out if the parent has more than one use. In that case, we'd be
1386  // replacing the insertelt with a shuffle, and that's not a clear win.
1387  if (!Inst || !Inst->hasOneUse())
1388  return nullptr;
1389  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1390  // The shuffle must have a constant vector operand. The insertelt must have
1391  // a constant scalar being inserted at a constant position in the vector.
1392  Constant *ShufConstVec, *InsEltScalar;
1393  uint64_t InsEltIndex;
1394  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1395  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1396  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1397  return nullptr;
1398 
1399  // Adding an element to an arbitrary shuffle could be expensive, but a
1400  // shuffle that selects elements from vectors without crossing lanes is
1401  // assumed cheap.
1402  // If we're just adding a constant into that shuffle, it will still be
1403  // cheap.
1404  if (!isShuffleEquivalentToSelect(*Shuf))
1405  return nullptr;
1406 
1407  // From the above 'select' check, we know that the mask has the same number
1408  // of elements as the vector input operands. We also know that each constant
1409  // input element is used in its lane and can not be used more than once by
1410  // the shuffle. Therefore, replace the constant in the shuffle's constant
1411  // vector with the insertelt constant. Replace the constant in the shuffle's
1412  // mask vector with the insertelt index plus the length of the vector
1413  // (because the constant vector operand of a shuffle is always the 2nd
1414  // operand).
1415  ArrayRef<int> Mask = Shuf->getShuffleMask();
1416  unsigned NumElts = Mask.size();
1417  SmallVector<Constant *, 16> NewShufElts(NumElts);
1418  SmallVector<int, 16> NewMaskElts(NumElts);
1419  for (unsigned I = 0; I != NumElts; ++I) {
1420  if (I == InsEltIndex) {
1421  NewShufElts[I] = InsEltScalar;
1422  NewMaskElts[I] = InsEltIndex + NumElts;
1423  } else {
1424  // Copy over the existing values.
1425  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1426  NewMaskElts[I] = Mask[I];
1427  }
1428 
1429  // Bail if we failed to find an element.
1430  if (!NewShufElts[I])
1431  return nullptr;
1432  }
1433 
1434  // Create new operands for a shuffle that includes the constant of the
1435  // original insertelt. The old shuffle will be dead now.
1436  return new ShuffleVectorInst(Shuf->getOperand(0),
1437  ConstantVector::get(NewShufElts), NewMaskElts);
1438  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1439  // Transform sequences of insertelements ops with constant data/indexes into
1440  // a single shuffle op.
1441  // Can not handle scalable type, the number of elements needed to create
1442  // shuffle mask is not a compile-time constant.
1443  if (isa<ScalableVectorType>(InsElt.getType()))
1444  return nullptr;
1445  unsigned NumElts =
1446  cast<FixedVectorType>(InsElt.getType())->getNumElements();
1447 
1448  uint64_t InsertIdx[2];
1449  Constant *Val[2];
1450  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1451  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1452  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1453  !match(IEI->getOperand(1), m_Constant(Val[1])))
1454  return nullptr;
1455  SmallVector<Constant *, 16> Values(NumElts);
1456  SmallVector<int, 16> Mask(NumElts);
1457  auto ValI = std::begin(Val);
1458  // Generate new constant vector and mask.
1459  // We have 2 values/masks from the insertelements instructions. Insert them
1460  // into new value/mask vectors.
1461  for (uint64_t I : InsertIdx) {
1462  if (!Values[I]) {
1463  Values[I] = *ValI;
1464  Mask[I] = NumElts + I;
1465  }
1466  ++ValI;
1467  }
1468  // Remaining values are filled with 'undef' values.
1469  for (unsigned I = 0; I < NumElts; ++I) {
1470  if (!Values[I]) {
1471  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1472  Mask[I] = I;
1473  }
1474  }
1475  // Create new operands for a shuffle that includes the constant of the
1476  // original insertelt.
1477  return new ShuffleVectorInst(IEI->getOperand(0),
1478  ConstantVector::get(Values), Mask);
1479  }
1480  return nullptr;
1481 }
1482 
1483 /// If both the base vector and the inserted element are extended from the same
1484 /// type, do the insert element in the narrow source type followed by extend.
1485 /// TODO: This can be extended to include other cast opcodes, but particularly
1486 /// if we create a wider insertelement, make sure codegen is not harmed.
1489  // We are creating a vector extend. If the original vector extend has another
1490  // use, that would mean we end up with 2 vector extends, so avoid that.
1491  // TODO: We could ease the use-clause to "if at least one op has one use"
1492  // (assuming that the source types match - see next TODO comment).
1493  Value *Vec = InsElt.getOperand(0);
1494  if (!Vec->hasOneUse())
1495  return nullptr;
1496 
1497  Value *Scalar = InsElt.getOperand(1);
1498  Value *X, *Y;
1499  CastInst::CastOps CastOpcode;
1500  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1501  CastOpcode = Instruction::FPExt;
1502  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1503  CastOpcode = Instruction::SExt;
1504  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1505  CastOpcode = Instruction::ZExt;
1506  else
1507  return nullptr;
1508 
1509  // TODO: We can allow mismatched types by creating an intermediate cast.
1510  if (X->getType()->getScalarType() != Y->getType())
1511  return nullptr;
1512 
1513  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1514  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1515  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1516 }
1517 
1518 /// Try to convert scalar extraction ops (shift+trunc) with insertelt to
1519 /// bitcast and shuffle:
1520 /// inselt V, (lshr (trunc X)), IndexC --> shuffle (bitcast X), V, Mask
// NOTE(review): the signature's continuation line (original line 1522,
// presumably the IRBuilder reference parameter used below) is missing from
// this extraction -- confirm against the upstream file.
1521 static Instruction *foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian,
1523  // inselt undef, (trunc T), IndexC
1524  // TODO: Allow any base vector value.
1525  // TODO: The one-use limitation could be removed for some cases (eg, no
1526  // extra shuffle is needed and a shift is eliminated).
// Only handle: fixed-width destination vector, a one-use trunc as the
// inserted scalar, a constant insert index, and an undef base vector.
1527  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1528  Value *T, *V = InsElt.getOperand(0);
1529  uint64_t IndexC;
1530  if (!VTy || !match(InsElt.getOperand(1), m_OneUse(m_Trunc(m_Value(T)))) ||
1531  !match(InsElt.getOperand(2), m_ConstantInt(IndexC)) ||
1532  !match(V, m_Undef()))
1533  return nullptr;
1534 
// The scalar must bitcast to a whole number of vector elements.
1535  Type *SrcTy = T->getType();
1536  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1537  unsigned VecEltWidth = VTy->getScalarSizeInBits();
1538  if (ScalarWidth % VecEltWidth != 0)
1539  return nullptr;
1540 
1541  unsigned NumEltsInScalar = ScalarWidth / VecEltWidth;
1542  Value *X = T;
1543  if ((IsBigEndian && IndexC == NumEltsInScalar - 1) ||
1544  (!IsBigEndian && IndexC == 0)) {
1545  // The insert is to the LSB end of the vector (depends on endian).
1546  // That's all we need.
1547  } else {
1548  // TODO: Look through a shift-right and translate the insert index.
1549  return nullptr;
1550  }
1551 
1552  // Bitcast the scalar to a vector type with the destination element type.
1553  Type *CastTy = FixedVectorType::get(VTy->getElementType(), NumEltsInScalar);
1554  Value *VecX = Builder.CreateBitCast(X, CastTy, "vec." + X->getName());
1555 
// Length-adjust the bitcast result to match the destination vector length.
1556  unsigned NumElts = VTy->getNumElements();
1557  if (NumElts > NumEltsInScalar) {
1558  // Pad the source vector with undef elements, so it matches the dest type.
1559  SmallVector<int> IdentityPaddedMask(NumElts, UndefMaskElem);
1560  for (unsigned i = 0; i != NumEltsInScalar; ++i)
1561  IdentityPaddedMask[i] = i;
1562  VecX = Builder.CreateShuffleVector(VecX, IdentityPaddedMask);
1563  } else if (NumElts < NumEltsInScalar) {
1564  // Narrow the source vector, so it matches the dest type.
1565  SmallVector<int> IdentityExtractMask(NumElts);
1566  std::iota(IdentityExtractMask.begin(), IdentityExtractMask.end(), 0);
1567  VecX = Builder.CreateShuffleVector(VecX, IdentityExtractMask);
1568  }
1569 
1570  // Insert the truncated element using a select-shuffle. All lanes but one are
1571  // from the base vector V.
1572  SmallVector<int> SelectMask(NumElts);
1573  std::iota(SelectMask.begin(), SelectMask.end(), 0);
1574  SelectMask[IndexC] = (int)IndexC + NumElts;
1575  return new ShuffleVectorInst(V, VecX, SelectMask);
1576 }
1577 
// NOTE(review): the defining line of this function (original line 1578) is
// missing from this extraction. From the uses of `IE`, Builder, and the
// replaceInstUsesWith/replaceOperand helpers, this is evidently the
// insertelement visitor (presumably InstCombinerImpl::visitInsertElementInst)
// -- confirm against the upstream file. It returns a replacement instruction,
// the (modified) instruction itself, or nullptr when no fold applies.
1579  Value *VecOp = IE.getOperand(0);
1580  Value *ScalarOp = IE.getOperand(1);
1581  Value *IdxOp = IE.getOperand(2);
1582 
// First try constant-folding / trivial simplification of the whole insert.
1583  if (auto *V = simplifyInsertElementInst(
1584  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1585  return replaceInstUsesWith(IE, V);
1586 
1587  // Canonicalize type of constant indices to i64 to simplify CSE
1588  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp))
1589  if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1590  return replaceOperand(IE, 2, NewIdx);
1591 
1592  // If the scalar is bitcast and inserted into undef, do the insert in the
1593  // source type followed by bitcast.
1594  // TODO: Generalize for insert into any constant, not just undef?
1595  Value *ScalarSrc;
1596  if (match(VecOp, m_Undef()) &&
1597  match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1598  (ScalarSrc->getType()->isIntegerTy() ||
1599  ScalarSrc->getType()->isFloatingPointTy())) {
1600  // inselt undef, (bitcast ScalarSrc), IdxOp -->
1601  // bitcast (inselt undef, ScalarSrc, IdxOp)
1602  Type *ScalarTy = ScalarSrc->getType();
1603  Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1604  UndefValue *NewUndef = UndefValue::get(VecTy);
1605  Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1606  return new BitCastInst(NewInsElt, IE.getType());
1607  }
1608 
1609  // If the vector and scalar are both bitcast from the same element type, do
1610  // the insert in that source type followed by bitcast.
1611  Value *VecSrc;
1612  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1613  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1614  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1615  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1616  cast<VectorType>(VecSrc->getType())->getElementType() ==
1617  ScalarSrc->getType()) {
1618  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1619  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1620  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1621  return new BitCastInst(NewInsElt, IE.getType());
1622  }
1623 
1624  // If the inserted element was extracted from some other fixed-length vector
1625  // and both indexes are valid constants, try to turn this into a shuffle.
1626  // Can not handle scalable vector type, the number of elements needed to
1627  // create shuffle mask is not a compile-time constant.
1628  uint64_t InsertedIdx, ExtractedIdx;
1629  Value *ExtVecOp;
1630  if (isa<FixedVectorType>(IE.getType()) &&
1631  match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1632  match(ScalarOp,
1633  m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1634  isa<FixedVectorType>(ExtVecOp->getType()) &&
1635  ExtractedIdx <
1636  cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1637  // TODO: Looking at the user(s) to determine if this insert is a
1638  // fold-to-shuffle opportunity does not match the usual instcombine
1639  // constraints. We should decide if the transform is worthy based only
1640  // on this instruction and its operands, but that may not work currently.
1641  //
1642  // Here, we are trying to avoid creating shuffles before reaching
1643  // the end of a chain of extract-insert pairs. This is complicated because
1644  // we do not generally form arbitrary shuffle masks in instcombine
1645  // (because those may codegen poorly), but collectShuffleElements() does
1646  // exactly that.
1647  //
1648  // The rules for determining what is an acceptable target-independent
1649  // shuffle mask are fuzzy because they evolve based on the backend's
1650  // capabilities and real-world impact.
// An insert is a "root" only when it is not itself consumed by another
// insertelement -- i.e. we are at the end of an extract/insert chain.
1651  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1652  if (!Insert.hasOneUse())
1653  return true;
1654  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1655  if (!InsertUser)
1656  return true;
1657  return false;
1658  };
1659 
1660  // Try to form a shuffle from a chain of extract-insert ops.
1661  if (isShuffleRootCandidate(IE)) {
// NOTE(review): the declaration of `Mask` (original line 1662) is missing
// from this extraction.
1663  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1664 
1665  // The proposed shuffle may be trivial, in which case we shouldn't
1666  // perform the combine.
1667  if (LR.first != &IE && LR.second != &IE) {
1668  // We now have a shuffle of LHS, RHS, Mask.
1669  if (LR.second == nullptr)
1670  LR.second = UndefValue::get(LR.first->getType());
1671  return new ShuffleVectorInst(LR.first, LR.second, Mask);
1672  }
1673  }
1674  }
1675 
// Try to simplify away demanded elements of a fixed-width insert.
1676  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1677  unsigned VWidth = VecTy->getNumElements();
1678  APInt UndefElts(VWidth, 0);
1679  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1680  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1681  if (V != &IE)
1682  return replaceInstUsesWith(IE, V);
1683  return &IE;
1684  }
1685  }
1686 
// NOTE(review): the `if (...)` line guarding this return (original line 1687)
// is missing from this extraction.
1688  return Shuf;
1689 
1690  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1691  return NewInsElt;
1692 
1693  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1694  return Broadcast;
1695 
1696  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1697  return Splat;
1698 
1699  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1700  return IdentityShuf;
1701 
// NOTE(review): the `if (...)` line guarding this return (original line 1702)
// is missing from this extraction.
1703  return Ext;
1704 
1705  if (Instruction *Shuf = foldTruncInsElt(IE, DL.isBigEndian(), Builder))
1706  return Shuf;
1707 
1708  return nullptr;
1709 }
1710 
1711 /// Return true if we can evaluate the specified expression tree if the vector
1712 /// elements were shuffled in a different order.
// NOTE(review): the first signature line (original line 1713, presumably
// `static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,` given the
// recursive calls below) is missing from this extraction.
1714  unsigned Depth = 5) {
1715  // We can always reorder the elements of a constant.
1716  if (isa<Constant>(V))
1717  return true;
1718 
1719  // We won't reorder vector arguments. No IPO here.
1720  Instruction *I = dyn_cast<Instruction>(V);
1721  if (!I) return false;
1722 
1723  // Two users may expect different orders of the elements. Don't try it.
1724  if (!I->hasOneUse())
1725  return false;
1726 
// Bound the recursion over the expression tree.
1727  if (Depth == 0) return false;
1728 
1729  switch (I->getOpcode()) {
1730  case Instruction::UDiv:
1731  case Instruction::SDiv:
1732  case Instruction::URem:
1733  case Instruction::SRem:
1734  // Propagating an undefined shuffle mask element to integer div/rem is not
1735  // allowed because those opcodes can create immediate undefined behavior
1736  // from an undefined element in an operand.
1737  if (llvm::is_contained(Mask, -1))
1738  return false;
1739  [[fallthrough]];
1740  case Instruction::Add:
1741  case Instruction::FAdd:
1742  case Instruction::Sub:
1743  case Instruction::FSub:
1744  case Instruction::Mul:
1745  case Instruction::FMul:
1746  case Instruction::FDiv:
1747  case Instruction::FRem:
1748  case Instruction::Shl:
1749  case Instruction::LShr:
1750  case Instruction::AShr:
1751  case Instruction::And:
1752  case Instruction::Or:
1753  case Instruction::Xor:
1754  case Instruction::ICmp:
1755  case Instruction::FCmp:
1756  case Instruction::Trunc:
1757  case Instruction::ZExt:
1758  case Instruction::SExt:
1759  case Instruction::FPToUI:
1760  case Instruction::FPToSI:
1761  case Instruction::UIToFP:
1762  case Instruction::SIToFP:
1763  case Instruction::FPTrunc:
1764  case Instruction::FPExt:
1765  case Instruction::GetElementPtr: {
1766  // Bail out if we would create longer vector ops. We could allow creating
1767  // longer vector ops, but that may result in more expensive codegen.
1768  Type *ITy = I->getType();
1769  if (ITy->isVectorTy() &&
1770  Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1771  return false;
// Elementwise ops can be reordered iff every operand can be.
1772  for (Value *Operand : I->operands()) {
1773  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1774  return false;
1775  }
1776  return true;
1777  }
1778  case Instruction::InsertElement: {
1779  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1780  if (!CI) return false;
1781  int ElementNumber = CI->getLimitedValue();
1782 
1783  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1784  // can't put an element into multiple indices.
1785  bool SeenOnce = false;
1786  for (int I : Mask) {
1787  if (I == ElementNumber) {
1788  if (SeenOnce)
1789  return false;
1790  SeenOnce = true;
1791  }
1792  }
1793  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1794  }
1795  }
// Any opcode not handled above is assumed unsafe to reorder.
1796  return false;
1797 }
1798 
1799 /// Rebuild a new instruction just like 'I' but with the new operands given.
1800 /// In the event of type mismatch, the type of the operands is correct.
// NOTE(review): the signature line (original line 1801) is missing from this
// extraction; from the body, it takes `Instruction *I` and a NewOps operand
// list (presumably ArrayRef<Value *>) and returns the rebuilt Instruction* --
// confirm against the upstream file. The rebuilt instruction is inserted
// before 'I' (the `, I` insert-before argument on the Create calls below).
1802  // We don't want to use the IRBuilder here because we want the replacement
1803  // instructions to appear next to 'I', not the builder's insertion point.
1804  switch (I->getOpcode()) {
1805  case Instruction::Add:
1806  case Instruction::FAdd:
1807  case Instruction::Sub:
1808  case Instruction::FSub:
1809  case Instruction::Mul:
1810  case Instruction::FMul:
1811  case Instruction::UDiv:
1812  case Instruction::SDiv:
1813  case Instruction::FDiv:
1814  case Instruction::URem:
1815  case Instruction::SRem:
1816  case Instruction::FRem:
1817  case Instruction::Shl:
1818  case Instruction::LShr:
1819  case Instruction::AShr:
1820  case Instruction::And:
1821  case Instruction::Or:
1822  case Instruction::Xor: {
1823  BinaryOperator *BO = cast<BinaryOperator>(I);
1824  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1825  BinaryOperator *New =
1826  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1827  NewOps[0], NewOps[1], "", BO);
// Preserve wrapping/exact/FP flags from the original binop.
1828  if (isa<OverflowingBinaryOperator>(BO)) {
1829  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1830  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1831  }
1832  if (isa<PossiblyExactOperator>(BO)) {
1833  New->setIsExact(BO->isExact());
1834  }
1835  if (isa<FPMathOperator>(BO))
1836  New->copyFastMathFlags(I);
1837  return New;
1838  }
1839  case Instruction::ICmp:
1840  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1841  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1842  NewOps[0], NewOps[1]);
1843  case Instruction::FCmp:
1844  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1845  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1846  NewOps[0], NewOps[1]);
1847  case Instruction::Trunc:
1848  case Instruction::ZExt:
1849  case Instruction::SExt:
1850  case Instruction::FPToUI:
1851  case Instruction::FPToSI:
1852  case Instruction::UIToFP:
1853  case Instruction::SIToFP:
1854  case Instruction::FPTrunc:
1855  case Instruction::FPExt: {
1856  // It's possible that the mask has a different number of elements from
1857  // the original cast. We recompute the destination type to match the mask.
1858  Type *DestTy = VectorType::get(
1859  I->getType()->getScalarType(),
1860  cast<VectorType>(NewOps[0]->getType())->getElementCount());
1861  assert(NewOps.size() == 1 && "cast with #ops != 1");
1862  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1863  "", I);
1864  }
1865  case Instruction::GetElementPtr: {
1866  Value *Ptr = NewOps[0];
1867  ArrayRef<Value*> Idx = NewOps.slice(1);
// NOTE(review): the line declaring/creating `GEP` (original line 1868) is
// missing from this extraction.
1869  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1870  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1871  return GEP;
1872  }
1873  }
1874  llvm_unreachable("failed to rebuild vector instructions");
1875 }
1876 
// NOTE(review): the defining line of this function (original line 1877) is
// missing from this extraction; from the recursive calls in the body it is
// evidently `evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask)`
// returning Value* -- confirm against the upstream file. It rewrites the
// expression tree rooted at V so its vector elements appear in the order
// selected by Mask; callers are expected to have validated the tree with
// canEvaluateShuffled first (see the unreachable at the bottom).
1878  // Mask.size() does not need to be equal to the number of vector elements.
1879 
1880  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1881  Type *EltTy = V->getType()->getScalarType();
1882  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
// Constants can be reordered directly without creating instructions.
1883  if (match(V, m_Undef()))
1884  return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1885 
1886  if (isa<ConstantAggregateZero>(V))
1887  return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1888 
1889  if (Constant *C = dyn_cast<Constant>(V))
1890  return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1891  Mask);
1892 
1893  Instruction *I = cast<Instruction>(V);
1894  switch (I->getOpcode()) {
1895  case Instruction::Add:
1896  case Instruction::FAdd:
1897  case Instruction::Sub:
1898  case Instruction::FSub:
1899  case Instruction::Mul:
1900  case Instruction::FMul:
1901  case Instruction::UDiv:
1902  case Instruction::SDiv:
1903  case Instruction::FDiv:
1904  case Instruction::URem:
1905  case Instruction::SRem:
1906  case Instruction::FRem:
1907  case Instruction::Shl:
1908  case Instruction::LShr:
1909  case Instruction::AShr:
1910  case Instruction::And:
1911  case Instruction::Or:
1912  case Instruction::Xor:
1913  case Instruction::ICmp:
1914  case Instruction::FCmp:
1915  case Instruction::Trunc:
1916  case Instruction::ZExt:
1917  case Instruction::SExt:
1918  case Instruction::FPToUI:
1919  case Instruction::FPToSI:
1920  case Instruction::UIToFP:
1921  case Instruction::SIToFP:
1922  case Instruction::FPTrunc:
1923  case Instruction::FPExt:
1924  case Instruction::Select:
1925  case Instruction::GetElementPtr: {
1926  SmallVector<Value*, 8> NewOps;
1927  bool NeedsRebuild =
1928  (Mask.size() !=
1929  cast<FixedVectorType>(I->getType())->getNumElements());
1930  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1931  Value *V;
1932  // Recursively call evaluateInDifferentElementOrder on vector arguments
1933  // as well. E.g. GetElementPtr may have scalar operands even if the
1934  // return value is a vector, so we need to examine the operand type.
1935  if (I->getOperand(i)->getType()->isVectorTy())
1936  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1937  else
1938  V = I->getOperand(i);
1939  NewOps.push_back(V);
1940  NeedsRebuild |= (V != I->getOperand(i));
1941  }
// Only materialize a new instruction when an operand actually changed.
1942  if (NeedsRebuild) {
1943  return buildNew(I, NewOps);
1944  }
1945  return I;
1946  }
1947  case Instruction::InsertElement: {
1948  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1949 
1950  // The insertelement was inserting at Element. Figure out which element
1951  // that becomes after shuffling. The answer is guaranteed to be unique
1952  // by CanEvaluateShuffled.
1953  bool Found = false;
1954  int Index = 0;
1955  for (int e = Mask.size(); Index != e; ++Index) {
1956  if (Mask[Index] == Element) {
1957  Found = true;
1958  break;
1959  }
1960  }
1961 
1962  // If element is not in Mask, no need to handle the operand 1 (element to
1963  // be inserted). Just evaluate values in operand 0 according to Mask.
1964  if (!Found)
1965  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1966 
1967  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1968  return InsertElementInst::Create(V, I->getOperand(1),
1969  ConstantInt::get(I32Ty, Index), "", I);
1970  }
1971  }
1972  llvm_unreachable("failed to reorder elements of vector instruction!");
1973 }
1974 
1975 // Returns true if the shuffle is extracting a contiguous range of values from
1976 // LHS, for example:
1977 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1978 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1979 // Shuffles to: |EE|FF|GG|HH|
1980 // +--+--+--+--+
// NOTE(review): the first signature line (original line 1981, presumably
// `static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,` given the
// use of SVI below) is missing from this extraction.
1982  ArrayRef<int> Mask) {
1983  unsigned LHSElems =
1984  cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1985  unsigned MaskElems = Mask.size();
1986  unsigned BegIdx = Mask.front();
1987  unsigned EndIdx = Mask.back();
// The range must be in-bounds of LHS, ascending, and exactly MaskElems wide.
1988  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1989  return false;
// Every mask element must select consecutively from BegIdx.
1990  for (unsigned I = 0; I != MaskElems; ++I)
1991  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1992  return false;
1993  return true;
1994 }
1995 
1996 /// These are the ingredients in an alternate form binary operator as described
1997 /// below.
1998 struct BinopElts {
// NOTE(review): the member declarations (original lines 1999-2002, evidently
// an opcode field `Opcode` and operand fields `Op0`/`Op1` plus the start of
// this constructor, given the init-list below) are missing from this
// extraction -- confirm against the upstream file.
2003  Value *V0 = nullptr, Value *V1 = nullptr) :
2004  Opcode(Opc), Op0(V0), Op1(V1) {}
// A default-constructed (opcode 0) BinopElts converts to false ("no result").
2005  operator bool() const { return Opcode != 0; }
2006 };
2007 
2008 /// Binops may be transformed into binops with different opcodes and operands.
2009 /// Reverse the usual canonicalization to enable folds with the non-canonical
2010 /// form of the binop. If a transform is possible, return the elements of the
2011 /// new binop. If not, return invalid elements.
// NOTE(review): the signature line (original line 2012) is missing from this
// extraction; from the body it takes a BinaryOperator* `BO` and the
// DataLayout `DL`, returning BinopElts -- confirm against the upstream file.
2013  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2014  Type *Ty = BO->getType();
2015  switch (BO->getOpcode()) {
2016  case Instruction::Shl: {
2017  // shl X, C --> mul X, (1 << C)
2018  Constant *C;
2019  if (match(BO1, m_Constant(C))) {
2020  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
2021  return {Instruction::Mul, BO0, ShlOne};
2022  }
2023  break;
2024  }
2025  case Instruction::Or: {
2026  // or X, C --> add X, C (when X and C have no common bits set)
2027  const APInt *C;
2028  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
2029  return {Instruction::Add, BO0, BO1};
2030  break;
2031  }
2032  case Instruction::Sub:
2033  // sub 0, X --> mul X, -1
2034  if (match(BO0, m_ZeroInt()))
// NOTE(review): the return statement guarded by the match above (original
// line 2035) is missing from this extraction.
2036  break;
2037  default:
2038  break;
2039  }
// No alternate form: return invalid (falsy) elements.
2040  return {};
2041 }
2042 
2043 /// A select shuffle of a select shuffle with a shared operand can be reduced
2044 /// to a single select shuffle. This is an obvious improvement in IR, and the
2045 /// backend is expected to lower select shuffles efficiently.
// NOTE(review): the signature line (original line 2046) is missing from this
// extraction; from the body it takes a ShuffleVectorInst& `Shuf` and returns
// the replacement Instruction* (or nullptr) -- confirm upstream.
2047  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2048 
2049  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
// NOTE(review): the declaration of `Mask` (original line 2050) is missing
// from this extraction.
2051  Shuf.getShuffleMask(Mask);
2052  unsigned NumElts = Mask.size();
2053 
2054  // Canonicalize a select shuffle with common operand as Op1.
2055  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2056  if (ShufOp && ShufOp->isSelect() &&
2057  (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2058  std::swap(Op0, Op1);
// NOTE(review): a statement (original line 2059, presumably the matching
// mask commute for the operand swap above) is missing from this extraction.
2060  }
2061 
2062  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2063  if (!ShufOp || !ShufOp->isSelect() ||
2064  (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2065  return nullptr;
2066 
2067  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2068  SmallVector<int, 16> Mask1;
2069  ShufOp->getShuffleMask(Mask1);
2070  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2071 
2072  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2073  if (Y == Op0) {
2074  std::swap(X, Y);
2075  ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2076  }
2077 
2078  // If the mask chooses from X (operand 0), it stays the same.
2079  // If the mask chooses from the earlier shuffle, the other mask value is
2080  // transferred to the combined select shuffle:
2081  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2082  SmallVector<int, 16> NewMask(NumElts);
2083  for (unsigned i = 0; i != NumElts; ++i)
2084  NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2085 
2086  // A select mask with undef elements might look like an identity mask.
// NOTE(review): the assert condition lines (original lines 2087-2088) are
// missing from this extraction; only the message string below remains.
2089  "Unexpected shuffle mask");
2090  return new ShuffleVectorInst(X, Y, NewMask);
2091 }
2092 
// NOTE(review): the defining line of this function (original line 2093) is
// missing from this extraction; from the body it folds a select shuffle whose
// two operands are a value and a binop of that same value with a constant,
// producing a single binop with a lane-blended constant -- confirm the exact
// name/signature upstream (it takes a ShuffleVectorInst& `Shuf`).
2094  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2095 
2096  // Are we shuffling together some value and that same value after it has been
2097  // modified by a binop with a constant?
2098  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2099  Constant *C;
2100  bool Op0IsBinop;
2101  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2102  Op0IsBinop = true;
2103  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2104  Op0IsBinop = false;
2105  else
2106  return nullptr;
2107 
2108  // The identity constant for a binop leaves a variable operand unchanged. For
2109  // a vector, this is a splat of something like 0, -1, or 1.
2110  // If there's no identity constant for this binop, we're done.
2111  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2112  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2113  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2114  if (!IdC)
2115  return nullptr;
2116 
2117  // Shuffle identity constants into the lanes that return the original value.
2118  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2119  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2120  // The existing binop constant vector remains in the same operand position.
// NOTE(review): the `Mask` declaration (original line 2121) and the second
// arm of the conditional expression below (original line 2123) are missing
// from this extraction.
2122  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2124 
// Div/rem/shift can trigger immediate UB or poison from an undef lane, so a
// "safe" replacement constant is substituted below for those opcodes.
2125  bool MightCreatePoisonOrUB =
// NOTE(review): the first conjunct of this condition (original line 2126) is
// missing from this extraction.
2127  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2128  if (MightCreatePoisonOrUB)
2129  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2130 
2131  // shuf (bop X, C), X, M --> bop X, C'
2132  // shuf X, (bop X, C), M --> bop X, C'
2133  Value *X = Op0IsBinop ? Op1 : Op0;
2134  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2135  NewBO->copyIRFlags(BO);
2136 
2137  // An undef shuffle mask element may propagate as an undef constant element in
2138  // the new binop. That would produce poison where the original code might not.
2139  // If we already made a safe constant, then there's no danger.
2140  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2141  NewBO->dropPoisonGeneratingFlags();
2142  return NewBO;
2143 }
2144 
2145 /// If we have an insert of a scalar to a non-zero element of an undefined
2146 /// vector and then shuffle that value, that's the same as inserting to the zero
2147 /// element and shuffling. Splatting from the zero element is recognized as the
2148 /// canonical form of splat.
// NOTE(review): the signature lines (original lines 2149-2150) are missing
// from this extraction; from the body it takes a ShuffleVectorInst& `Shuf`
// and an IRBuilder reference -- confirm the exact name/signature upstream.
2151  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
// NOTE(review): the declaration of `Mask` (original line 2152) is missing
// from this extraction.
2153  Value *X;
2154  uint64_t IndexC;
2155 
2156  // Match a shuffle that is a splat to a non-zero element.
2157  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2158  m_ConstantInt(IndexC)))) ||
2159  !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2160  return nullptr;
2161 
2162  // Insert into element 0 of an undef vector.
2163  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
2164  Constant *Zero = Builder.getInt32(0);
2165  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
2166 
2167  // Splat from element 0. Any mask element that is undefined remains undefined.
2168  // For example:
2169  // shuf (inselt undef, X, 2), _, <2,2,undef>
2170  // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2171  unsigned NumMaskElts =
2172  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2173  SmallVector<int, 16> NewMask(NumMaskElts, 0);
2174  for (unsigned i = 0; i != NumMaskElts; ++i)
2175  if (Mask[i] == UndefMaskElem)
2176  NewMask[i] = Mask[i];
2177 
2178  return new ShuffleVectorInst(NewIns, NewMask);
2179 }
2180 
2181 /// Try to fold shuffles that are the equivalent of a vector select.
// NOTE(review): the defining line (original line 2182) is missing from this
// extraction; from the use of Builder/DL/replaceInstUsesWith this is
// evidently an InstCombinerImpl member (presumably foldSelectShuffle) taking
// a ShuffleVectorInst& `Shuf` -- confirm against the upstream file.
2183  if (!Shuf.isSelect())
2184  return nullptr;
2185 
2186  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2187  // Commuting undef to operand 0 conflicts with another canonicalization.
2188  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2189  if (!match(Shuf.getOperand(1), m_Undef()) &&
2190  Shuf.getMaskValue(0) >= (int)NumElts) {
2191  // TODO: Can we assert that both operands of a shuffle-select are not undef
2192  // (otherwise, it would have been folded by instsimplify?
2193  Shuf.commute();
2194  return &Shuf;
2195  }
2196 
// NOTE(review): the `if (...)` lines guarding these two early returns
// (original lines 2197 and 2200, presumably calls to the select-shuffle
// helper folds defined above) are missing from this extraction.
2198  return I;
2199 
2201  return I;
2202 
2203  BinaryOperator *B0, *B1;
2204  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2205  !match(Shuf.getOperand(1), m_BinOp(B1)))
2206  return nullptr;
2207 
2208  // If one operand is "0 - X", allow that to be viewed as "X * -1"
2209  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2210  // with a multiply, we will exit because C0/C1 will not be set.
2211  Value *X, *Y;
2212  Constant *C0 = nullptr, *C1 = nullptr;
2213  bool ConstantsAreOp1;
2214  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2215  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2216  ConstantsAreOp1 = false;
2217  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2218  m_Neg(m_Value(X)))) &&
// NOTE(review): the first line of this second m_CombineOr (original line
// 2219) is missing from this extraction.
2220  m_Neg(m_Value(Y)))))
2221  ConstantsAreOp1 = true;
2222  else
2223  return nullptr;
2224 
2225  // We need matching binops to fold the lanes together.
2226  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2227  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2228  bool DropNSW = false;
2229  if (ConstantsAreOp1 && Opc0 != Opc1) {
2230  // TODO: We drop "nsw" if shift is converted into multiply because it may
2231  // not be correct when the shift amount is BitWidth - 1. We could examine
2232  // each vector element to determine if it is safe to keep that flag.
2233  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2234  DropNSW = true;
2235  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2236  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2237  Opc0 = AltB0.Opcode;
2238  C0 = cast<Constant>(AltB0.Op1);
2239  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2240  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2241  Opc1 = AltB1.Opcode;
2242  C1 = cast<Constant>(AltB1.Op1);
2243  }
2244  }
2245 
2246  if (Opc0 != Opc1 || !C0 || !C1)
2247  return nullptr;
2248 
2249  // The opcodes must be the same. Use a new name to make that clear.
2250  BinaryOperator::BinaryOps BOpc = Opc0;
2251 
2252  // Select the constant elements needed for the single binop.
// NOTE(review): the lines computing the blended constant `NewC` and the mask
// (original lines 2253-2254) are missing from this extraction.
2255 
2256  // We are moving a binop after a shuffle. When a shuffle has an undefined
2257  // mask element, the result is undefined, but it is not poison or undefined
2258  // behavior. That is not necessarily true for div/rem/shift.
2259  bool MightCreatePoisonOrUB =
// NOTE(review): the condition lines for this flag and the first line of the
// safe-constant call (original lines 2260-2261, 2263) are missing from this
// extraction.
2262  if (MightCreatePoisonOrUB)
2264  ConstantsAreOp1);
2265 
2266  Value *V;
2267  if (X == Y) {
2268  // Remove a binop and the shuffle by rearranging the constant:
2269  // shuffle (op V, C0), (op V, C1), M --> op V, C'
2270  // shuffle (op C0, V), (op C1, V), M --> op C', V
2271  V = X;
2272  } else {
2273  // If there are 2 different variable operands, we must create a new shuffle
2274  // (select) first, so check uses to ensure that we don't end up with more
2275  // instructions than we started with.
2276  if (!B0->hasOneUse() && !B1->hasOneUse())
2277  return nullptr;
2278 
2279  // If we use the original shuffle mask and op1 is *variable*, we would be
2280  // putting an undef into operand 1 of div/rem/shift. This is either UB or
2281  // poison. We do not have to guard against UB when *constants* are op1
2282  // because safe constants guarantee that we do not overflow sdiv/srem (and
2283  // there's no danger for other opcodes).
2284  // TODO: To allow this case, create a new shuffle mask with no undefs.
2285  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2286  return nullptr;
2287 
2288  // Note: In general, we do not create new shuffles in InstCombine because we
2289  // do not know if a target can lower an arbitrary shuffle optimally. In this
2290  // case, the shuffle uses the existing mask, so there is no additional risk.
2291 
2292  // Select the variable vectors first, then perform the binop:
2293  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2294  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2295  V = Builder.CreateShuffleVector(X, Y, Mask);
2296  }
2297 
2298  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2299  Builder.CreateBinOp(BOpc, NewC, V);
2300 
2301  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2302  // 1. If we changed an opcode, poison conditions might have changed.
2303  // 2. If the shuffle had undef mask elements, the new binop might have undefs
2304  // where the original code did not. But if we already made a safe constant,
2305  // then there's no danger.
2306  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2307  NewI->copyIRFlags(B0);
2308  NewI->andIRFlags(B1);
2309  if (DropNSW)
2310  NewI->setHasNoSignedWrap(false);
2311  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2312  NewI->dropPoisonGeneratingFlags();
2313  }
2314  return replaceInstUsesWith(Shuf, NewBO);
2315 }
2316 
2317 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2318 /// Example (little endian):
2319 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
// NOTE(review): the first signature line (original line 2320) is missing from
// this extraction; it evidently takes a ShuffleVectorInst& `Shuf` in addition
// to the endianness flag below -- confirm the exact name/signature upstream.
2321  bool IsBigEndian) {
2322  // This must be a bitcasted shuffle of 1 vector integer operand.
2323  Type *DestType = Shuf.getType();
2324  Value *X;
2325  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2326  !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2327  return nullptr;
2328 
2329  // The source type must have the same number of elements as the shuffle,
2330  // and the source element type must be larger than the shuffle element type.
2331  Type *SrcType = X->getType();
2332  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2333  cast<FixedVectorType>(SrcType)->getNumElements() !=
2334  cast<FixedVectorType>(DestType)->getNumElements() ||
2335  SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2336  return nullptr;
2337 
2338  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2339  "Expected a shuffle that decreases length");
2340 
2341  // Last, check that the mask chooses the correct low bits for each narrow
2342  // element in the result.
2343  uint64_t TruncRatio =
2344  SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
// NOTE(review): the declaration of `Mask` (original line 2345) is missing
// from this extraction.
// On big-endian targets the low bits of each wide element live in the last
// narrow subelement, hence the different LSB index computation per lane.
2346  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2347  if (Mask[i] == UndefMaskElem)
2348  continue;
2349  uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2350  assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2351  if (Mask[i] != (int)LSBIndex)
2352  return nullptr;
2353  }
2354 
2355  return new TruncInst(X, DestType);
2356 }
2357 
2358 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2359 /// narrowing (concatenating with undef and extracting back to the original
2360 /// length). This allows replacing the wide select with a narrow select.
// NOTE(review): the signature lines (original lines 2361-2362) are missing
// from this extraction; from the body it takes a ShuffleVectorInst& `Shuf`
// and an IRBuilder reference -- confirm the exact name/signature upstream.
2363  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2364  // of the 1st vector operand of a shuffle.
2365  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2366  return nullptr;
2367 
2368  // The vector being shuffled must be a vector select that we can eliminate.
2369  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2370  Value *Cond, *X, *Y;
2371  if (!match(Shuf.getOperand(0),
// NOTE(review): the select-matching pattern line (original line 2372) is
// missing from this extraction.
2373  return nullptr;
2374 
2375  // We need a narrow condition value. It must be extended with undef elements
2376  // and have the same number of elements as this shuffle.
2377  unsigned NarrowNumElts =
2378  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2379  Value *NarrowCond;
2380  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2381  cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2382  NarrowNumElts ||
2383  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2384  return nullptr;
2385 
2386  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2387  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2388  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2389  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2390  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2391 }
2392 
2393 /// Canonicalize FP negate after shuffle.
// Folds shuffle-of-fneg into fneg-of-shuffle so the negate sits outside:
//   shuffle (fneg X), undef, Mask      --> fneg (shuffle X, Mask)
//   shuffle (fneg X), (fneg Y), Mask   --> fneg (shuffle X, Y, Mask)
// NOTE(review): the function signature (doxygen lines 2394-2395) is elided in
// this rendering.
2396  Instruction *FNeg0;
2397  Value *X;
2398  if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
2399  m_FNeg(m_Value(X)))))
2400  return nullptr;
2401 
2402  // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
  // One-use check keeps the instruction count from growing; the new fneg
  // takes its fast-math flags from the original (CreateFNegFMF copies FNeg0's).
2403  if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2404  Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2405  return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
2406  }
2407 
2408  Instruction *FNeg1;
2409  Value *Y;
2410  if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
2411  m_FNeg(m_Value(Y)))))
2412  return nullptr;
2413 
2414  // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
  // With both operands negated, one dying fneg is enough to avoid a net
  // instruction increase. The merged fneg gets the flags of FNeg0 narrowed by
  // FNeg1 (copyIRFlags then andIRFlags), i.e. only flags common to both.
2415  if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
2416  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2417  Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
2418  NewFNeg->copyIRFlags(FNeg0);
2419  NewFNeg->andIRFlags(FNeg1);
2420  return NewFNeg;
2421  }
2422 
2423  return nullptr;
2424 }
2425 
2426 /// Canonicalize casts after shuffle.
// Folds shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) when
// both operands are the same kind of cast from the same source type.
// NOTE(review): the function signature (doxygen lines 2427-2428) is elided in
// this rendering.
2429  // Do we have 2 matching cast operands?
2430  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2431  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2432  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2433  Cast0->getSrcTy() != Cast1->getSrcTy())
2434  return nullptr;
2435 
2436  // TODO: Allow other opcodes? That would require easing the type restrictions
2437  // below here.
  // Only int<->FP conversions are handled; these keep a 1:1 element
  // correspondence between the cast's source and destination vectors.
2438  CastInst::CastOps CastOpcode = Cast0->getOpcode();
2439  switch (CastOpcode) {
2440  case Instruction::FPToSI:
2441  case Instruction::FPToUI:
2442  case Instruction::SIToFP:
2443  case Instruction::UIToFP:
2444  break;
2445  default:
2446  return nullptr;
2447  }
2448 
2449  VectorType *ShufTy = Shuf.getType();
2450  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2451  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2452 
2453  // TODO: Allow length-increasing shuffles?
2454  if (ShufTy->getElementCount().getKnownMinValue() >
2455  ShufOpTy->getElementCount().getKnownMinValue())
2456  return nullptr;
2457 
2458  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2459  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2460  "Expected fixed vector operands for casts and binary shuffle");
2461  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2462  return nullptr;
2463 
2464  // At least one of the operands must have only one use (the shuffle).
  // Otherwise both casts stay alive and the fold adds an instruction.
2465  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2466  return nullptr;
2467 
2468  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2469  Value *X = Cast0->getOperand(0);
2470  Value *Y = Cast1->getOperand(0);
2471  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2472  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2473 }
2474 
2475 /// Try to fold an extract subvector operation.
// Handles shuffles that are identity-with-extract (take the first N lanes of
// operand 0 with an undef operand 1). Two folds: (1) extracting exactly the
// bits of a just-inserted scalar becomes a bitcast of that scalar; (2) an
// extract of another shuffle is merged into a single narrower shuffle.
// NOTE(review): the function signature (doxygen line 2476) and the
// `Mask` declaration (line 2491) are elided in this rendering.
2477  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2478  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2479  return nullptr;
2480 
2481  // Check if we are extracting all bits of an inserted scalar:
2482  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2483  Value *X;
2484  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2485  X->getType()->getPrimitiveSizeInBits() ==
2486  Shuf.getType()->getPrimitiveSizeInBits())
2487  return new BitCastInst(X, Shuf.getType());
2488 
2489  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2490  Value *Y;
2492  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2493  return nullptr;
2494 
2495  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2496  // then combining may result in worse codegen.
2497  if (!Op0->hasOneUse())
2498  return nullptr;
2499 
2500  // We are extracting a subvector from a shuffle. Remove excess elements from
2501  // the 1st shuffle mask to eliminate the extract.
2502  //
2503  // This transform is conservatively limited to identity extracts because we do
2504  // not allow arbitrary shuffle mask creation as a target-independent transform
2505  // (because we can't guarantee that will lower efficiently).
2506  //
2507  // If the extracting shuffle has an undef mask element, it transfers to the
2508  // new shuffle mask. Otherwise, copy the original mask element. Example:
2509  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2510  // shuf X, Y, <C0, undef, C2, undef>
2511  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2512  SmallVector<int, 16> NewMask(NumElts);
2513  assert(NumElts < Mask.size() &&
2514  "Identity with extract must have less elements than its inputs");
2515 
2516  for (unsigned i = 0; i != NumElts; ++i) {
2517  int ExtractMaskElt = Shuf.getMaskValue(i);
2518  int MaskElt = Mask[i];
  // Undef in the extracting mask wins; otherwise the inner mask element is
  // used unchanged (the extract is identity, so lane i maps to inner lane i).
2519  NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2520  }
2521  return new ShuffleVectorInst(X, Y, NewMask);
2522 }
2523 
2524 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2525 /// operand with the operand of an insertelement.
// NOTE(review): the first line of the signature (doxygen line 2526) and the
// `Mask` vector declaration (line 2529) are elided in this rendering; the
// second statement elided (line 2607) presumably recomputes the mask after
// the std::swap below — confirm against the original source.
2527  InstCombinerImpl &IC) {
2528  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2530  Shuf.getShuffleMask(Mask);
2531 
2532  int NumElts = Mask.size();
2533  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2534 
2535  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2536  // not be able to handle it there if the insertelement has >1 use.
2537  // If the shuffle has an insertelement operand but does not choose the
2538  // inserted scalar element from that value, then we can replace that shuffle
2539  // operand with the source vector of the insertelement.
2540  Value *X;
2541  uint64_t IdxC;
2542  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2543  // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2544  if (!is_contained(Mask, (int)IdxC))
2545  return IC.replaceOperand(Shuf, 0, X);
2546  }
2547  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2548  // Offset the index constant by the vector width because we are checking for
2549  // accesses to the 2nd vector input of the shuffle.
2550  IdxC += InpNumElts;
2551  // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2552  if (!is_contained(Mask, (int)IdxC))
2553  return IC.replaceOperand(Shuf, 1, X);
2554  }
2555  // For the rest of the transform, the shuffle must not change vector sizes.
2556  // TODO: This restriction could be removed if the insert has only one use
2557  // (because the transform would require a new length-changing shuffle).
2558  if (NumElts != InpNumElts)
2559  return nullptr;
2560 
2561  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  // Returns true iff the shuffle merely splices V0's inserted scalar into one
  // lane of V1 (every other selected lane is an identity pick from V1).
  // On success, IndexC is rewritten to the destination lane in V1.
2562  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2563  // We need an insertelement with a constant index.
2564  if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2565  m_ConstantInt(IndexC))))
2566  return false;
2567 
2568  // Test the shuffle mask to see if it splices the inserted scalar into the
2569  // operand 1 vector of the shuffle.
2570  int NewInsIndex = -1;
2571  for (int i = 0; i != NumElts; ++i) {
2572  // Ignore undef mask elements.
2573  if (Mask[i] == -1)
2574  continue;
2575 
2576  // The shuffle takes elements of operand 1 without lane changes.
2577  if (Mask[i] == NumElts + i)
2578  continue;
2579 
2580  // The shuffle must choose the inserted scalar exactly once.
2581  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2582  return false;
2583 
2584  // The shuffle is placing the inserted scalar into element i.
2585  NewInsIndex = i;
2586  }
2587 
2588  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2589 
2590  // Index is updated to the potentially translated insertion lane.
2591  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2592  return true;
2593  };
2594 
2595  // If the shuffle is unnecessary, insert the scalar operand directly into
2596  // operand 1 of the shuffle. Example:
2597  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2598  Value *Scalar;
2599  ConstantInt *IndexC;
2600  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2601  return InsertElementInst::Create(V1, Scalar, IndexC);
2602 
2603  // Try again after commuting shuffle. Example:
2604  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2605  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2606  std::swap(V0, V1);
2608  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2609  return InsertElementInst::Create(V1, Scalar, IndexC);
2610 
2611  return nullptr;
2612 }
2613 
// Fold a shuffle whose two operands are both widening (identity-with-padding)
// shuffles of same-typed narrow vectors into a single shuffle of the narrow
// sources: shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'.
// NOTE(review): the function signature (doxygen line 2614) and the
// `Mask` declaration (line 2650, presumably Shuf.getShuffleMask()) are elided
// in this rendering — confirm against the original source.
2615  // Match the operands as identity with padding (also known as concatenation
2616  // with undef) shuffles of the same source type. The backend is expected to
2617  // recreate these concatenations from a shuffle of narrow operands.
2618  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2619  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2620  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2621  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2622  return nullptr;
2623 
2624  // We limit this transform to power-of-2 types because we expect that the
2625  // backend can convert the simplified IR patterns to identical nodes as the
2626  // original IR.
2627  // TODO: If we can verify the same behavior for arbitrary types, the
2628  // power-of-2 checks can be removed.
2629  Value *X = Shuffle0->getOperand(0);
2630  Value *Y = Shuffle1->getOperand(0);
2631  if (X->getType() != Y->getType() ||
2632  !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2633  !isPowerOf2_32(
2634  cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2635  !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2636  match(X, m_Undef()) || match(Y, m_Undef()))
2637  return nullptr;
2638  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2639  match(Shuffle1->getOperand(1), m_Undef()) &&
2640  "Unexpected operand for identity shuffle");
2641 
2642  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2643  // operands directly by adjusting the shuffle mask to account for the narrower
2644  // types:
2645  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2646  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2647  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2648  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2649 
2651  SmallVector<int, 16> NewMask(Mask.size(), -1);
2652  for (int i = 0, e = Mask.size(); i != e; ++i) {
2653  if (Mask[i] == -1)
2654  continue;
2655 
2656  // If this shuffle is choosing an undef element from 1 of the sources, that
2657  // element is undef.
2658  if (Mask[i] < WideElts) {
2659  if (Shuffle0->getMaskValue(Mask[i]) == -1)
2660  continue;
2661  } else {
2662  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2663  continue;
2664  }
2665 
2666  // If this shuffle is choosing from the 1st narrow op, the mask element is
2667  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2668  // element is offset down to adjust for the narrow vector widths.
2669  if (Mask[i] < WideElts) {
2670  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2671  NewMask[i] = Mask[i];
2672  } else {
2673  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2674  NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2675  }
2676  }
2677  return new ShuffleVectorInst(X, Y, NewMask);
2678 }
2679 
2680 // Splatting the first element of the result of a BinOp, where any of the
2681 // BinOp's operands are the result of a first element splat can be simplified to
2682 // splatting the first element of the result of the BinOp
// NOTE(review): the function signature (doxygen line 2683) is elided in this
// rendering.
  // The shuffle must be a splat of lane 0 (zero mask, undef second operand).
2684  if (!match(SVI.getOperand(1), m_Undef()) ||
2685  !match(SVI.getShuffleMask(), m_ZeroMask()))
2686  return nullptr;
2687 
2688  Value *Op0 = SVI.getOperand(0);
2689  Value *X, *Y;
  // Either side of the binop may be the lane-0 splat; X/Y bind to the narrow
  // sources in matching positions either way.
2690  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2691  m_Value(Y))) &&
2692  !match(Op0, m_BinOp(m_Value(X),
2693  m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2694  return nullptr;
2695  if (X->getType() != Y->getType())
2696  return nullptr;
2697 
2698  auto *BinOp = cast<BinaryOperator>(Op0);
  // The rebuilt binop operates on different lane values than the original;
  // require speculatability — presumably to avoid introducing UB such as a
  // divide-by-zero on lanes that were never computed before. TODO confirm.
2699  if (!isSafeToSpeculativelyExecute(BinOp))
2700  return nullptr;
2701 
2702  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
  // Preserve the original binop's IR flags (e.g. nsw/nuw/FMF) on the clone.
2703  if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2704  NewBOI->copyIRFlags(BinOp);
2705 
  // Splat lane 0 of the new binop with the original splat mask.
2706  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2707 }
2708 
// InstCombine entry point for shufflevector: attempts a fixed sequence of
// folds and returns the replacement instruction (or &SVI if modified in
// place), or nullptr when nothing changed.
// NOTE(review): this rendering elides the function signature (doxygen line
// 2709) and several statements (e.g. the `Mask` declaration at 2744-2745, the
// fold calls at 2789/2815/2822, the evaluateInDifferentElementOrder call at
// 2826, and the BCs/NewBCs declarations at 2868-2869) — confirm against the
// original source before relying on exact control flow.
2710  Value *LHS = SVI.getOperand(0);
2711  Value *RHS = SVI.getOperand(1);
2712  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2713  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2714  SVI.getType(), ShufQuery))
2715  return replaceInstUsesWith(SVI, V);
2716 
2717  if (Instruction *I = simplifyBinOpSplats(SVI))
2718  return I;
2719 
  // All folds below assume fixed-width vectors; bail on scalable types.
2720  if (isa<ScalableVectorType>(LHS->getType()))
2721  return nullptr;
2722 
2723  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2724  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2725 
2726  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2727  //
2728  // if X and Y are of the same (vector) type, and the element size is not
2729  // changed by the bitcasts, we can distribute the bitcasts through the
2730  // shuffle, hopefully reducing the number of instructions. We make sure that
2731  // at least one bitcast only has one use, so we don't *increase* the number of
2732  // instructions here.
2733  Value *X, *Y;
2734  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2735  X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2736  X->getType()->getScalarSizeInBits() ==
2737  SVI.getType()->getScalarSizeInBits() &&
2738  (LHS->hasOneUse() || RHS->hasOneUse())) {
2739  Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2740  SVI.getName() + ".uncasted");
2741  return new BitCastInst(V, SVI.getType());
2742  }
2743 
2746 
2747  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2748  // simulated shuffle can simplify, then this shuffle is unnecessary:
2749  // shuf (bitcast X), undef, Mask --> bitcast X'
2750  // TODO: This could be extended to allow length-changing shuffles.
2751  // The transform might also be obsoleted if we allowed canonicalization
2752  // of bitcasted shuffles.
2753  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2754  X->getType()->isVectorTy() && VWidth == LHSWidth) {
2755  // Try to create a scaled mask constant.
2756  auto *XType = cast<FixedVectorType>(X->getType());
2757  unsigned XNumElts = XType->getNumElements();
2758  SmallVector<int, 16> ScaledMask;
2759  if (XNumElts >= VWidth) {
2760  assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2761  narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2762  } else {
2763  assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2764  if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2765  ScaledMask.clear();
2766  }
2767  if (!ScaledMask.empty()) {
2768  // If the shuffled source vector simplifies, cast that value to this
2769  // shuffle's type.
2770  if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2771  ScaledMask, XType, ShufQuery))
2772  return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2773  }
2774  }
2775 
2776  // shuffle x, x, mask --> shuffle x, undef, mask'
2777  if (LHS == RHS) {
2778  assert(!match(RHS, m_Undef()) &&
2779  "Shuffle with 2 undef ops not simplified?");
2780  return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2781  }
2782 
2783  // shuffle undef, x, mask --> shuffle x, undef, mask'
2784  if (match(LHS, m_Undef())) {
2785  SVI.commute();
2786  return &SVI;
2787  }
2788 
2790  return I;
2791 
2792  if (Instruction *I = foldSelectShuffle(SVI))
2793  return I;
2794 
2795  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2796  return I;
2797 
2798  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2799  return I;
2800 
2801  if (Instruction *I = foldFNegShuffle(SVI, Builder))
2802  return I;
2803 
2804  if (Instruction *I = foldCastShuffle(SVI, Builder))
2805  return I;
2806 
2807  APInt UndefElts(VWidth, 0);
2808  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2809  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2810  if (V != &SVI)
2811  return replaceInstUsesWith(SVI, V);
2812  return &SVI;
2813  }
2814 
2816  return I;
2817 
2818  // These transforms have the potential to lose undef knowledge, so they are
2819  // intentionally placed after SimplifyDemandedVectorElts().
2820  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2821  return I;
2823  return I;
2824 
2825  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2827  return replaceInstUsesWith(SVI, V);
2828  }
2829 
2830  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2831  // a non-vector type. We can instead bitcast the original vector followed by
2832  // an extract of the desired element:
2833  //
2834  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2835  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2836  // %1 = bitcast <4 x i8> %sroa to i32
2837  // Becomes:
2838  // %bc = bitcast <16 x i8> %in to <4 x i32>
2839  // %ext = extractelement <4 x i32> %bc, i32 0
2840  //
2841  // If the shuffle is extracting a contiguous range of values from the input
2842  // vector then each use which is a bitcast of the extracted size can be
2843  // replaced. This will work if the vector types are compatible, and the begin
2844  // index is aligned to a value in the casted vector type. If the begin index
2845  // isn't aligned then we can shuffle the original vector (keeping the same
2846  // vector type) before extracting.
2847  //
2848  // This code will bail out if the target type is fundamentally incompatible
2849  // with vectors of the source type.
2850  //
2851  // Example of <16 x i8>, target type i32:
2852  // Index range [4,8): v-----------v Will work.
2853  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2854  // <16 x i8>: | | | | | | | | | | | | | | | | |
2855  // <4 x i32>: | | | | |
2856  // +-----------+-----------+-----------+-----------+
2857  // Index range [6,10): ^-----------^ Needs an extra shuffle.
2858  // Target type i40: ^--------------^ Won't work, bail.
2859  bool MadeChange = false;
2860  if (isShuffleExtractingFromLHS(SVI, Mask)) {
2861  Value *V = LHS;
2862  unsigned MaskElems = Mask.size();
2863  auto *SrcTy = cast<FixedVectorType>(V->getType());
2864  unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2865  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2866  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2867  unsigned SrcNumElems = SrcTy->getNumElements();
2870  for (User *U : SVI.users())
2871  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2872  if (!BC->use_empty())
2873  // Only visit bitcasts that weren't previously handled.
2874  BCs.push_back(BC);
2875  for (BitCastInst *BC : BCs) {
2876  unsigned BegIdx = Mask.front();
2877  Type *TgtTy = BC->getDestTy();
2878  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2879  if (!TgtElemBitWidth)
2880  continue;
2881  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2882  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2883  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2884  if (!VecBitWidthsEqual)
2885  continue;
2886  if (!VectorType::isValidElementType(TgtTy))
2887  continue;
2888  auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2889  if (!BegIsAligned) {
2890  // Shuffle the input so [0,NumElements) contains the output, and
2891  // [NumElems,SrcNumElems) is undef.
2892  SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2893  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2894  ShuffleMask[I] = Idx;
2895  V = Builder.CreateShuffleVector(V, ShuffleMask,
2896  SVI.getName() + ".extract");
2897  BegIdx = 0;
2898  }
2899  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2900  assert(SrcElemsPerTgtElem);
2901  BegIdx /= SrcElemsPerTgtElem;
  // Cache one bitcast per target vector type so repeated uses share it.
2902  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2903  auto *NewBC =
2904  BCAlreadyExists
2905  ? NewBCs[CastSrcTy]
2906  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2907  if (!BCAlreadyExists)
2908  NewBCs[CastSrcTy] = NewBC;
2909  auto *Ext = Builder.CreateExtractElement(
2910  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2911  // The shufflevector isn't being replaced: the bitcast that used it
2912  // is. InstCombine will visit the newly-created instructions.
2913  replaceInstUsesWith(*BC, Ext);
2914  MadeChange = true;
2915  }
2916  }
2917 
2918  // If the LHS is a shufflevector itself, see if we can combine it with this
2919  // one without producing an unusual shuffle.
2920  // Cases that might be simplified:
2921  // 1.
2922  // x1=shuffle(v1,v2,mask1)
2923  // x=shuffle(x1,undef,mask)
2924  // ==>
2925  // x=shuffle(v1,undef,newMask)
2926  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2927  // 2.
2928  // x1=shuffle(v1,undef,mask1)
2929  // x=shuffle(x1,x2,mask)
2930  // where v1.size() == mask1.size()
2931  // ==>
2932  // x=shuffle(v1,x2,newMask)
2933  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2934  // 3.
2935  // x2=shuffle(v2,undef,mask2)
2936  // x=shuffle(x1,x2,mask)
2937  // where v2.size() == mask2.size()
2938  // ==>
2939  // x=shuffle(x1,v2,newMask)
2940  // newMask[i] = (mask[i] < x1.size())
2941  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2942  // 4.
2943  // x1=shuffle(v1,undef,mask1)
2944  // x2=shuffle(v2,undef,mask2)
2945  // x=shuffle(x1,x2,mask)
2946  // where v1.size() == v2.size()
2947  // ==>
2948  // x=shuffle(v1,v2,newMask)
2949  // newMask[i] = (mask[i] < x1.size())
2950  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2951  //
2952  // Here we are really conservative:
2953  // we are absolutely afraid of producing a shuffle mask not in the input
2954  // program, because the code gen may not be smart enough to turn a merged
2955  // shuffle into two specific shuffles: it may produce worse code. As such,
2956  // we only merge two shuffles if the result is either a splat or one of the
2957  // input shuffle masks. In this case, merging the shuffles just removes
2958  // one instruction, which we know is safe. This is good for things like
2959  // turning: (splat(splat)) -> splat, or
2960  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2961  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2962  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2963  if (LHSShuffle)
2964  if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2965  LHSShuffle = nullptr;
2966  if (RHSShuffle)
2967  if (!match(RHSShuffle->getOperand(1), m_Undef()))
2968  RHSShuffle = nullptr;
2969  if (!LHSShuffle && !RHSShuffle)
2970  return MadeChange ? &SVI : nullptr;
2971 
2972  Value* LHSOp0 = nullptr;
2973  Value* LHSOp1 = nullptr;
2974  Value* RHSOp0 = nullptr;
2975  unsigned LHSOp0Width = 0;
2976  unsigned RHSOp0Width = 0;
2977  if (LHSShuffle) {
2978  LHSOp0 = LHSShuffle->getOperand(0);
2979  LHSOp1 = LHSShuffle->getOperand(1);
2980  LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2981  }
2982  if (RHSShuffle) {
2983  RHSOp0 = RHSShuffle->getOperand(0);
2984  RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2985  }
2986  Value* newLHS = LHS;
2987  Value* newRHS = RHS;
2988  if (LHSShuffle) {
2989  // case 1
2990  if (match(RHS, m_Undef())) {
2991  newLHS = LHSOp0;
2992  newRHS = LHSOp1;
2993  }
2994  // case 2 or 4
2995  else if (LHSOp0Width == LHSWidth) {
2996  newLHS = LHSOp0;
2997  }
2998  }
2999  // case 3 or 4
3000  if (RHSShuffle && RHSOp0Width == LHSWidth) {
3001  newRHS = RHSOp0;
3002  }
3003  // case 4
3004  if (LHSOp0 == RHSOp0) {
3005  newLHS = LHSOp0;
3006  newRHS = nullptr;
3007  }
3008 
3009  if (newLHS == LHS && newRHS == RHS)
3010  return MadeChange ? &SVI : nullptr;
3011 
3012  ArrayRef<int> LHSMask;
3013  ArrayRef<int> RHSMask;
3014  if (newLHS != LHS)
3015  LHSMask = LHSShuffle->getShuffleMask();
3016  if (RHSShuffle && newRHS != RHS)
3017  RHSMask = RHSShuffle->getShuffleMask();
3018 
3019  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3020  SmallVector<int, 16> newMask;
3021  bool isSplat = true;
3022  int SplatElt = -1;
3023  // Create a new mask for the new ShuffleVectorInst so that the new
3024  // ShuffleVectorInst is equivalent to the original one.
3025  for (unsigned i = 0; i < VWidth; ++i) {
3026  int eltMask;
3027  if (Mask[i] < 0) {
3028  // This element is an undef value.
3029  eltMask = -1;
3030  } else if (Mask[i] < (int)LHSWidth) {
3031  // This element is from left hand side vector operand.
3032  //
3033  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3034  // new mask value for the element.
3035  if (newLHS != LHS) {
3036  eltMask = LHSMask[Mask[i]];
3037  // If the value selected is an undef value, explicitly specify it
3038  // with a -1 mask value.
3039  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
3040  eltMask = -1;
3041  } else
3042  eltMask = Mask[i];
3043  } else {
3044  // This element is from right hand side vector operand
3045  //
3046  // If the value selected is an undef value, explicitly specify it
3047  // with a -1 mask value. (case 1)
3048  if (match(RHS, m_Undef()))
3049  eltMask = -1;
3050  // If RHS is going to be replaced (case 3 or 4), calculate the
3051  // new mask value for the element.
3052  else if (newRHS != RHS) {
3053  eltMask = RHSMask[Mask[i]-LHSWidth];
3054  // If the value selected is an undef value, explicitly specify it
3055  // with a -1 mask value.
3056  if (eltMask >= (int)RHSOp0Width) {
  // NOTE(review): assert-message typo — "check" should read "checked".
  // The message is a runtime string, so it is left untouched here.
3057  assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
3058  "should have been check above");
3059  eltMask = -1;
3060  }
3061  } else
3062  eltMask = Mask[i]-LHSWidth;
3063 
3064  // If LHS's width is changed, shift the mask value accordingly.
3065  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3066  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3067  // If newRHS == newLHS, we want to remap any references from newRHS to
3068  // newLHS so that we can properly identify splats that may occur due to
3069  // obfuscation across the two vectors.
3070  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3071  eltMask += newLHSWidth;
3072  }
3073 
3074  // Check if this could still be a splat.
3075  if (eltMask >= 0) {
3076  if (SplatElt >= 0 && SplatElt != eltMask)
3077  isSplat = false;
3078  SplatElt = eltMask;
3079  }
3080 
3081  newMask.push_back(eltMask);
3082  }
3083 
3084  // If the result mask is equal to one of the original shuffle masks,
3085  // or is a splat, do the replacement.
3086  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3087  if (!newRHS)
3088  newRHS = UndefValue::get(newLHS->getType());
3089  return new ShuffleVectorInst(newLHS, newRHS, newMask);
3090  }
3091 
3092  return MadeChange ? &SVI : nullptr;
3093 }
i
i
Definition: README.txt:29
llvm::simplifyInsertElementInst
Value * simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
Definition: InstructionSimplify.cpp:4841
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
BinopElts::Opcode
BinaryOperator::BinaryOps Opcode
Definition: InstCombineVectorOps.cpp:1999
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1924
llvm::ShuffleVectorInst::isIdentityWithExtract
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
Definition: Instructions.cpp:2549
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::ShuffleVectorInst::isIdentityMask
static bool isIdentityMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition: Instructions.cpp:2306
llvm::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if 'V & Mask' is known to be zero.
Definition: ValueTracking.cpp:366
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1514
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
InstCombiner.h
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::ConstantInt::getType
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:173
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::SimplifyQuery
Definition: InstructionSimplify.h:93
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:2211
llvm::ElementCount
Definition: TypeSize.h:404
foldInsSequenceIntoSplat
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
Definition: InstCombineVectorOps.cpp:1191
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1872
llvm::BinaryOperator::CreateWithCopiedFlags
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:250
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5256
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:328
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1484
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2263
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1563
ErrorHandling.h
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3340
llvm::APInt::zextOrTrunc
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:994
llvm::SmallDenseMap
Definition: DenseMap.h:880
narrowInsElt
static Instruction * narrowInsElt(InsertElementInst &InsElt, InstCombiner::BuilderTy &Builder)
If both the base vector and the inserted element are extended from the same type, do the insert eleme...
Definition: InstCombineVectorOps.cpp:1487
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
foldSelectShuffleOfSelectShuffle
static Instruction * foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf)
A select shuffle of a select shuffle with a shared operand can be reduced to a single select shuffle.
Definition: InstCombineVectorOps.cpp:2046
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:244
APInt.h
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1950
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1885
buildNew
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps)
Rebuild a new instruction just like 'I' but with the new operands given.
Definition: InstCombineVectorOps.cpp:1801
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:469
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:176
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::InstCombinerImpl::visitExtractElementInst
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Definition: InstCombineVectorOps.cpp:396
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:984
llvm::InstCombinerImpl::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombineInternal.h:408
llvm::Optional
Definition: APInt.h:33
isShuffleEquivalentToSelect
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1163
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::InstCombinerImpl::visitShuffleVectorInst
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2709
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::UnaryOperator
Definition: InstrTypes.h:102
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1593
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1902
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
foldConstantInsEltIntoShuffle
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
Definition: InstCombineVectorOps.cpp:1383
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1996
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::simplifyExtractElementInst
Value * simplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4953
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:35
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2714
llvm::UnaryOperator::getOpcode
UnaryOps getOpcode() const
Definition: InstrTypes.h:172
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:169
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1768
isSplat
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
Definition: LowerMatrixIntrinsics.cpp:98
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1300
llvm::ShuffleVectorInst::isSelect
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
Definition: Instructions.h:2201
InstCombineInternal.h
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1462
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2795
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
llvm::PatternMatch::m_UnOp
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
Definition: PatternMatch.h:79
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
InstrTypes.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1360
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SI
@ SI
Definition: SIInstrInfo.cpp:7966
foldInsEltIntoIdentityShuffle
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
Definition: InstCombineVectorOps.cpp:1301
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1936
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:627
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition: SmallBitVector.h:216
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:807
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2063
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1033
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:395
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:393
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:764
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:347
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:157
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:61
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
collectShuffleElements
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC)
Definition: InstCombineVectorOps.cpp:766
llvm::InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Definition: InstCombineVectorOps.cpp:852
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:209
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition: Instruction.h:175
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:194
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2791
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2054
Type.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1492
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::Type::getStructNumElements
unsigned getStructNumElements() const
Definition: DerivedTypes.h:348
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:222
llvm::Value::user_back
User * user_back()
Definition: Value.h:407
llvm::ConstantExpr::getShuffleVector
static Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2591
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:675
foldShuffleWithInsert
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
Definition: InstCombineVectorOps.cpp:2526
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:359
llvm::InstCombiner::getSafeVectorConstantForBinop
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:314
VectorUtils.h
BasicBlock.h
llvm::ShuffleVectorInst::increasesLength
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
Definition: Instructions.h:2104
llvm::ShuffleVectorInst::commuteShuffleMask
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
Definition: Instructions.h:2413
llvm::InstCombinerImpl::foldSelectShuffle
Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf)
Try to fold shuffles that are the equivalent of a vector select.
Definition: InstCombineVectorOps.cpp:2182
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:176
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:165
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1186
uint64_t
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:172
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4811
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2849
replaceExtractElements
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
Definition: InstCombineVectorOps.cpp:684
getPreferredVectorIndex
ConstantInt * getPreferredVectorIndex(ConstantInt *IndexC)
Given a constant index for a extractelement or insertelement instruction, return it with the canonica...
Definition: InstCombineVectorOps.cpp:388
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1356
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2668
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
ArrayRef.h
canonicalizeInsertSplat
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
Definition: InstCombineVectorOps.cpp:2149
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Type::getArrayNumElements
uint64_t getArrayNumElements() const
Definition: DerivedTypes.h:384
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
BinopElts
These are the ingredients in an alternate form binary operator as described below.
Definition: InstCombineVectorOps.cpp:1998
llvm::ExtractElementInst::getVectorOperandType
VectorType * getVectorOperandType() const
Definition: Instructions.h:1906
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1737
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1523
llvm::SimplifyQuery::getWithInstruction
SimplifyQuery getWithInstruction(Instruction *I) const
Definition: InstructionSimplify.h:120
collectSingleShuffleElements
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Definition: InstCombineVectorOps.cpp:611
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:296
getOpcode
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
foldIdentityExtractShuffle
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to fold an extract subvector operation.
Definition: InstCombineVectorOps.cpp:2476
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:955
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1623
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:854
llvm::PatternMatch::m_CombineAnd
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:224
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::BinaryOperator
Definition: InstrTypes.h:189
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
evaluateInDifferentElementOrder
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1877
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1901
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:410
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:358
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::InstCombinerImpl::visitInsertValueInst
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Definition: InstCombineVectorOps.cpp:1131
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1348
foldCastShuffle
static Instruction * foldCastShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize casts after shuffle.
Definition: InstCombineVectorOps.cpp:2427
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1081
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:167
llvm::ComplexDeinterleavingOperation::Shuffle
@ Shuffle
findDemandedEltsBySingleUser
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr)
Find elements of V demanded by UserInstr.
Definition: InstCombineVectorOps.cpp:321
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InstCombinerImpl::visitInsertElementInst
Instruction * visitInsertElementInst(InsertElementInst &IE)
Definition: InstCombineVectorOps.cpp:1578
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1687
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:490
Constant.h
llvm::PPC::getPredicate
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
foldTruncShuffle
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
Definition: InstCombineVectorOps.cpp:2320
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:243
llvm::InstCombinerImpl::InsertNewInstWith
Instruction * InsertNewInstWith(Instruction *New, Instruction &Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombineInternal.h:397
narrowVectorSelect
static Instruction * narrowVectorSelect(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Match a shuffle-select-shuffle pattern where the shuffles are widening and narrowing (concatenating w...
Definition: InstCombineVectorOps.cpp:2361
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
isShuffleExtractingFromLHS
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1981
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2741
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:2342
hoistInsEltConst
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
Definition: InstCombineVectorOps.cpp:1360
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:216
Casting.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
foldInsEltIntoSplat
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
Definition: InstCombineVectorOps.cpp:1264
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:285
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::CmpInst::Create
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Definition: Instructions.cpp:4011
llvm::InstCombinerImpl::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombineInternal.h:429
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:793
llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1969
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
canEvaluateShuffled
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
Definition: InstCombineVectorOps.cpp:1713
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2008
Instructions.h
cheapToScalarize
static bool cheapToScalarize(Value *V, Value *EI)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
Definition: InstCombineVectorOps.cpp:59
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
User.h
findDemandedEltsByAllUsers
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
Definition: InstCombineVectorOps.cpp:365
llvm::Value::DoPHITranslation
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:986
SmallBitVector.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
InstructionSimplify.h
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2815
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1455
foldFNegShuffle
static Instruction * foldFNegShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Canonicalize FP negate after shuffle.
Definition: InstCombineVectorOps.cpp:2394
llvm::PHINode
Definition: Instructions.h:2699
llvm::InstCombinerImpl::simplifyBinOpSplats
Instruction * simplifyBinOpSplats(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2683
llvm::Constant::getSplatValue
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1623
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1902
llvm::Instruction::copyIRFlags
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
Definition: Instruction.cpp:335
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl< int >
foldIdentityPaddedShuffles
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:2614
DerivedTypes.h
BinopElts::Op0
Value * Op0
Definition: InstCombineVectorOps.cpp:2000
llvm::PatternMatch::m_Neg
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Definition: PatternMatch.h:2273
getAlternateBinop
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
Definition: InstCombineVectorOps.cpp:2012
foldTruncInsElt
static Instruction * foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian, InstCombiner::BuilderTy &Builder)
Try to convert scalar extraction ops (shift+trunc) with insertelt to bitcast and shuffle: inselt V,...
Definition: InstCombineVectorOps.cpp:1521
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
foldSelectShuffleWith1Binop
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:2093
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1611
llvm::simplifyShuffleVectorInst
Value * simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
Definition: InstructionSimplify.cpp:5204
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1587
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2557
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2936
Value.h
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::Instruction::isExact
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
Definition: Instruction.cpp:232
llvm::Optional::value_or
constexpr T value_or(U &&alt) const &
Definition: Optional.h:291
llvm::LegacyLegalizeActions::NotFound
@ NotFound
Sentinel value for when no action was found in the specified table.
Definition: LegacyLegalizerInfo.h:74
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4731
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
BinopElts::BinopElts
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
Definition: InstCombineVectorOps.cpp:2002
BinopElts::Op1
Value * Op1
Definition: InstCombineVectorOps.cpp:2001
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:668
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::Instruction::dropPoisonGeneratingFlags
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition: Instruction.cpp:184
llvm::ShuffleVectorInst::changesLength
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
Definition: Instructions.h:2093
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::PoisonValue
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1404
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1732
llvm::ShuffleVectorInst::commute
void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
Definition: Instructions.cpp:2131