LLVM  10.0.0svn
InstCombineVectorOps.cpp
Go to the documentation of this file.
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements instcombine for ExtractElement, InsertElement and
10 // ShuffleVector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Operator.h"
30 #include "llvm/IR/PatternMatch.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/IR/User.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
37 #include <cassert>
38 #include <cstdint>
39 #include <iterator>
40 #include <utility>
41 
42 using namespace llvm;
43 using namespace PatternMatch;
44 
45 #define DEBUG_TYPE "instcombine"
46 
47 /// Return true if the value is cheaper to scalarize than it is to leave as a
48 /// vector operation. IsConstantExtractIndex indicates whether we are extracting
49 /// one known element from a vector constant.
50 ///
51 /// FIXME: It's possible to create more instructions than previously existed.
52 static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53  // If we can pick a scalar constant value out of a vector, that is free.
54  if (auto *C = dyn_cast<Constant>(V))
55  return IsConstantExtractIndex || C->getSplatValue();
56 
57  // An insertelement to the same constant index as our extract will simplify
58  // to the scalar inserted element. An insertelement to a different constant
59  // index is irrelevant to our extract.
61  return IsConstantExtractIndex;
62 
63  if (match(V, m_OneUse(m_Load(m_Value()))))
64  return true;
65 
66  Value *V0, *V1;
67  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69  cheapToScalarize(V1, IsConstantExtractIndex))
70  return true;
71 
72  CmpInst::Predicate UnusedPred;
73  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75  cheapToScalarize(V1, IsConstantExtractIndex))
76  return true;
77 
78  return false;
79 }
80 
81 // If we have a PHI node with a vector type that is only used to feed
82 // itself and be an operand of extractelement at a constant location,
83 // try to replace the PHI of the vector type with a PHI of a scalar type.
84 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
86  // The users we want the PHI to have are:
87  // 1) The EI ExtractElement (we already know this)
88  // 2) Possibly more ExtractElements with the same index.
89  // 3) Another operand, which will feed back into the PHI.
90  Instruction *PHIUser = nullptr;
91  for (auto U : PN->users()) {
92  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93  if (EI.getIndexOperand() == EU->getIndexOperand())
94  Extracts.push_back(EU);
95  else
96  return nullptr;
97  } else if (!PHIUser) {
98  PHIUser = cast<Instruction>(U);
99  } else {
100  return nullptr;
101  }
102  }
103 
104  if (!PHIUser)
105  return nullptr;
106 
107  // Verify that this PHI user has one use, which is the PHI itself,
108  // and that it is a binary operation which is cheap to scalarize.
109  // otherwise return nullptr.
110  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
111  !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
112  return nullptr;
113 
114  // Create a scalar PHI node that will replace the vector PHI node
115  // just before the current PHI node.
116  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118  // Scalarize each PHI operand.
119  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
120  Value *PHIInVal = PN->getIncomingValue(i);
121  BasicBlock *inBB = PN->getIncomingBlock(i);
122  Value *Elt = EI.getIndexOperand();
123  // If the operand is the PHI induction variable:
124  if (PHIInVal == PHIUser) {
125  // Scalarize the binary operation. Its first operand is the
126  // scalar PHI, and the second operand is extracted from the other
127  // vector operand.
128  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
130  Value *Op = InsertNewInstWith(
131  ExtractElementInst::Create(B0->getOperand(opId), Elt,
132  B0->getOperand(opId)->getName() + ".Elt"),
133  *B0);
134  Value *newPHIUser = InsertNewInstWith(
136  scalarPHI, Op, B0), *B0);
137  scalarPHI->addIncoming(newPHIUser, inBB);
138  } else {
139  // Scalarize PHI input:
140  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141  // Insert the new instruction into the predecessor basic block.
142  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143  BasicBlock::iterator InsertPos;
144  if (pos && !isa<PHINode>(pos)) {
145  InsertPos = ++pos->getIterator();
146  } else {
147  InsertPos = inBB->getFirstInsertionPt();
148  }
149 
150  InsertNewInstWith(newEI, *InsertPos);
151 
152  scalarPHI->addIncoming(newEI, inBB);
153  }
154  }
155 
156  for (auto E : Extracts)
157  replaceInstUsesWith(*E, scalarPHI);
158 
159  return &EI;
160 }
161 
163  InstCombiner::BuilderTy &Builder,
164  bool IsBigEndian) {
165  Value *X;
166  uint64_t ExtIndexC;
167  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
168  !X->getType()->isVectorTy() ||
169  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
170  return nullptr;
171 
172  // If this extractelement is using a bitcast from a vector of the same number
173  // of elements, see if we can find the source element from the source vector:
174  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175  Type *SrcTy = X->getType();
176  Type *DestTy = Ext.getType();
177  unsigned NumSrcElts = SrcTy->getVectorNumElements();
178  unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179  if (NumSrcElts == NumElts)
180  if (Value *Elt = findScalarElement(X, ExtIndexC))
181  return new BitCastInst(Elt, DestTy);
182 
183  // If the source elements are wider than the destination, try to shift and
184  // truncate a subset of scalar bits of an insert op.
185  if (NumSrcElts < NumElts) {
186  Value *Scalar;
187  uint64_t InsIndexC;
188  if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
189  m_ConstantInt(InsIndexC))))
190  return nullptr;
191 
192  // The extract must be from the subset of vector elements that we inserted
193  // into. Example: if we inserted element 1 of a <2 x i64> and we are
194  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195  // of elements 4-7 of the bitcasted vector.
196  unsigned NarrowingRatio = NumElts / NumSrcElts;
197  if (ExtIndexC / NarrowingRatio != InsIndexC)
198  return nullptr;
199 
200  // We are extracting part of the original scalar. How that scalar is
201  // inserted into the vector depends on the endian-ness. Example:
202  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
203  // +--+--+--+--+--+--+--+--+
204  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
205  // extelt <4 x i16> V', 3: | |S2|S3|
206  // +--+--+--+--+--+--+--+--+
207  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209  // In this example, we must right-shift little-endian. Big-endian is just a
210  // truncate.
211  unsigned Chunk = ExtIndexC % NarrowingRatio;
212  if (IsBigEndian)
213  Chunk = NarrowingRatio - 1 - Chunk;
214 
215  // Bail out if this is an FP vector to FP vector sequence. That would take
216  // more instructions than we started with unless there is no shift, and it
217  // may not be handled as well in the backend.
218  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219  bool NeedDestBitcast = DestTy->isFloatingPointTy();
220  if (NeedSrcBitcast && NeedDestBitcast)
221  return nullptr;
222 
223  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225  unsigned ShAmt = Chunk * DestWidth;
226 
227  // TODO: This limitation is more strict than necessary. We could sum the
228  // number of new instructions and subtract the number eliminated to know if
229  // we can proceed.
230  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
231  if (NeedSrcBitcast || NeedDestBitcast)
232  return nullptr;
233 
234  if (NeedSrcBitcast) {
235  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237  }
238 
239  if (ShAmt) {
240  // Bail out if we could end with more instructions than we started with.
241  if (!Ext.getVectorOperand()->hasOneUse())
242  return nullptr;
243  Scalar = Builder.CreateLShr(Scalar, ShAmt);
244  }
245 
246  if (NeedDestBitcast) {
247  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249  }
250  return new TruncInst(Scalar, DestTy);
251  }
252 
253  return nullptr;
254 }
255 
257  Value *SrcVec = EI.getVectorOperand();
258  Value *Index = EI.getIndexOperand();
259  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
260  SQ.getWithInstruction(&EI)))
261  return replaceInstUsesWith(EI, V);
262 
263  // If extracting a specified index from the vector, see if we can recursively
264  // find a previously computed scalar that was inserted into the vector.
265  auto *IndexC = dyn_cast<ConstantInt>(Index);
266  if (IndexC) {
267  unsigned NumElts = EI.getVectorOperandType()->getNumElements();
268 
269  // InstSimplify should handle cases where the index is invalid.
270  if (!IndexC->getValue().ule(NumElts))
271  return nullptr;
272 
273  // This instruction only demands the single element from the input vector.
274  // If the input vector has a single use, simplify it based on this use
275  // property.
276  if (SrcVec->hasOneUse() && NumElts != 1) {
277  APInt UndefElts(NumElts, 0);
278  APInt DemandedElts(NumElts, 0);
279  DemandedElts.setBit(IndexC->getZExtValue());
280  if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
281  UndefElts)) {
282  EI.setOperand(0, V);
283  return &EI;
284  }
285  }
286 
287  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
288  return I;
289 
290  // If there's a vector PHI feeding a scalar use through this extractelement
291  // instruction, try to scalarize the PHI.
292  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
293  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
294  return ScalarPHI;
295  }
296 
297  BinaryOperator *BO;
298  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
299  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
300  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
301  Value *E0 = Builder.CreateExtractElement(X, Index);
302  Value *E1 = Builder.CreateExtractElement(Y, Index);
303  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
304  }
305 
306  Value *X, *Y;
307  CmpInst::Predicate Pred;
308  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
309  cheapToScalarize(SrcVec, IndexC)) {
310  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
311  Value *E0 = Builder.CreateExtractElement(X, Index);
312  Value *E1 = Builder.CreateExtractElement(Y, Index);
313  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
314  }
315 
316  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
317  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
318  // Extracting the inserted element?
319  if (IE->getOperand(2) == Index)
320  return replaceInstUsesWith(EI, IE->getOperand(1));
321  // If the inserted and extracted elements are constants, they must not
322  // be the same value, extract from the pre-inserted value instead.
323  if (isa<Constant>(IE->getOperand(2)) && IndexC) {
324  Worklist.AddValue(SrcVec);
325  EI.setOperand(0, IE->getOperand(0));
326  return &EI;
327  }
328  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
329  // If this is extracting an element from a shufflevector, figure out where
330  // it came from and extract from the appropriate input element instead.
331  if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
332  int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
333  Value *Src;
334  unsigned LHSWidth =
335  SVI->getOperand(0)->getType()->getVectorNumElements();
336 
337  if (SrcIdx < 0)
338  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
339  if (SrcIdx < (int)LHSWidth)
340  Src = SVI->getOperand(0);
341  else {
342  SrcIdx -= LHSWidth;
343  Src = SVI->getOperand(1);
344  }
346  return ExtractElementInst::Create(Src,
347  ConstantInt::get(Int32Ty,
348  SrcIdx, false));
349  }
350  } else if (auto *CI = dyn_cast<CastInst>(I)) {
351  // Canonicalize extractelement(cast) -> cast(extractelement).
352  // Bitcasts can change the number of vector elements, and they cost
353  // nothing.
354  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
355  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
356  Worklist.AddValue(EE);
357  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
358  }
359  }
360  }
361  return nullptr;
362 }
363 
364 /// If V is a shuffle of values that ONLY returns elements from either LHS or
365 /// RHS, return the shuffle mask and true. Otherwise, return false.
366 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
368  assert(LHS->getType() == RHS->getType() &&
369  "Invalid CollectSingleShuffleElements");
370  unsigned NumElts = V->getType()->getVectorNumElements();
371 
372  if (isa<UndefValue>(V)) {
373  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
374  return true;
375  }
376 
377  if (V == LHS) {
378  for (unsigned i = 0; i != NumElts; ++i)
380  return true;
381  }
382 
383  if (V == RHS) {
384  for (unsigned i = 0; i != NumElts; ++i)
386  i+NumElts));
387  return true;
388  }
389 
390  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
391  // If this is an insert of an extract from some other vector, include it.
392  Value *VecOp = IEI->getOperand(0);
393  Value *ScalarOp = IEI->getOperand(1);
394  Value *IdxOp = IEI->getOperand(2);
395 
396  if (!isa<ConstantInt>(IdxOp))
397  return false;
398  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
399 
400  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
401  // We can handle this if the vector we are inserting into is
402  // transitively ok.
403  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
404  // If so, update the mask to reflect the inserted undef.
405  Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
406  return true;
407  }
408  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
409  if (isa<ConstantInt>(EI->getOperand(1))) {
410  unsigned ExtractedIdx =
411  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
412  unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
413 
414  // This must be extracting from either LHS or RHS.
415  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
416  // We can handle this if the vector we are inserting into is
417  // transitively ok.
418  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
419  // If so, update the mask to reflect the inserted value.
420  if (EI->getOperand(0) == LHS) {
421  Mask[InsertedIdx % NumElts] =
423  ExtractedIdx);
424  } else {
425  assert(EI->getOperand(0) == RHS);
426  Mask[InsertedIdx % NumElts] =
428  ExtractedIdx + NumLHSElts);
429  }
430  return true;
431  }
432  }
433  }
434  }
435  }
436 
437  return false;
438 }
439 
440 /// If we have insertion into a vector that is wider than the vector that we
441 /// are extracting from, try to widen the source vector to allow a single
442 /// shufflevector to replace one or more insert/extract pairs.
444  ExtractElementInst *ExtElt,
445  InstCombiner &IC) {
446  VectorType *InsVecType = InsElt->getType();
447  VectorType *ExtVecType = ExtElt->getVectorOperandType();
448  unsigned NumInsElts = InsVecType->getVectorNumElements();
449  unsigned NumExtElts = ExtVecType->getVectorNumElements();
450 
451  // The inserted-to vector must be wider than the extracted-from vector.
452  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
453  NumExtElts >= NumInsElts)
454  return;
455 
456  // Create a shuffle mask to widen the extended-from vector using undefined
457  // values. The mask selects all of the values of the original vector followed
458  // by as many undefined values as needed to create a vector of the same length
459  // as the inserted-to vector.
460  SmallVector<Constant *, 16> ExtendMask;
461  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
462  for (unsigned i = 0; i < NumExtElts; ++i)
463  ExtendMask.push_back(ConstantInt::get(IntType, i));
464  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
465  ExtendMask.push_back(UndefValue::get(IntType));
466 
467  Value *ExtVecOp = ExtElt->getVectorOperand();
468  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
469  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
470  ? ExtVecOpInst->getParent()
471  : ExtElt->getParent();
472 
473  // TODO: This restriction matches the basic block check below when creating
474  // new extractelement instructions. If that limitation is removed, this one
475  // could also be removed. But for now, we just bail out to ensure that we
476  // will replace the extractelement instruction that is feeding our
477  // insertelement instruction. This allows the insertelement to then be
478  // replaced by a shufflevector. If the insertelement is not replaced, we can
479  // induce infinite looping because there's an optimization for extractelement
480  // that will delete our widening shuffle. This would trigger another attempt
481  // here to create that shuffle, and we spin forever.
482  if (InsertionBlock != InsElt->getParent())
483  return;
484 
485  // TODO: This restriction matches the check in visitInsertElementInst() and
486  // prevents an infinite loop caused by not turning the extract/insert pair
487  // into a shuffle. We really should not need either check, but we're lacking
488  // folds for shufflevectors because we're afraid to generate shuffle masks
489  // that the backend can't handle.
490  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
491  return;
492 
493  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
494  ConstantVector::get(ExtendMask));
495 
496  // Insert the new shuffle after the vector operand of the extract is defined
497  // (as long as it's not a PHI) or at the start of the basic block of the
498  // extract, so any subsequent extracts in the same basic block can use it.
499  // TODO: Insert before the earliest ExtractElementInst that is replaced.
500  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
501  WideVec->insertAfter(ExtVecOpInst);
502  else
503  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
504 
505  // Replace extracts from the original narrow vector with extracts from the new
506  // wide vector.
507  for (User *U : ExtVecOp->users()) {
509  if (!OldExt || OldExt->getParent() != WideVec->getParent())
510  continue;
511  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
512  NewExt->insertAfter(OldExt);
513  IC.replaceInstUsesWith(*OldExt, NewExt);
514  }
515 }
516 
517 /// We are building a shuffle to create V, which is a sequence of insertelement,
518 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
519 /// not rely on the second vector source. Return a std::pair containing the
520 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
521 /// parameter as required.
522 ///
523 /// Note: we intentionally don't try to fold earlier shuffles since they have
524 /// often been chosen carefully to be efficiently implementable on the target.
525 using ShuffleOps = std::pair<Value *, Value *>;
526 
529  Value *PermittedRHS,
530  InstCombiner &IC) {
531  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
532  unsigned NumElts = V->getType()->getVectorNumElements();
533 
534  if (isa<UndefValue>(V)) {
535  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
536  return std::make_pair(
537  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
538  }
539 
540  if (isa<ConstantAggregateZero>(V)) {
541  Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
542  return std::make_pair(V, nullptr);
543  }
544 
545  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
546  // If this is an insert of an extract from some other vector, include it.
547  Value *VecOp = IEI->getOperand(0);
548  Value *ScalarOp = IEI->getOperand(1);
549  Value *IdxOp = IEI->getOperand(2);
550 
551  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
552  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
553  unsigned ExtractedIdx =
554  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
555  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
556 
557  // Either the extracted from or inserted into vector must be RHSVec,
558  // otherwise we'd end up with a shuffle of three inputs.
559  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
560  Value *RHS = EI->getOperand(0);
561  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
562  assert(LR.second == nullptr || LR.second == RHS);
563 
564  if (LR.first->getType() != RHS->getType()) {
565  // Although we are giving up for now, see if we can create extracts
566  // that match the inserts for another round of combining.
567  replaceExtractElements(IEI, EI, IC);
568 
569  // We tried our best, but we can't find anything compatible with RHS
570  // further up the chain. Return a trivial shuffle.
571  for (unsigned i = 0; i < NumElts; ++i)
572  Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
573  return std::make_pair(V, nullptr);
574  }
575 
576  unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
577  Mask[InsertedIdx % NumElts] =
579  NumLHSElts+ExtractedIdx);
580  return std::make_pair(LR.first, RHS);
581  }
582 
583  if (VecOp == PermittedRHS) {
584  // We've gone as far as we can: anything on the other side of the
585  // extractelement will already have been converted into a shuffle.
586  unsigned NumLHSElts =
588  for (unsigned i = 0; i != NumElts; ++i)
591  i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
592  return std::make_pair(EI->getOperand(0), PermittedRHS);
593  }
594 
595  // If this insertelement is a chain that comes from exactly these two
596  // vectors, return the vector and the effective shuffle.
597  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
598  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
599  Mask))
600  return std::make_pair(EI->getOperand(0), PermittedRHS);
601  }
602  }
603  }
604 
605  // Otherwise, we can't do anything fancy. Return an identity vector.
606  for (unsigned i = 0; i != NumElts; ++i)
608  return std::make_pair(V, nullptr);
609 }
610 
611 /// Try to find redundant insertvalue instructions, like the following ones:
612 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
613 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
614 /// Here the second instruction inserts values at the same indices, as the
615 /// first one, making the first one redundant.
616 /// It should be transformed to:
617 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
619  bool IsRedundant = false;
620  ArrayRef<unsigned int> FirstIndices = I.getIndices();
621 
622  // If there is a chain of insertvalue instructions (each of them except the
623  // last one has only one use and it's another insertvalue insn from this
624  // chain), check if any of the 'children' uses the same indices as the first
625  // instruction. In this case, the first one is redundant.
626  Value *V = &I;
627  unsigned Depth = 0;
628  while (V->hasOneUse() && Depth < 10) {
629  User *U = V->user_back();
630  auto UserInsInst = dyn_cast<InsertValueInst>(U);
631  if (!UserInsInst || U->getOperand(0) != V)
632  break;
633  if (UserInsInst->getIndices() == FirstIndices) {
634  IsRedundant = true;
635  break;
636  }
637  V = UserInsInst;
638  Depth++;
639  }
640 
641  if (IsRedundant)
642  return replaceInstUsesWith(I, I.getOperand(0));
643  return nullptr;
644 }
645 
647  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
648  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
649 
650  // A vector select does not change the size of the operands.
651  if (MaskSize != VecSize)
652  return false;
653 
654  // Each mask element must be undefined or choose a vector element from one of
655  // the source operands without crossing vector lanes.
656  for (int i = 0; i != MaskSize; ++i) {
657  int Elt = Shuf.getMaskValue(i);
658  if (Elt != -1 && Elt != i && Elt != i + VecSize)
659  return false;
660  }
661 
662  return true;
663 }
664 
665 /// Turn a chain of inserts that splats a value into an insert + shuffle:
666 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
667 /// shufflevector(insertelt(X, %k, 0), undef, zero)
669  // We are interested in the last insert in a chain. So if this insert has a
670  // single user and that user is an insert, bail.
671  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
672  return nullptr;
673 
674  auto *VecTy = cast<VectorType>(InsElt.getType());
675  unsigned NumElements = VecTy->getNumElements();
676 
677  // Do not try to do this for a one-element vector, since that's a nop,
678  // and will cause an inf-loop.
679  if (NumElements == 1)
680  return nullptr;
681 
682  Value *SplatVal = InsElt.getOperand(1);
683  InsertElementInst *CurrIE = &InsElt;
684  SmallVector<bool, 16> ElementPresent(NumElements, false);
685  InsertElementInst *FirstIE = nullptr;
686 
687  // Walk the chain backwards, keeping track of which indices we inserted into,
688  // until we hit something that isn't an insert of the splatted value.
689  while (CurrIE) {
690  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
691  if (!Idx || CurrIE->getOperand(1) != SplatVal)
692  return nullptr;
693 
694  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
695  // Check none of the intermediate steps have any additional uses, except
696  // for the root insertelement instruction, which can be re-used, if it
697  // inserts at position 0.
698  if (CurrIE != &InsElt &&
699  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
700  return nullptr;
701 
702  ElementPresent[Idx->getZExtValue()] = true;
703  FirstIE = CurrIE;
704  CurrIE = NextIE;
705  }
706 
707  // If this is just a single insertelement (not a sequence), we are done.
708  if (FirstIE == &InsElt)
709  return nullptr;
710 
711  // If we are not inserting into an undef vector, make sure we've seen an
712  // insert into every element.
713  // TODO: If the base vector is not undef, it might be better to create a splat
714  // and then a select-shuffle (blend) with the base vector.
715  if (!isa<UndefValue>(FirstIE->getOperand(0)))
716  if (any_of(ElementPresent, [](bool Present) { return !Present; }))
717  return nullptr;
718 
719  // Create the insert + shuffle.
721  UndefValue *UndefVec = UndefValue::get(VecTy);
722  Constant *Zero = ConstantInt::get(Int32Ty, 0);
723  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
724  FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
725 
726  // Splat from element 0, but replace absent elements with undef in the mask.
727  SmallVector<Constant *, 16> Mask(NumElements, Zero);
728  for (unsigned i = 0; i != NumElements; ++i)
729  if (!ElementPresent[i])
730  Mask[i] = UndefValue::get(Int32Ty);
731 
732  return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
733 }
734 
735 /// Try to fold an insert element into an existing splat shuffle by changing
736 /// the shuffle's mask to include the index of this insert element.
738  // Check if the vector operand of this insert is a canonical splat shuffle.
739  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
740  if (!Shuf || !Shuf->isZeroEltSplat())
741  return nullptr;
742 
743  // Check for a constant insertion index.
744  uint64_t IdxC;
745  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
746  return nullptr;
747 
748  // Check if the splat shuffle's input is the same as this insert's scalar op.
749  Value *X = InsElt.getOperand(1);
750  Value *Op0 = Shuf->getOperand(0);
751  if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
752  return nullptr;
753 
754  // Replace the shuffle mask element at the index of this insert with a zero.
755  // For example:
756  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
757  // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
758  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
759  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
760  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
761  Constant *Zero = ConstantInt::getNullValue(I32Ty);
762  for (unsigned i = 0; i != NumMaskElts; ++i)
763  NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
764 
765  Constant *NewMask = ConstantVector::get(NewMaskVec);
766  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
767 }
768 
769 /// Try to fold an extract+insert element into an existing identity shuffle by
770 /// changing the shuffle's mask to include the index of this insert element.
772  // Check if the vector operand of this insert is an identity shuffle.
773  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
774  if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
775  !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
776  return nullptr;
777 
778  // Check for a constant insertion index.
779  uint64_t IdxC;
780  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
781  return nullptr;
782 
783  // Check if this insert's scalar op is extracted from the identity shuffle's
784  // input vector.
785  Value *Scalar = InsElt.getOperand(1);
786  Value *X = Shuf->getOperand(0);
787  if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
788  return nullptr;
789 
790  // Replace the shuffle mask element at the index of this extract+insert with
791  // that same index value.
792  // For example:
793  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
794  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
795  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
796  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
797  Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC);
798  Constant *OldMask = Shuf->getMask();
799  for (unsigned i = 0; i != NumMaskElts; ++i) {
800  if (i != IdxC) {
801  // All mask elements besides the inserted element remain the same.
802  NewMaskVec[i] = OldMask->getAggregateElement(i);
803  } else if (OldMask->getAggregateElement(i) == NewMaskEltC) {
804  // If the mask element was already set, there's nothing to do
805  // (demanded elements analysis may unset it later).
806  return nullptr;
807  } else {
808  assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&
809  "Unexpected shuffle mask element for identity shuffle");
810  NewMaskVec[i] = NewMaskEltC;
811  }
812  }
813 
814  Constant *NewMask = ConstantVector::get(NewMaskVec);
815  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
816 }
817 
818 /// If we have an insertelement instruction feeding into another insertelement
819 /// and the 2nd is inserting a constant into the vector, canonicalize that
820 /// constant insertion before the insertion of a variable:
821 ///
822 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
823 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
824 ///
825 /// This has the potential of eliminating the 2nd insertelement instruction
826 /// via constant folding of the scalar constant into a vector constant.
828  InstCombiner::BuilderTy &Builder) {
829  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
830  if (!InsElt1 || !InsElt1->hasOneUse())
831  return nullptr;
832 
833  Value *X, *Y;
834  Constant *ScalarC;
835  ConstantInt *IdxC1, *IdxC2;
836  if (match(InsElt1->getOperand(0), m_Value(X)) &&
837  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
838  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
839  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
840  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
841  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
842  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
843  }
844 
845  return nullptr;
846 }
847 
848 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
849 /// --> shufflevector X, CVec', Mask'
851  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
852  // Bail out if the parent has more than one use. In that case, we'd be
853  // replacing the insertelt with a shuffle, and that's not a clear win.
854  if (!Inst || !Inst->hasOneUse())
855  return nullptr;
856  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
857  // The shuffle must have a constant vector operand. The insertelt must have
858  // a constant scalar being inserted at a constant position in the vector.
859  Constant *ShufConstVec, *InsEltScalar;
860  uint64_t InsEltIndex;
861  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
862  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
863  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
864  return nullptr;
865 
866  // Adding an element to an arbitrary shuffle could be expensive, but a
867  // shuffle that selects elements from vectors without crossing lanes is
868  // assumed cheap.
869  // If we're just adding a constant into that shuffle, it will still be
870  // cheap.
871  if (!isShuffleEquivalentToSelect(*Shuf))
872  return nullptr;
873 
874  // From the above 'select' check, we know that the mask has the same number
875  // of elements as the vector input operands. We also know that each constant
876  // input element is used in its lane and can not be used more than once by
877  // the shuffle. Therefore, replace the constant in the shuffle's constant
878  // vector with the insertelt constant. Replace the constant in the shuffle's
879  // mask vector with the insertelt index plus the length of the vector
880  // (because the constant vector operand of a shuffle is always the 2nd
881  // operand).
882  Constant *Mask = Shuf->getMask();
883  unsigned NumElts = Mask->getType()->getVectorNumElements();
884  SmallVector<Constant *, 16> NewShufElts(NumElts);
885  SmallVector<Constant *, 16> NewMaskElts(NumElts);
886  for (unsigned I = 0; I != NumElts; ++I) {
887  if (I == InsEltIndex) {
888  NewShufElts[I] = InsEltScalar;
889  Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
890  NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
891  } else {
892  // Copy over the existing values.
893  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
894  NewMaskElts[I] = Mask->getAggregateElement(I);
895  }
896  }
897 
898  // Create new operands for a shuffle that includes the constant of the
899  // original insertelt. The old shuffle will be dead now.
900  return new ShuffleVectorInst(Shuf->getOperand(0),
901  ConstantVector::get(NewShufElts),
902  ConstantVector::get(NewMaskElts));
903  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
904  // Transform sequences of insertelements ops with constant data/indexes into
905  // a single shuffle op.
906  unsigned NumElts = InsElt.getType()->getNumElements();
907 
908  uint64_t InsertIdx[2];
909  Constant *Val[2];
910  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
911  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
912  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
913  !match(IEI->getOperand(1), m_Constant(Val[1])))
914  return nullptr;
915  SmallVector<Constant *, 16> Values(NumElts);
917  auto ValI = std::begin(Val);
918  // Generate new constant vector and mask.
919  // We have 2 values/masks from the insertelements instructions. Insert them
920  // into new value/mask vectors.
921  for (uint64_t I : InsertIdx) {
922  if (!Values[I]) {
923  assert(!Mask[I]);
924  Values[I] = *ValI;
925  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
926  NumElts + I);
927  }
928  ++ValI;
929  }
930  // Remaining values are filled with 'undef' values.
931  for (unsigned I = 0; I < NumElts; ++I) {
932  if (!Values[I]) {
933  assert(!Mask[I]);
934  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
935  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
936  }
937  }
938  // Create new operands for a shuffle that includes the constant of the
939  // original insertelt.
940  return new ShuffleVectorInst(IEI->getOperand(0),
941  ConstantVector::get(Values),
942  ConstantVector::get(Mask));
943  }
944  return nullptr;
945 }
946 
948  Value *VecOp = IE.getOperand(0);
949  Value *ScalarOp = IE.getOperand(1);
950  Value *IdxOp = IE.getOperand(2);
951 
952  if (auto *V = SimplifyInsertElementInst(
953  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
954  return replaceInstUsesWith(IE, V);
955 
956  // If the vector and scalar are both bitcast from the same element type, do
957  // the insert in that source type followed by bitcast.
958  Value *VecSrc, *ScalarSrc;
959  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
960  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
961  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
962  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
963  VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
964  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
965  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
966  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
967  return new BitCastInst(NewInsElt, IE.getType());
968  }
969 
970  // If the inserted element was extracted from some other vector and both
971  // indexes are valid constants, try to turn this into a shuffle.
972  uint64_t InsertedIdx, ExtractedIdx;
973  Value *ExtVecOp;
974  if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
975  match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
976  m_ConstantInt(ExtractedIdx))) &&
977  ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
978  // TODO: Looking at the user(s) to determine if this insert is a
979  // fold-to-shuffle opportunity does not match the usual instcombine
980  // constraints. We should decide if the transform is worthy based only
981  // on this instruction and its operands, but that may not work currently.
982  //
983  // Here, we are trying to avoid creating shuffles before reaching
984  // the end of a chain of extract-insert pairs. This is complicated because
985  // we do not generally form arbitrary shuffle masks in instcombine
986  // (because those may codegen poorly), but collectShuffleElements() does
987  // exactly that.
988  //
989  // The rules for determining what is an acceptable target-independent
990  // shuffle mask are fuzzy because they evolve based on the backend's
991  // capabilities and real-world impact.
992  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
993  if (!Insert.hasOneUse())
994  return true;
995  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
996  if (!InsertUser)
997  return true;
998  return false;
999  };
1000 
1001  // Try to form a shuffle from a chain of extract-insert ops.
1002  if (isShuffleRootCandidate(IE)) {
1004  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1005 
1006  // The proposed shuffle may be trivial, in which case we shouldn't
1007  // perform the combine.
1008  if (LR.first != &IE && LR.second != &IE) {
1009  // We now have a shuffle of LHS, RHS, Mask.
1010  if (LR.second == nullptr)
1011  LR.second = UndefValue::get(LR.first->getType());
1012  return new ShuffleVectorInst(LR.first, LR.second,
1013  ConstantVector::get(Mask));
1014  }
1015  }
1016  }
1017 
1018  unsigned VWidth = VecOp->getType()->getVectorNumElements();
1019  APInt UndefElts(VWidth, 0);
1020  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1021  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1022  if (V != &IE)
1023  return replaceInstUsesWith(IE, V);
1024  return &IE;
1025  }
1026 
1028  return Shuf;
1029 
1030  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1031  return NewInsElt;
1032 
1033  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1034  return Broadcast;
1035 
1036  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1037  return Splat;
1038 
1039  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1040  return IdentityShuf;
1041 
1042  return nullptr;
1043 }
1044 
1045 /// Return true if we can evaluate the specified expression tree if the vector
1046 /// elements were shuffled in a different order.
1048  unsigned Depth = 5) {
1049  // We can always reorder the elements of a constant.
1050  if (isa<Constant>(V))
1051  return true;
1052 
1053  // We won't reorder vector arguments. No IPO here.
1055  if (!I) return false;
1056 
1057  // Two users may expect different orders of the elements. Don't try it.
1058  if (!I->hasOneUse())
1059  return false;
1060 
1061  if (Depth == 0) return false;
1062 
1063  switch (I->getOpcode()) {
1064  case Instruction::Add:
1065  case Instruction::FAdd:
1066  case Instruction::Sub:
1067  case Instruction::FSub:
1068  case Instruction::Mul:
1069  case Instruction::FMul:
1070  case Instruction::UDiv:
1071  case Instruction::SDiv:
1072  case Instruction::FDiv:
1073  case Instruction::URem:
1074  case Instruction::SRem:
1075  case Instruction::FRem:
1076  case Instruction::Shl:
1077  case Instruction::LShr:
1078  case Instruction::AShr:
1079  case Instruction::And:
1080  case Instruction::Or:
1081  case Instruction::Xor:
1082  case Instruction::ICmp:
1083  case Instruction::FCmp:
1084  case Instruction::Trunc:
1085  case Instruction::ZExt:
1086  case Instruction::SExt:
1087  case Instruction::FPToUI:
1088  case Instruction::FPToSI:
1089  case Instruction::UIToFP:
1090  case Instruction::SIToFP:
1091  case Instruction::FPTrunc:
1092  case Instruction::FPExt:
1093  case Instruction::GetElementPtr: {
1094  // Bail out if we would create longer vector ops. We could allow creating
1095  // longer vector ops, but that may result in more expensive codegen. We
1096  // would also need to limit the transform to avoid undefined behavior for
1097  // integer div/rem.
1098  Type *ITy = I->getType();
1099  if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1100  return false;
1101  for (Value *Operand : I->operands()) {
1102  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1103  return false;
1104  }
1105  return true;
1106  }
1107  case Instruction::InsertElement: {
1109  if (!CI) return false;
1110  int ElementNumber = CI->getLimitedValue();
1111 
1112  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1113  // can't put an element into multiple indices.
1114  bool SeenOnce = false;
1115  for (int i = 0, e = Mask.size(); i != e; ++i) {
1116  if (Mask[i] == ElementNumber) {
1117  if (SeenOnce)
1118  return false;
1119  SeenOnce = true;
1120  }
1121  }
1122  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1123  }
1124  }
1125  return false;
1126 }
1127 
1128 /// Rebuild a new instruction just like 'I' but with the new operands given.
1129 /// In the event of type mismatch, the type of the operands is correct.
1131  // We don't want to use the IRBuilder here because we want the replacement
1132  // instructions to appear next to 'I', not the builder's insertion point.
1133  switch (I->getOpcode()) {
1134  case Instruction::Add:
1135  case Instruction::FAdd:
1136  case Instruction::Sub:
1137  case Instruction::FSub:
1138  case Instruction::Mul:
1139  case Instruction::FMul:
1140  case Instruction::UDiv:
1141  case Instruction::SDiv:
1142  case Instruction::FDiv:
1143  case Instruction::URem:
1144  case Instruction::SRem:
1145  case Instruction::FRem:
1146  case Instruction::Shl:
1147  case Instruction::LShr:
1148  case Instruction::AShr:
1149  case Instruction::And:
1150  case Instruction::Or:
1151  case Instruction::Xor: {
1152  BinaryOperator *BO = cast<BinaryOperator>(I);
1153  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1154  BinaryOperator *New =
1155  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1156  NewOps[0], NewOps[1], "", BO);
1157  if (isa<OverflowingBinaryOperator>(BO)) {
1158  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1159  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1160  }
1161  if (isa<PossiblyExactOperator>(BO)) {
1162  New->setIsExact(BO->isExact());
1163  }
1164  if (isa<FPMathOperator>(BO))
1165  New->copyFastMathFlags(I);
1166  return New;
1167  }
1168  case Instruction::ICmp:
1169  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1170  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1171  NewOps[0], NewOps[1]);
1172  case Instruction::FCmp:
1173  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1174  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1175  NewOps[0], NewOps[1]);
1176  case Instruction::Trunc:
1177  case Instruction::ZExt:
1178  case Instruction::SExt:
1179  case Instruction::FPToUI:
1180  case Instruction::FPToSI:
1181  case Instruction::UIToFP:
1182  case Instruction::SIToFP:
1183  case Instruction::FPTrunc:
1184  case Instruction::FPExt: {
1185  // It's possible that the mask has a different number of elements from
1186  // the original cast. We recompute the destination type to match the mask.
1187  Type *DestTy =
1189  NewOps[0]->getType()->getVectorNumElements());
1190  assert(NewOps.size() == 1 && "cast with #ops != 1");
1191  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1192  "", I);
1193  }
1194  case Instruction::GetElementPtr: {
1195  Value *Ptr = NewOps[0];
1196  ArrayRef<Value*> Idx = NewOps.slice(1);
1198  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1199  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1200  return GEP;
1201  }
1202  }
1203  llvm_unreachable("failed to rebuild vector instructions");
1204 }
1205 
1207  // Mask.size() does not need to be equal to the number of vector elements.
1208 
1209  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1210  Type *EltTy = V->getType()->getScalarType();
1211  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1212  if (isa<UndefValue>(V))
1213  return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1214 
1215  if (isa<ConstantAggregateZero>(V))
1216  return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1217 
1218  if (Constant *C = dyn_cast<Constant>(V)) {
1219  SmallVector<Constant *, 16> MaskValues;
1220  for (int i = 0, e = Mask.size(); i != e; ++i) {
1221  if (Mask[i] == -1)
1222  MaskValues.push_back(UndefValue::get(I32Ty));
1223  else
1224  MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1225  }
1227  ConstantVector::get(MaskValues));
1228  }
1229 
1230  Instruction *I = cast<Instruction>(V);
1231  switch (I->getOpcode()) {
1232  case Instruction::Add:
1233  case Instruction::FAdd:
1234  case Instruction::Sub:
1235  case Instruction::FSub:
1236  case Instruction::Mul:
1237  case Instruction::FMul:
1238  case Instruction::UDiv:
1239  case Instruction::SDiv:
1240  case Instruction::FDiv:
1241  case Instruction::URem:
1242  case Instruction::SRem:
1243  case Instruction::FRem:
1244  case Instruction::Shl:
1245  case Instruction::LShr:
1246  case Instruction::AShr:
1247  case Instruction::And:
1248  case Instruction::Or:
1249  case Instruction::Xor:
1250  case Instruction::ICmp:
1251  case Instruction::FCmp:
1252  case Instruction::Trunc:
1253  case Instruction::ZExt:
1254  case Instruction::SExt:
1255  case Instruction::FPToUI:
1256  case Instruction::FPToSI:
1257  case Instruction::UIToFP:
1258  case Instruction::SIToFP:
1259  case Instruction::FPTrunc:
1260  case Instruction::FPExt:
1261  case Instruction::Select:
1262  case Instruction::GetElementPtr: {
1263  SmallVector<Value*, 8> NewOps;
1264  bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1265  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1266  Value *V;
1267  // Recursively call evaluateInDifferentElementOrder on vector arguments
1268  // as well. E.g. GetElementPtr may have scalar operands even if the
1269  // return value is a vector, so we need to examine the operand type.
1270  if (I->getOperand(i)->getType()->isVectorTy())
1271  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1272  else
1273  V = I->getOperand(i);
1274  NewOps.push_back(V);
1275  NeedsRebuild |= (V != I->getOperand(i));
1276  }
1277  if (NeedsRebuild) {
1278  return buildNew(I, NewOps);
1279  }
1280  return I;
1281  }
1282  case Instruction::InsertElement: {
1283  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1284 
1285  // The insertelement was inserting at Element. Figure out which element
1286  // that becomes after shuffling. The answer is guaranteed to be unique
1287  // by CanEvaluateShuffled.
1288  bool Found = false;
1289  int Index = 0;
1290  for (int e = Mask.size(); Index != e; ++Index) {
1291  if (Mask[Index] == Element) {
1292  Found = true;
1293  break;
1294  }
1295  }
1296 
1297  // If element is not in Mask, no need to handle the operand 1 (element to
1298  // be inserted). Just evaluate values in operand 0 according to Mask.
1299  if (!Found)
1300  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1301 
1302  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1303  return InsertElementInst::Create(V, I->getOperand(1),
1304  ConstantInt::get(I32Ty, Index), "", I);
1305  }
1306  }
1307  llvm_unreachable("failed to reorder elements of vector instruction!");
1308 }
1309 
1311  bool &isLHSID, bool &isRHSID) {
1312  isLHSID = isRHSID = true;
1313 
1314  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1315  if (Mask[i] < 0) continue; // Ignore undef values.
1316  // Is this an identity shuffle of the LHS value?
1317  isLHSID &= (Mask[i] == (int)i);
1318 
1319  // Is this an identity shuffle of the RHS value?
1320  isRHSID &= (Mask[i]-e == i);
1321  }
1322 }
1323 
1324 // Returns true if the shuffle is extracting a contiguous range of values from
1325 // LHS, for example:
1326 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1327 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1328 // Shuffles to: |EE|FF|GG|HH|
1329 // +--+--+--+--+
1332  unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1333  unsigned MaskElems = Mask.size();
1334  unsigned BegIdx = Mask.front();
1335  unsigned EndIdx = Mask.back();
1336  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1337  return false;
1338  for (unsigned I = 0; I != MaskElems; ++I)
1339  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1340  return false;
1341  return true;
1342 }
1343 
1344 /// These are the ingredients in an alternate form binary operator as described
1345 /// below.
1346 struct BinopElts {
1351  Value *V0 = nullptr, Value *V1 = nullptr) :
1352  Opcode(Opc), Op0(V0), Op1(V1) {}
1353  operator bool() const { return Opcode != 0; }
1354 };
1355 
1356 /// Binops may be transformed into binops with different opcodes and operands.
1357 /// Reverse the usual canonicalization to enable folds with the non-canonical
1358 /// form of the binop. If a transform is possible, return the elements of the
1359 /// new binop. If not, return invalid elements.
1361  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1362  Type *Ty = BO->getType();
1363  switch (BO->getOpcode()) {
1364  case Instruction::Shl: {
1365  // shl X, C --> mul X, (1 << C)
1366  Constant *C;
1367  if (match(BO1, m_Constant(C))) {
1368  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1369  return { Instruction::Mul, BO0, ShlOne };
1370  }
1371  break;
1372  }
1373  case Instruction::Or: {
1374  // or X, C --> add X, C (when X and C have no common bits set)
1375  const APInt *C;
1376  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1377  return { Instruction::Add, BO0, BO1 };
1378  break;
1379  }
1380  default:
1381  break;
1382  }
1383  return {};
1384 }
1385 
1387  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1388 
1389  // Are we shuffling together some value and that same value after it has been
1390  // modified by a binop with a constant?
1391  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1392  Constant *C;
1393  bool Op0IsBinop;
1394  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1395  Op0IsBinop = true;
1396  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1397  Op0IsBinop = false;
1398  else
1399  return nullptr;
1400 
1401  // The identity constant for a binop leaves a variable operand unchanged. For
1402  // a vector, this is a splat of something like 0, -1, or 1.
1403  // If there's no identity constant for this binop, we're done.
1404  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1405  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1406  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1407  if (!IdC)
1408  return nullptr;
1409 
1410  // Shuffle identity constants into the lanes that return the original value.
1411  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1412  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1413  // The existing binop constant vector remains in the same operand position.
1414  Constant *Mask = Shuf.getMask();
1415  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1416  ConstantExpr::getShuffleVector(IdC, C, Mask);
1417 
1418  bool MightCreatePoisonOrUB =
1419  Mask->containsUndefElement() &&
1420  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1421  if (MightCreatePoisonOrUB)
1422  NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1423 
1424  // shuf (bop X, C), X, M --> bop X, C'
1425  // shuf X, (bop X, C), M --> bop X, C'
1426  Value *X = Op0IsBinop ? Op1 : Op0;
1427  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1428  NewBO->copyIRFlags(BO);
1429 
1430  // An undef shuffle mask element may propagate as an undef constant element in
1431  // the new binop. That would produce poison where the original code might not.
1432  // If we already made a safe constant, then there's no danger.
1433  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1434  NewBO->dropPoisonGeneratingFlags();
1435  return NewBO;
1436 }
1437 
1438 /// If we have an insert of a scalar to a non-zero element of an undefined
1439 /// vector and then shuffle that value, that's the same as inserting to the zero
1440 /// element and shuffling. Splatting from the zero element is recognized as the
1441 /// canonical form of splat.
1443  InstCombiner::BuilderTy &Builder) {
1444  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1445  Constant *Mask = Shuf.getMask();
1446  Value *X;
1447  uint64_t IndexC;
1448 
1449  // Match a shuffle that is a splat to a non-zero element.
1450  if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1451  m_ConstantInt(IndexC)))) ||
1452  !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
1453  return nullptr;
1454 
1455  // Insert into element 0 of an undef vector.
1456  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1457  Constant *Zero = Builder.getInt32(0);
1458  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1459 
1460  // Splat from element 0. Any mask element that is undefined remains undefined.
1461  // For example:
1462  // shuf (inselt undef, X, 2), undef, <2,2,undef>
1463  // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1464  unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1465  SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1466  for (unsigned i = 0; i != NumMaskElts; ++i)
1467  if (isa<UndefValue>(Mask->getAggregateElement(i)))
1468  NewMask[i] = Mask->getAggregateElement(i);
1469 
1470  return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1471 }
1472 
1473 /// Try to fold shuffles that are the equivalent of a vector select.
1475  InstCombiner::BuilderTy &Builder,
1476  const DataLayout &DL) {
1477  if (!Shuf.isSelect())
1478  return nullptr;
1479 
1480  // Canonicalize to choose from operand 0 first.
1481  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1482  if (Shuf.getMaskValue(0) >= (int)NumElts) {
1483  // TODO: Can we assert that both operands of a shuffle-select are not undef
1484  // (otherwise, it would have been folded by instsimplify?
1485  Shuf.commute();
1486  return &Shuf;
1487  }
1488 
1490  return I;
1491 
1492  BinaryOperator *B0, *B1;
1493  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1494  !match(Shuf.getOperand(1), m_BinOp(B1)))
1495  return nullptr;
1496 
1497  Value *X, *Y;
1498  Constant *C0, *C1;
1499  bool ConstantsAreOp1;
1500  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1501  match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1502  ConstantsAreOp1 = true;
1503  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1504  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1505  ConstantsAreOp1 = false;
1506  else
1507  return nullptr;
1508 
1509  // We need matching binops to fold the lanes together.
1510  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1511  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1512  bool DropNSW = false;
1513  if (ConstantsAreOp1 && Opc0 != Opc1) {
1514  // TODO: We drop "nsw" if shift is converted into multiply because it may
1515  // not be correct when the shift amount is BitWidth - 1. We could examine
1516  // each vector element to determine if it is safe to keep that flag.
1517  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1518  DropNSW = true;
1519  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1520  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1521  Opc0 = AltB0.Opcode;
1522  C0 = cast<Constant>(AltB0.Op1);
1523  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1524  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1525  Opc1 = AltB1.Opcode;
1526  C1 = cast<Constant>(AltB1.Op1);
1527  }
1528  }
1529 
1530  if (Opc0 != Opc1)
1531  return nullptr;
1532 
1533  // The opcodes must be the same. Use a new name to make that clear.
1534  BinaryOperator::BinaryOps BOpc = Opc0;
1535 
1536  // Select the constant elements needed for the single binop.
1537  Constant *Mask = Shuf.getMask();
1538  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1539 
1540  // We are moving a binop after a shuffle. When a shuffle has an undefined
1541  // mask element, the result is undefined, but it is not poison or undefined
1542  // behavior. That is not necessarily true for div/rem/shift.
1543  bool MightCreatePoisonOrUB =
1544  Mask->containsUndefElement() &&
1546  if (MightCreatePoisonOrUB)
1547  NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1548 
1549  Value *V;
1550  if (X == Y) {
1551  // Remove a binop and the shuffle by rearranging the constant:
1552  // shuffle (op V, C0), (op V, C1), M --> op V, C'
1553  // shuffle (op C0, V), (op C1, V), M --> op C', V
1554  V = X;
1555  } else {
1556  // If there are 2 different variable operands, we must create a new shuffle
1557  // (select) first, so check uses to ensure that we don't end up with more
1558  // instructions than we started with.
1559  if (!B0->hasOneUse() && !B1->hasOneUse())
1560  return nullptr;
1561 
1562  // If we use the original shuffle mask and op1 is *variable*, we would be
1563  // putting an undef into operand 1 of div/rem/shift. This is either UB or
1564  // poison. We do not have to guard against UB when *constants* are op1
1565  // because safe constants guarantee that we do not overflow sdiv/srem (and
1566  // there's no danger for other opcodes).
1567  // TODO: To allow this case, create a new shuffle mask with no undefs.
1568  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1569  return nullptr;
1570 
1571  // Note: In general, we do not create new shuffles in InstCombine because we
1572  // do not know if a target can lower an arbitrary shuffle optimally. In this
1573  // case, the shuffle uses the existing mask, so there is no additional risk.
1574 
1575  // Select the variable vectors first, then perform the binop:
1576  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1577  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1578  V = Builder.CreateShuffleVector(X, Y, Mask);
1579  }
1580 
1581  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1582  BinaryOperator::Create(BOpc, NewC, V);
1583 
1584  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1585  // 1. If we changed an opcode, poison conditions might have changed.
1586  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1587  // where the original code did not. But if we already made a safe constant,
1588  // then there's no danger.
1589  NewBO->copyIRFlags(B0);
1590  NewBO->andIRFlags(B1);
1591  if (DropNSW)
1592  NewBO->setHasNoSignedWrap(false);
1593  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1594  NewBO->dropPoisonGeneratingFlags();
1595  return NewBO;
1596 }
1597 
1598 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1599 /// narrowing (concatenating with undef and extracting back to the original
1600 /// length). This allows replacing the wide select with a narrow select.
1602  InstCombiner::BuilderTy &Builder) {
1603  // This must be a narrowing identity shuffle. It extracts the 1st N elements
1604  // of the 1st vector operand of a shuffle.
1605  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1606  return nullptr;
1607 
1608  // The vector being shuffled must be a vector select that we can eliminate.
1609  // TODO: The one-use requirement could be eased if X and/or Y are constants.
1610  Value *Cond, *X, *Y;
1611  if (!match(Shuf.getOperand(0),
1612  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1613  return nullptr;
1614 
1615  // We need a narrow condition value. It must be extended with undef elements
1616  // and have the same number of elements as this shuffle.
1617  unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1618  Value *NarrowCond;
1619  if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1620  m_Constant()))) ||
1621  NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1622  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1623  return nullptr;
1624 
1625  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1626  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1628  Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1629  Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1630  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1631 }
1632 
1633 /// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1635  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1636  if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1637  return nullptr;
1638 
1639  Value *X, *Y;
1640  Constant *Mask;
1641  if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1642  return nullptr;
1643 
1644  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1645  // then combining may result in worse codegen.
1646  if (!Op0->hasOneUse())
1647  return nullptr;
1648 
1649  // We are extracting a subvector from a shuffle. Remove excess elements from
1650  // the 1st shuffle mask to eliminate the extract.
1651  //
1652  // This transform is conservatively limited to identity extracts because we do
1653  // not allow arbitrary shuffle mask creation as a target-independent transform
1654  // (because we can't guarantee that will lower efficiently).
1655  //
1656  // If the extracting shuffle has an undef mask element, it transfers to the
1657  // new shuffle mask. Otherwise, copy the original mask element. Example:
1658  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1659  // shuf X, Y, <C0, undef, C2, undef>
1660  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1661  SmallVector<Constant *, 16> NewMask(NumElts);
1662  assert(NumElts < Mask->getType()->getVectorNumElements() &&
1663  "Identity with extract must have less elements than its inputs");
1664 
1665  for (unsigned i = 0; i != NumElts; ++i) {
1666  Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1667  Constant *MaskElt = Mask->getAggregateElement(i);
1668  NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1669  }
1670  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1671 }
1672 
1673 /// Try to replace a shuffle with an insertelement.
1675  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1677 
1678  // The shuffle must not change vector sizes.
1679  // TODO: This restriction could be removed if the insert has only one use
1680  // (because the transform would require a new length-changing shuffle).
1681  int NumElts = Mask.size();
1682  if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1683  return nullptr;
1684 
1685  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1686  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1687  // We need an insertelement with a constant index.
1688  if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1689  m_ConstantInt(IndexC))))
1690  return false;
1691 
1692  // Test the shuffle mask to see if it splices the inserted scalar into the
1693  // operand 1 vector of the shuffle.
1694  int NewInsIndex = -1;
1695  for (int i = 0; i != NumElts; ++i) {
1696  // Ignore undef mask elements.
1697  if (Mask[i] == -1)
1698  continue;
1699 
1700  // The shuffle takes elements of operand 1 without lane changes.
1701  if (Mask[i] == NumElts + i)
1702  continue;
1703 
1704  // The shuffle must choose the inserted scalar exactly once.
1705  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1706  return false;
1707 
1708  // The shuffle is placing the inserted scalar into element i.
1709  NewInsIndex = i;
1710  }
1711 
1712  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1713 
1714  // Index is updated to the potentially translated insertion lane.
1715  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1716  return true;
1717  };
1718 
1719  // If the shuffle is unnecessary, insert the scalar operand directly into
1720  // operand 1 of the shuffle. Example:
1721  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1722  Value *Scalar;
1723  ConstantInt *IndexC;
1724  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1725  return InsertElementInst::Create(V1, Scalar, IndexC);
1726 
1727  // Try again after commuting shuffle. Example:
1728  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1729  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1730  std::swap(V0, V1);
1732  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1733  return InsertElementInst::Create(V1, Scalar, IndexC);
1734 
1735  return nullptr;
1736 }
1737 
1739  // Match the operands as identity with padding (also known as concatenation
1740  // with undef) shuffles of the same source type. The backend is expected to
1741  // recreate these concatenations from a shuffle of narrow operands.
1742  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1743  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1744  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1745  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1746  return nullptr;
1747 
1748  // We limit this transform to power-of-2 types because we expect that the
1749  // backend can convert the simplified IR patterns to identical nodes as the
1750  // original IR.
1751  // TODO: If we can verify the same behavior for arbitrary types, the
1752  // power-of-2 checks can be removed.
1753  Value *X = Shuffle0->getOperand(0);
1754  Value *Y = Shuffle1->getOperand(0);
1755  if (X->getType() != Y->getType() ||
1757  !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1759  isa<UndefValue>(X) || isa<UndefValue>(Y))
1760  return nullptr;
1761  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1762  isa<UndefValue>(Shuffle1->getOperand(1)) &&
1763  "Unexpected operand for identity shuffle");
1764 
1765  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1766  // operands directly by adjusting the shuffle mask to account for the narrower
1767  // types:
1768  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1769  int NarrowElts = X->getType()->getVectorNumElements();
1770  int WideElts = Shuffle0->getType()->getVectorNumElements();
1771  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1772 
1773  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1775  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1776  for (int i = 0, e = Mask.size(); i != e; ++i) {
1777  if (Mask[i] == -1)
1778  continue;
1779 
1780  // If this shuffle is choosing an undef element from 1 of the sources, that
1781  // element is undef.
1782  if (Mask[i] < WideElts) {
1783  if (Shuffle0->getMaskValue(Mask[i]) == -1)
1784  continue;
1785  } else {
1786  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1787  continue;
1788  }
1789 
1790  // If this shuffle is choosing from the 1st narrow op, the mask element is
1791  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1792  // element is offset down to adjust for the narrow vector widths.
1793  if (Mask[i] < WideElts) {
1794  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
1795  NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1796  } else {
1797  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
1798  NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1799  }
1800  }
1801  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1802 }
1803 
1805  Value *LHS = SVI.getOperand(0);
1806  Value *RHS = SVI.getOperand(1);
1807  if (auto *V = SimplifyShuffleVectorInst(
1808  LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1809  return replaceInstUsesWith(SVI, V);
1810 
1811  // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
1812  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
1813  unsigned VWidth = SVI.getType()->getVectorNumElements();
1814  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1817  if (LHS == RHS || isa<UndefValue>(LHS)) {
1818  // Remap any references to RHS to use LHS.
1820  for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
1821  if (Mask[i] < 0) {
1822  Elts.push_back(UndefValue::get(Int32Ty));
1823  continue;
1824  }
1825 
1826  if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
1827  (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
1828  Mask[i] = -1; // Turn into undef.
1829  Elts.push_back(UndefValue::get(Int32Ty));
1830  } else {
1831  Mask[i] = Mask[i] % e; // Force to LHS.
1832  Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1833  }
1834  }
1835  SVI.setOperand(0, SVI.getOperand(1));
1836  SVI.setOperand(1, UndefValue::get(RHS->getType()));
1837  SVI.setOperand(2, ConstantVector::get(Elts));
1838  return &SVI;
1839  }
1840 
1841  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1842  return I;
1843 
1844  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1845  return I;
1846 
1847  if (Instruction *I = narrowVectorSelect(SVI, Builder))
1848  return I;
1849 
1850  APInt UndefElts(VWidth, 0);
1851  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1852  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1853  if (V != &SVI)
1854  return replaceInstUsesWith(SVI, V);
1855  return &SVI;
1856  }
1857 
1859  return I;
1860 
1861  // These transforms have the potential to lose undef knowledge, so they are
1862  // intentionally placed after SimplifyDemandedVectorElts().
1863  if (Instruction *I = foldShuffleWithInsert(SVI))
1864  return I;
1866  return I;
1867 
1868  if (VWidth == LHSWidth) {
1869  // Analyze the shuffle, are the LHS or RHS and identity shuffles?
1870  bool isLHSID, isRHSID;
1871  recognizeIdentityMask(Mask, isLHSID, isRHSID);
1872 
1873  // Eliminate identity shuffles.
1874  if (isLHSID) return replaceInstUsesWith(SVI, LHS);
1875  if (isRHSID) return replaceInstUsesWith(SVI, RHS);
1876  }
1877 
1878  if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
1879  Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1880  return replaceInstUsesWith(SVI, V);
1881  }
1882 
1883  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1884  // a non-vector type. We can instead bitcast the original vector followed by
1885  // an extract of the desired element:
1886  //
1887  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1888  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1889  // %1 = bitcast <4 x i8> %sroa to i32
1890  // Becomes:
1891  // %bc = bitcast <16 x i8> %in to <4 x i32>
1892  // %ext = extractelement <4 x i32> %bc, i32 0
1893  //
1894  // If the shuffle is extracting a contiguous range of values from the input
1895  // vector then each use which is a bitcast of the extracted size can be
1896  // replaced. This will work if the vector types are compatible, and the begin
1897  // index is aligned to a value in the casted vector type. If the begin index
1898  // isn't aligned then we can shuffle the original vector (keeping the same
1899  // vector type) before extracting.
1900  //
1901  // This code will bail out if the target type is fundamentally incompatible
1902  // with vectors of the source type.
1903  //
1904  // Example of <16 x i8>, target type i32:
1905  // Index range [4,8): v-----------v Will work.
1906  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1907  // <16 x i8>: | | | | | | | | | | | | | | | | |
1908  // <4 x i32>: | | | | |
1909  // +-----------+-----------+-----------+-----------+
1910  // Index range [6,10): ^-----------^ Needs an extra shuffle.
1911  // Target type i40: ^--------------^ Won't work, bail.
1912  bool MadeChange = false;
1913  if (isShuffleExtractingFromLHS(SVI, Mask)) {
1914  Value *V = LHS;
1915  unsigned MaskElems = Mask.size();
1916  VectorType *SrcTy = cast<VectorType>(V->getType());
1917  unsigned VecBitWidth = SrcTy->getBitWidth();
1918  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1919  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1920  unsigned SrcNumElems = SrcTy->getNumElements();
1923  for (User *U : SVI.users())
1924  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
1925  if (!BC->use_empty())
1926  // Only visit bitcasts that weren't previously handled.
1927  BCs.push_back(BC);
1928  for (BitCastInst *BC : BCs) {
1929  unsigned BegIdx = Mask.front();
1930  Type *TgtTy = BC->getDestTy();
1931  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
1932  if (!TgtElemBitWidth)
1933  continue;
1934  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
1935  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
1936  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
1937  if (!VecBitWidthsEqual)
1938  continue;
1939  if (!VectorType::isValidElementType(TgtTy))
1940  continue;
1941  VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
1942  if (!BegIsAligned) {
1943  // Shuffle the input so [0,NumElements) contains the output, and
1944  // [NumElems,SrcNumElems) is undef.
1945  SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
1946  UndefValue::get(Int32Ty));
1947  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
1948  ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
1949  V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
1950  ConstantVector::get(ShuffleMask),
1951  SVI.getName() + ".extract");
1952  BegIdx = 0;
1953  }
1954  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
1955  assert(SrcElemsPerTgtElem);
1956  BegIdx /= SrcElemsPerTgtElem;
1957  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
1958  auto *NewBC =
1959  BCAlreadyExists
1960  ? NewBCs[CastSrcTy]
1961  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
1962  if (!BCAlreadyExists)
1963  NewBCs[CastSrcTy] = NewBC;
1964  auto *Ext = Builder.CreateExtractElement(
1965  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
1966  // The shufflevector isn't being replaced: the bitcast that used it
1967  // is. InstCombine will visit the newly-created instructions.
1968  replaceInstUsesWith(*BC, Ext);
1969  MadeChange = true;
1970  }
1971  }
1972 
1973  // If the LHS is a shufflevector itself, see if we can combine it with this
1974  // one without producing an unusual shuffle.
1975  // Cases that might be simplified:
1976  // 1.
1977  // x1=shuffle(v1,v2,mask1)
1978  // x=shuffle(x1,undef,mask)
1979  // ==>
1980  // x=shuffle(v1,undef,newMask)
1981  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
1982  // 2.
1983  // x1=shuffle(v1,undef,mask1)
1984  // x=shuffle(x1,x2,mask)
1985  // where v1.size() == mask1.size()
1986  // ==>
1987  // x=shuffle(v1,x2,newMask)
1988  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
1989  // 3.
1990  // x2=shuffle(v2,undef,mask2)
1991  // x=shuffle(x1,x2,mask)
1992  // where v2.size() == mask2.size()
1993  // ==>
1994  // x=shuffle(x1,v2,newMask)
1995  // newMask[i] = (mask[i] < x1.size())
1996  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
1997  // 4.
1998  // x1=shuffle(v1,undef,mask1)
1999  // x2=shuffle(v2,undef,mask2)
2000  // x=shuffle(x1,x2,mask)
2001  // where v1.size() == v2.size()
2002  // ==>
2003  // x=shuffle(v1,v2,newMask)
2004  // newMask[i] = (mask[i] < x1.size())
2005  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2006  //
2007  // Here we are really conservative:
2008  // we are absolutely afraid of producing a shuffle mask not in the input
2009  // program, because the code gen may not be smart enough to turn a merged
2010  // shuffle into two specific shuffles: it may produce worse code. As such,
2011  // we only merge two shuffles if the result is either a splat or one of the
2012  // input shuffle masks. In this case, merging the shuffles just removes
2013  // one instruction, which we know is safe. This is good for things like
2014  // turning: (splat(splat)) -> splat, or
2015  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2016  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2017  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2018  if (LHSShuffle)
2019  if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
2020  LHSShuffle = nullptr;
2021  if (RHSShuffle)
2022  if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
2023  RHSShuffle = nullptr;
2024  if (!LHSShuffle && !RHSShuffle)
2025  return MadeChange ? &SVI : nullptr;
2026 
2027  Value* LHSOp0 = nullptr;
2028  Value* LHSOp1 = nullptr;
2029  Value* RHSOp0 = nullptr;
2030  unsigned LHSOp0Width = 0;
2031  unsigned RHSOp0Width = 0;
2032  if (LHSShuffle) {
2033  LHSOp0 = LHSShuffle->getOperand(0);
2034  LHSOp1 = LHSShuffle->getOperand(1);
2035  LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
2036  }
2037  if (RHSShuffle) {
2038  RHSOp0 = RHSShuffle->getOperand(0);
2039  RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
2040  }
2041  Value* newLHS = LHS;
2042  Value* newRHS = RHS;
2043  if (LHSShuffle) {
2044  // case 1
2045  if (isa<UndefValue>(RHS)) {
2046  newLHS = LHSOp0;
2047  newRHS = LHSOp1;
2048  }
2049  // case 2 or 4
2050  else if (LHSOp0Width == LHSWidth) {
2051  newLHS = LHSOp0;
2052  }
2053  }
2054  // case 3 or 4
2055  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2056  newRHS = RHSOp0;
2057  }
2058  // case 4
2059  if (LHSOp0 == RHSOp0) {
2060  newLHS = LHSOp0;
2061  newRHS = nullptr;
2062  }
2063 
2064  if (newLHS == LHS && newRHS == RHS)
2065  return MadeChange ? &SVI : nullptr;
2066 
2067  SmallVector<int, 16> LHSMask;
2068  SmallVector<int, 16> RHSMask;
2069  if (newLHS != LHS)
2070  LHSMask = LHSShuffle->getShuffleMask();
2071  if (RHSShuffle && newRHS != RHS)
2072  RHSMask = RHSShuffle->getShuffleMask();
2073 
2074  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2075  SmallVector<int, 16> newMask;
2076  bool isSplat = true;
2077  int SplatElt = -1;
2078  // Create a new mask for the new ShuffleVectorInst so that the new
2079  // ShuffleVectorInst is equivalent to the original one.
2080  for (unsigned i = 0; i < VWidth; ++i) {
2081  int eltMask;
2082  if (Mask[i] < 0) {
2083  // This element is an undef value.
2084  eltMask = -1;
2085  } else if (Mask[i] < (int)LHSWidth) {
2086  // This element is from left hand side vector operand.
2087  //
2088  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2089  // new mask value for the element.
2090  if (newLHS != LHS) {
2091  eltMask = LHSMask[Mask[i]];
2092  // If the value selected is an undef value, explicitly specify it
2093  // with a -1 mask value.
2094  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2095  eltMask = -1;
2096  } else
2097  eltMask = Mask[i];
2098  } else {
2099  // This element is from right hand side vector operand
2100  //
2101  // If the value selected is an undef value, explicitly specify it
2102  // with a -1 mask value. (case 1)
2103  if (isa<UndefValue>(RHS))
2104  eltMask = -1;
2105  // If RHS is going to be replaced (case 3 or 4), calculate the
2106  // new mask value for the element.
2107  else if (newRHS != RHS) {
2108  eltMask = RHSMask[Mask[i]-LHSWidth];
2109  // If the value selected is an undef value, explicitly specify it
2110  // with a -1 mask value.
2111  if (eltMask >= (int)RHSOp0Width) {
2112  assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2113  && "should have been check above");
2114  eltMask = -1;
2115  }
2116  } else
2117  eltMask = Mask[i]-LHSWidth;
2118 
2119  // If LHS's width is changed, shift the mask value accordingly.
2120  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2121  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2122  // If newRHS == newLHS, we want to remap any references from newRHS to
2123  // newLHS so that we can properly identify splats that may occur due to
2124  // obfuscation across the two vectors.
2125  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2126  eltMask += newLHSWidth;
2127  }
2128 
2129  // Check if this could still be a splat.
2130  if (eltMask >= 0) {
2131  if (SplatElt >= 0 && SplatElt != eltMask)
2132  isSplat = false;
2133  SplatElt = eltMask;
2134  }
2135 
2136  newMask.push_back(eltMask);
2137  }
2138 
2139  // If the result mask is equal to one of the original shuffle masks,
2140  // or is a splat, do the replacement.
2141  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2143  for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2144  if (newMask[i] < 0) {
2145  Elts.push_back(UndefValue::get(Int32Ty));
2146  } else {
2147  Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2148  }
2149  }
2150  if (!newRHS)
2151  newRHS = UndefValue::get(newLHS->getType());
2152  return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2153  }
2154 
2155  // If the result mask is an identity, replace uses of this instruction with
2156  // corresponding argument.
2157  bool isLHSID, isRHSID;
2158  recognizeIdentityMask(newMask, isLHSID, isRHSID);
2159  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
2160  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
2161 
2162  return MadeChange ? &SVI : nullptr;
2163 }
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8...
Type * getVectorElementType() const
Definition: Type.h:371
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
uint64_t CallInst * C
Instruction * InsertNewInstWith(Instruction *New, Instruction &Old)
Same as InsertNewInstBefore, but also sets the debug loc.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
class_match< UndefValue > m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:86
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:78
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask)
static Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder, const DataLayout &DL)
Try to fold shuffles that are the equivalent of a vector select.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex –> shufflevector X...
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle&#39;s mask to includ...
static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1363
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:901
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:89
This instruction constructs a fixed permutation of two input vectors.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:733
ThreeOps_match< V1_t, V2_t, Mask_t, Instruction::ShuffleVector > m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m)
Matches ShuffleVectorInst.
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, SmallVector< int, 16 > &Mask)
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:386
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:289
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
Constant * getMask() const
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
unsigned getBitWidth() const
Return the minimum number of bits in the Vector type.
Definition: DerivedTypes.h:525
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1402
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElement(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1285
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
The core instruction combiner logic.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs...
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if &#39;V & Mask&#39; is known to be zero.
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="")
Definition: InstrTypes.h:249
uint64_t getNumElements() const
For scalable vectors, this will return the minimum number of elements in the vector.
Definition: DerivedTypes.h:393
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
This file implements a class to represent arbitrary precision integral constant values and operations...
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:412
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1964
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf)
Try to replace a shuffle with an insertelement.
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:623
This instruction compares its operands according to the predicate given to the constructor.
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
This class represents a no-op cast from one type to another.
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:81
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
Instruction * visitExtractElementInst(ExtractElementInst &EI)
These are the ingredients in an alternate form binary operator as described below.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< Constant *> &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
VectorType * getType() const
Overload to return most specific vector type.
This class represents a truncation of integer types.
Value * getOperand(unsigned i) const
Definition: User.h:169
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:359
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:303
Instruction * visitInsertElementInst(InsertElementInst &IE)
Value * SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:61
This instruction inserts a single (scalar) element into a VectorType value.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:189
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:576
static Constant * getShuffleVector(Constant *V1, Constant *V2, Constant *Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2160
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2338
op_range operands()
Definition: User.h:237
self_iterator getIterator()
Definition: ilist_node.h:81
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:73
Class to represent integer types.
Definition: DerivedTypes.h:40
VectorType * getVectorOperandType() const
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombiner &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
bool isExact() const
Determine whether the exact flag is set.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1874
Value * SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
void commute()
Swap the first 2 operands and adjust the mask to preserve the semantics of the instruction.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag...
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Iterator for intrusive lists based on ilist_node.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:250
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
BinaryOperator::BinaryOps Opcode
bool containsUndefElement() const
Return true if this is a vector constant that includes any undefined elements.
Definition: Constants.cpp:268
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:63
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2320
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf)
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:343
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:179
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2334
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:535
Class to represent vector types.
Definition: DerivedTypes.h:427
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:419
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:178
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass&#39;s ...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:549
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:609
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:79
#define I(x, y, z)
Definition: MD5.cpp:58
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
This instruction extracts a single (scalar) element from a VectorType value.
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2321
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
static Instruction * foldBitcastExtElt(ExtractElementInst &Ext, InstCombiner::BuilderTy &Builder, bool IsBigEndian)
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:114
static Instruction * narrowVectorSelect(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Match a shuffle-select-shuffle pattern where the shuffles are widening and narrowing (concatenating w...
ArrayRef< unsigned > getIndices() const
LLVM Value Representation.
Definition: Value.h:73
This file provides internal interfaces used to implement the InstCombine.
Value * SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
static Value * buildNew(Instruction *I, ArrayRef< Value *> NewOps)
Rebuild a new instruction just like &#39;I&#39; but with the new operands given.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1228
Type * getElementType() const
Definition: DerivedTypes.h:394
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:432
bool isIntDivRem() const
Definition: Instruction.h:131
static bool isSplat(ArrayRef< Value *> VL)
static void recognizeIdentityMask(const SmallVectorImpl< int > &Mask, bool &isLHSID, bool &isRHSID)
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< Constant *> &Mask, Value *PermittedRHS, InstCombiner &IC)
specific_intval m_SpecificInt(uint64_t V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:653
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
VectorType * getType() const
Overload to return most specific vector type.
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle&#39;s mas...
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1110
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
IntegerType * Int32Ty
User * user_back()
Definition: Value.h:405
const BasicBlock * getParent() const
Definition: Instruction.h:66
This instruction inserts a struct field of array element value into an aggregate value.