InstCombineVectorOps.cpp (LLVM 10.0.0svn)
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements instcombine for ExtractElement, InsertElement and
10 // ShuffleVector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/VectorUtils.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Operator.h"
30 #include "llvm/IR/PatternMatch.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/IR/User.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37 #include <cassert>
38 #include <cstdint>
39 #include <iterator>
40 #include <utility>
41 
42 using namespace llvm;
43 using namespace PatternMatch;
44 
45 #define DEBUG_TYPE "instcombine"
46 
47 /// Return true if the value is cheaper to scalarize than it is to leave as a
48 /// vector operation. IsConstantExtractIndex indicates whether we are extracting
49 /// one known element from a vector constant.
50 ///
51 /// FIXME: It's possible to create more instructions than previously existed.
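// For example (an illustrative sketch, not from the original file): with a
// constant extract index, a one-use binop whose other operand is a constant
// is cheap to scalarize:
//   %b = add <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
//   %e = extractelement <4 x i32> %b, i32 2
// can become
//   %x2 = extractelement <4 x i32> %x, i32 2
//   %e  = add i32 %x2, 7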
52 static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53  // If we can pick a scalar constant value out of a vector, that is free.
54  if (auto *C = dyn_cast<Constant>(V))
55  return IsConstantExtractIndex || C->getSplatValue();
56 
57  // An insertelement to the same constant index as our extract will simplify
58  // to the scalar inserted element. An insertelement to a different constant
59  // index is irrelevant to our extract.
60  if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61  return IsConstantExtractIndex;
62 
63  if (match(V, m_OneUse(m_Load(m_Value()))))
64  return true;
65 
66  Value *V0, *V1;
67  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69  cheapToScalarize(V1, IsConstantExtractIndex))
70  return true;
71 
72  CmpInst::Predicate UnusedPred;
73  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75  cheapToScalarize(V1, IsConstantExtractIndex))
76  return true;
77 
78  return false;
79 }
80 
81 // If we have a PHI node with a vector type that is only used to feed
82 // itself and be an operand of extractelement at a constant location,
83 // try to replace the PHI of the vector type with a PHI of a scalar type.
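// For example (illustrative IR, not from the original file): in a loop such as
//   %vphi  = phi <4 x float> [ %init, %entry ], [ %vnext, %loop ]
//   %vnext = fadd <4 x float> %vphi, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %s     = extractelement <4 x float> %vphi, i32 0
// the PHI and the fadd can be rewritten to operate on scalars, with the
// extractelement applied to %init in the predecessor block instead.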
84 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
85  SmallVector<Instruction *, 2> Extracts;
86  // The users we want the PHI to have are:
87  // 1) The EI ExtractElement (we already know this)
88  // 2) Possibly more ExtractElements with the same index.
89  // 3) Another operand, which will feed back into the PHI.
90  Instruction *PHIUser = nullptr;
91  for (auto U : PN->users()) {
92  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93  if (EI.getIndexOperand() == EU->getIndexOperand())
94  Extracts.push_back(EU);
95  else
96  return nullptr;
97  } else if (!PHIUser) {
98  PHIUser = cast<Instruction>(U);
99  } else {
100  return nullptr;
101  }
102  }
103 
104  if (!PHIUser)
105  return nullptr;
106 
107  // Verify that this PHI user has one use, which is the PHI itself,
108  // and that it is a binary operation which is cheap to scalarize.
109  // Otherwise, return nullptr.
110  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
111  !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
112  return nullptr;
113 
114  // Create a scalar PHI node that will replace the vector PHI node
115  // just before the current PHI node.
116  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118  // Scalarize each PHI operand.
119  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
120  Value *PHIInVal = PN->getIncomingValue(i);
121  BasicBlock *inBB = PN->getIncomingBlock(i);
122  Value *Elt = EI.getIndexOperand();
123  // If the operand is the PHI induction variable:
124  if (PHIInVal == PHIUser) {
125  // Scalarize the binary operation. Its first operand is the
126  // scalar PHI, and the second operand is extracted from the other
127  // vector operand.
128  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
130  Value *Op = InsertNewInstWith(
131  ExtractElementInst::Create(B0->getOperand(opId), Elt,
132  B0->getOperand(opId)->getName() + ".Elt"),
133  *B0);
134  Value *newPHIUser = InsertNewInstWith(
135  BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
136  scalarPHI, Op, B0), *B0);
137  scalarPHI->addIncoming(newPHIUser, inBB);
138  } else {
139  // Scalarize PHI input:
140  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141  // Insert the new instruction into the predecessor basic block.
142  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143  BasicBlock::iterator InsertPos;
144  if (pos && !isa<PHINode>(pos)) {
145  InsertPos = ++pos->getIterator();
146  } else {
147  InsertPos = inBB->getFirstInsertionPt();
148  }
149 
150  InsertNewInstWith(newEI, *InsertPos);
151 
152  scalarPHI->addIncoming(newEI, inBB);
153  }
154  }
155 
156  for (auto E : Extracts)
157  replaceInstUsesWith(*E, scalarPHI);
158 
159  return &EI;
160 }
161 
162 static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
163  InstCombiner::BuilderTy &Builder,
164  bool IsBigEndian) {
165  Value *X;
166  uint64_t ExtIndexC;
167  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
168  !X->getType()->isVectorTy() ||
169  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
170  return nullptr;
171 
172  // If this extractelement is using a bitcast from a vector of the same number
173  // of elements, see if we can find the source element from the source vector:
174  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175  Type *SrcTy = X->getType();
176  Type *DestTy = Ext.getType();
177  unsigned NumSrcElts = SrcTy->getVectorNumElements();
178  unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179  if (NumSrcElts == NumElts)
180  if (Value *Elt = findScalarElement(X, ExtIndexC))
181  return new BitCastInst(Elt, DestTy);
182 
183  // If the source elements are wider than the destination, try to shift and
184  // truncate a subset of scalar bits of an insert op.
185  if (NumSrcElts < NumElts) {
186  Value *Scalar;
187  uint64_t InsIndexC;
188  if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
189  m_ConstantInt(InsIndexC))))
190  return nullptr;
191 
192  // The extract must be from the subset of vector elements that we inserted
193  // into. Example: if we inserted element 1 of a <2 x i64> and we are
194  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195  // of elements 4-7 of the bitcasted vector.
196  unsigned NarrowingRatio = NumElts / NumSrcElts;
197  if (ExtIndexC / NarrowingRatio != InsIndexC)
198  return nullptr;
199 
200  // We are extracting part of the original scalar. How that scalar is
201  // inserted into the vector depends on the endian-ness. Example:
202  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
203  // +--+--+--+--+--+--+--+--+
204  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
205  // extelt <4 x i16> V', 3: | |S2|S3|
206  // +--+--+--+--+--+--+--+--+
207  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209  // In this example, we must right-shift little-endian. Big-endian is just a
210  // truncate.
211  unsigned Chunk = ExtIndexC % NarrowingRatio;
212  if (IsBigEndian)
213  Chunk = NarrowingRatio - 1 - Chunk;
214 
215  // Bail out if this is an FP vector to FP vector sequence. That would take
216  // more instructions than we started with unless there is no shift, and it
217  // may not be handled as well in the backend.
218  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219  bool NeedDestBitcast = DestTy->isFloatingPointTy();
220  if (NeedSrcBitcast && NeedDestBitcast)
221  return nullptr;
222 
223  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225  unsigned ShAmt = Chunk * DestWidth;
226 
227  // TODO: This limitation is more strict than necessary. We could sum the
228  // number of new instructions and subtract the number eliminated to know if
229  // we can proceed.
230  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
231  if (NeedSrcBitcast || NeedDestBitcast)
232  return nullptr;
233 
234  if (NeedSrcBitcast) {
235  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237  }
238 
239  if (ShAmt) {
240  // Bail out if we could end with more instructions than we started with.
241  if (!Ext.getVectorOperand()->hasOneUse())
242  return nullptr;
243  Scalar = Builder.CreateLShr(Scalar, ShAmt);
244  }
245 
246  if (NeedDestBitcast) {
247  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249  }
250  return new TruncInst(Scalar, DestTy);
251  }
252 
253  return nullptr;
254 }
255 
256 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
257  Value *SrcVec = EI.getVectorOperand();
258  Value *Index = EI.getIndexOperand();
259  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
260  SQ.getWithInstruction(&EI)))
261  return replaceInstUsesWith(EI, V);
262 
263  // If extracting a specified index from the vector, see if we can recursively
264  // find a previously computed scalar that was inserted into the vector.
265  auto *IndexC = dyn_cast<ConstantInt>(Index);
266  if (IndexC) {
267  unsigned NumElts = EI.getVectorOperandType()->getNumElements();
268 
269  // InstSimplify should handle cases where the index is invalid.
270  if (!IndexC->getValue().ule(NumElts))
271  return nullptr;
272 
273  // This instruction only demands the single element from the input vector.
274  // If the input vector has a single use, simplify it based on this use
275  // property.
276  if (SrcVec->hasOneUse() && NumElts != 1) {
277  APInt UndefElts(NumElts, 0);
278  APInt DemandedElts(NumElts, 0);
279  DemandedElts.setBit(IndexC->getZExtValue());
280  if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
281  UndefElts)) {
282  EI.setOperand(0, V);
283  return &EI;
284  }
285  }
286 
287  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
288  return I;
289 
290  // If there's a vector PHI feeding a scalar use through this extractelement
291  // instruction, try to scalarize the PHI.
292  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
293  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
294  return ScalarPHI;
295  }
296 
297  BinaryOperator *BO;
298  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
299  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
300  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
301  Value *E0 = Builder.CreateExtractElement(X, Index);
302  Value *E1 = Builder.CreateExtractElement(Y, Index);
303  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
304  }
305 
306  Value *X, *Y;
307  CmpInst::Predicate Pred;
308  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
309  cheapToScalarize(SrcVec, IndexC)) {
310  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
311  Value *E0 = Builder.CreateExtractElement(X, Index);
312  Value *E1 = Builder.CreateExtractElement(Y, Index);
313  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
314  }
315 
316  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
317  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
318  // Extracting the inserted element?
319  if (IE->getOperand(2) == Index)
320  return replaceInstUsesWith(EI, IE->getOperand(1));
321  // If the inserted and extracted elements are constants, they must not
322  // be the same value; extract from the pre-inserted value instead.
323  if (isa<Constant>(IE->getOperand(2)) && IndexC) {
324  Worklist.AddValue(SrcVec);
325  EI.setOperand(0, IE->getOperand(0));
326  return &EI;
327  }
328  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
329  // If this is extracting an element from a shufflevector, figure out where
330  // it came from and extract from the appropriate input element instead.
331  if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
332  int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
333  Value *Src;
334  unsigned LHSWidth =
335  SVI->getOperand(0)->getType()->getVectorNumElements();
336 
337  if (SrcIdx < 0)
338  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
339  if (SrcIdx < (int)LHSWidth)
340  Src = SVI->getOperand(0);
341  else {
342  SrcIdx -= LHSWidth;
343  Src = SVI->getOperand(1);
344  }
345  Type *Int32Ty = Type::getInt32Ty(EI.getContext());
346  return ExtractElementInst::Create(Src,
347  ConstantInt::get(Int32Ty,
348  SrcIdx, false));
349  }
350  } else if (auto *CI = dyn_cast<CastInst>(I)) {
351  // Canonicalize extractelement(cast) -> cast(extractelement).
352  // Bitcasts can change the number of vector elements, and they cost
353  // nothing.
354  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
355  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
356  Worklist.AddValue(EE);
357  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
358  }
359  }
360  }
361  return nullptr;
362 }
363 
364 /// If V is a shuffle of values that ONLY returns elements from either LHS or
365 /// RHS, return the shuffle mask and true. Otherwise, return false.
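// For example (illustrative IR, not from the original file):
//   %e = extractelement <4 x i32> %RHS, i32 0
//   %v = insertelement <4 x i32> %LHS, i32 %e, i32 3
// only produces elements of LHS and RHS, so the collected mask is
// <0, 1, 2, 4> (RHS elements are numbered after the 4 LHS elements).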
366 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
367  SmallVectorImpl<Constant*> &Mask) {
368  assert(LHS->getType() == RHS->getType() &&
369  "Invalid CollectSingleShuffleElements");
370  unsigned NumElts = V->getType()->getVectorNumElements();
371 
372  if (isa<UndefValue>(V)) {
373  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
374  return true;
375  }
376 
377  if (V == LHS) {
378  for (unsigned i = 0; i != NumElts; ++i)
379  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
380  return true;
381  }
382 
383  if (V == RHS) {
384  for (unsigned i = 0; i != NumElts; ++i)
385  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
386  i+NumElts));
387  return true;
388  }
389 
390  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
391  // If this is an insert of an extract from some other vector, include it.
392  Value *VecOp = IEI->getOperand(0);
393  Value *ScalarOp = IEI->getOperand(1);
394  Value *IdxOp = IEI->getOperand(2);
395 
396  if (!isa<ConstantInt>(IdxOp))
397  return false;
398  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
399 
400  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
401  // We can handle this if the vector we are inserting into is
402  // transitively ok.
403  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
404  // If so, update the mask to reflect the inserted undef.
405  Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
406  return true;
407  }
408  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
409  if (isa<ConstantInt>(EI->getOperand(1))) {
410  unsigned ExtractedIdx =
411  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
412  unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
413 
414  // This must be extracting from either LHS or RHS.
415  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
416  // We can handle this if the vector we are inserting into is
417  // transitively ok.
418  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
419  // If so, update the mask to reflect the inserted value.
420  if (EI->getOperand(0) == LHS) {
421  Mask[InsertedIdx % NumElts] =
422  ConstantInt::get(Type::getInt32Ty(V->getContext()),
423  ExtractedIdx);
424  } else {
425  assert(EI->getOperand(0) == RHS);
426  Mask[InsertedIdx % NumElts] =
427  ConstantInt::get(Type::getInt32Ty(V->getContext()),
428  ExtractedIdx + NumLHSElts);
429  }
430  return true;
431  }
432  }
433  }
434  }
435  }
436 
437  return false;
438 }
439 
440 /// If we have insertion into a vector that is wider than the vector that we
441 /// are extracting from, try to widen the source vector to allow a single
442 /// shufflevector to replace one or more insert/extract pairs.
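// For example (illustrative IR, not from the original file): if %small is a
// <2 x i8> feeding inserts into a <4 x i8>, the extracts
//   %e0 = extractelement <2 x i8> %small, i32 0
//   %e1 = extractelement <2 x i8> %small, i32 1
// can instead be taken from a widened value
//   %wide = shufflevector <2 x i8> %small, <2 x i8> undef,
//                         <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// so a later combine can replace the insert/extract pairs with one shuffle.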
443 static void replaceExtractElements(InsertElementInst *InsElt,
444  ExtractElementInst *ExtElt,
445  InstCombiner &IC) {
446  VectorType *InsVecType = InsElt->getType();
447  VectorType *ExtVecType = ExtElt->getVectorOperandType();
448  unsigned NumInsElts = InsVecType->getVectorNumElements();
449  unsigned NumExtElts = ExtVecType->getVectorNumElements();
450 
451  // The inserted-to vector must be wider than the extracted-from vector.
452  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
453  NumExtElts >= NumInsElts)
454  return;
455 
456  // Create a shuffle mask to widen the extended-from vector using undefined
457  // values. The mask selects all of the values of the original vector followed
458  // by as many undefined values as needed to create a vector of the same length
459  // as the inserted-to vector.
460  SmallVector<Constant *, 16> ExtendMask;
461  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
462  for (unsigned i = 0; i < NumExtElts; ++i)
463  ExtendMask.push_back(ConstantInt::get(IntType, i));
464  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
465  ExtendMask.push_back(UndefValue::get(IntType));
466 
467  Value *ExtVecOp = ExtElt->getVectorOperand();
468  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
469  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
470  ? ExtVecOpInst->getParent()
471  : ExtElt->getParent();
472 
473  // TODO: This restriction matches the basic block check below when creating
474  // new extractelement instructions. If that limitation is removed, this one
475  // could also be removed. But for now, we just bail out to ensure that we
476  // will replace the extractelement instruction that is feeding our
477  // insertelement instruction. This allows the insertelement to then be
478  // replaced by a shufflevector. If the insertelement is not replaced, we can
479  // induce infinite looping because there's an optimization for extractelement
480  // that will delete our widening shuffle. This would trigger another attempt
481  // here to create that shuffle, and we spin forever.
482  if (InsertionBlock != InsElt->getParent())
483  return;
484 
485  // TODO: This restriction matches the check in visitInsertElementInst() and
486  // prevents an infinite loop caused by not turning the extract/insert pair
487  // into a shuffle. We really should not need either check, but we're lacking
488  // folds for shufflevectors because we're afraid to generate shuffle masks
489  // that the backend can't handle.
490  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
491  return;
492 
493  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
494  ConstantVector::get(ExtendMask));
495 
496  // Insert the new shuffle after the vector operand of the extract is defined
497  // (as long as it's not a PHI) or at the start of the basic block of the
498  // extract, so any subsequent extracts in the same basic block can use it.
499  // TODO: Insert before the earliest ExtractElementInst that is replaced.
500  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
501  WideVec->insertAfter(ExtVecOpInst);
502  else
503  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
504 
505  // Replace extracts from the original narrow vector with extracts from the new
506  // wide vector.
507  for (User *U : ExtVecOp->users()) {
508  auto *OldExt = dyn_cast<ExtractElementInst>(U);
509  if (!OldExt || OldExt->getParent() != WideVec->getParent())
510  continue;
511  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
512  NewExt->insertAfter(OldExt);
513  IC.replaceInstUsesWith(*OldExt, NewExt);
514  }
515 }
516 
517 /// We are building a shuffle to create V, which is a sequence of insertelement,
518 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
519 /// not rely on the second vector source. Return a std::pair containing the
520 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
521 /// parameter as required.
522 ///
523 /// Note: we intentionally don't try to fold earlier shuffles since they have
524 /// often been chosen carefully to be efficiently implementable on the target.
525 using ShuffleOps = std::pair<Value *, Value *>;
526 
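// For example (illustrative IR, not from the original file): the chain
//   %e0 = extractelement <2 x i32> %A, i32 0
//   %v0 = insertelement <2 x i32> undef, i32 %e0, i32 0
//   %e1 = extractelement <2 x i32> %B, i32 1
//   %v1 = insertelement <2 x i32> %v0, i32 %e1, i32 1
// collects to the pair (%A, %B) with mask <0, 3>, which the caller can emit
// as a single shufflevector.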
527 static ShuffleOps collectShuffleElements(Value *V,
528  SmallVectorImpl<Constant *> &Mask,
529  Value *PermittedRHS,
530  InstCombiner &IC) {
531  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
532  unsigned NumElts = V->getType()->getVectorNumElements();
533 
534  if (isa<UndefValue>(V)) {
535  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
536  return std::make_pair(
537  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
538  }
539 
540  if (isa<ConstantAggregateZero>(V)) {
541  Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
542  return std::make_pair(V, nullptr);
543  }
544 
545  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
546  // If this is an insert of an extract from some other vector, include it.
547  Value *VecOp = IEI->getOperand(0);
548  Value *ScalarOp = IEI->getOperand(1);
549  Value *IdxOp = IEI->getOperand(2);
550 
551  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
552  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
553  unsigned ExtractedIdx =
554  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
555  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
556 
557  // Either the extracted from or inserted into vector must be RHSVec,
558  // otherwise we'd end up with a shuffle of three inputs.
559  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
560  Value *RHS = EI->getOperand(0);
561  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
562  assert(LR.second == nullptr || LR.second == RHS);
563 
564  if (LR.first->getType() != RHS->getType()) {
565  // Although we are giving up for now, see if we can create extracts
566  // that match the inserts for another round of combining.
567  replaceExtractElements(IEI, EI, IC);
568 
569  // We tried our best, but we can't find anything compatible with RHS
570  // further up the chain. Return a trivial shuffle.
571  for (unsigned i = 0; i < NumElts; ++i)
572  Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
573  return std::make_pair(V, nullptr);
574  }
575 
576  unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
577  Mask[InsertedIdx % NumElts] =
578  ConstantInt::get(Type::getInt32Ty(V->getContext()),
579  NumLHSElts+ExtractedIdx);
580  return std::make_pair(LR.first, RHS);
581  }
582 
583  if (VecOp == PermittedRHS) {
584  // We've gone as far as we can: anything on the other side of the
585  // extractelement will already have been converted into a shuffle.
586  unsigned NumLHSElts =
587  EI->getOperand(0)->getType()->getVectorNumElements();
588  for (unsigned i = 0; i != NumElts; ++i)
589  Mask.push_back(ConstantInt::get(
590  Type::getInt32Ty(V->getContext()),
591  i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
592  return std::make_pair(EI->getOperand(0), PermittedRHS);
593  }
594 
595  // If this insertelement is a chain that comes from exactly these two
596  // vectors, return the vector and the effective shuffle.
597  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
598  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
599  Mask))
600  return std::make_pair(EI->getOperand(0), PermittedRHS);
601  }
602  }
603  }
604 
605  // Otherwise, we can't do anything fancy. Return an identity vector.
606  for (unsigned i = 0; i != NumElts; ++i)
607  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
608  return std::make_pair(V, nullptr);
609 }
610 
611 /// Try to find redundant insertvalue instructions, like the following ones:
612 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
613 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
614 /// Here the second instruction inserts values at the same indices, as the
615 /// first one, making the first one redundant.
616 /// It should be transformed to:
617 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
618 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
619  bool IsRedundant = false;
620  ArrayRef<unsigned int> FirstIndices = I.getIndices();
621 
622  // If there is a chain of insertvalue instructions (each of them except the
623  // last one has only one use and it's another insertvalue insn from this
624  // chain), check if any of the 'children' uses the same indices as the first
625  // instruction. In this case, the first one is redundant.
626  Value *V = &I;
627  unsigned Depth = 0;
628  while (V->hasOneUse() && Depth < 10) {
629  User *U = V->user_back();
630  auto UserInsInst = dyn_cast<InsertValueInst>(U);
631  if (!UserInsInst || U->getOperand(0) != V)
632  break;
633  if (UserInsInst->getIndices() == FirstIndices) {
634  IsRedundant = true;
635  break;
636  }
637  V = UserInsInst;
638  Depth++;
639  }
640 
641  if (IsRedundant)
642  return replaceInstUsesWith(I, I.getOperand(0));
643  return nullptr;
644 }
645 
646 static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
647  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
648  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
649 
650  // A vector select does not change the size of the operands.
651  if (MaskSize != VecSize)
652  return false;
653 
654  // Each mask element must be undefined or choose a vector element from one of
655  // the source operands without crossing vector lanes.
656  for (int i = 0; i != MaskSize; ++i) {
657  int Elt = Shuf.getMaskValue(i);
658  if (Elt != -1 && Elt != i && Elt != i + VecSize)
659  return false;
660  }
661 
662  return true;
663 }
664 
665 /// Turn a chain of inserts that splats a value into an insert + shuffle:
666 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
667 /// shufflevector(insertelt(X, %k, 0), undef, zero)
668 static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
669  // We are interested in the last insert in a chain. So if this insert has a
670  // single user and that user is an insert, bail.
671  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
672  return nullptr;
673 
674  auto *VecTy = cast<VectorType>(InsElt.getType());
675  unsigned NumElements = VecTy->getNumElements();
676 
677  // Do not try to do this for a one-element vector, since that's a nop,
678  // and will cause an inf-loop.
679  if (NumElements == 1)
680  return nullptr;
681 
682  Value *SplatVal = InsElt.getOperand(1);
683  InsertElementInst *CurrIE = &InsElt;
684  SmallVector<bool, 16> ElementPresent(NumElements, false);
685  InsertElementInst *FirstIE = nullptr;
686 
687  // Walk the chain backwards, keeping track of which indices we inserted into,
688  // until we hit something that isn't an insert of the splatted value.
689  while (CurrIE) {
690  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
691  if (!Idx || CurrIE->getOperand(1) != SplatVal)
692  return nullptr;
693 
694  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
695  // Check none of the intermediate steps have any additional uses, except
696  // for the root insertelement instruction, which can be re-used, if it
697  // inserts at position 0.
698  if (CurrIE != &InsElt &&
699  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
700  return nullptr;
701 
702  ElementPresent[Idx->getZExtValue()] = true;
703  FirstIE = CurrIE;
704  CurrIE = NextIE;
705  }
706 
707  // If this is just a single insertelement (not a sequence), we are done.
708  if (FirstIE == &InsElt)
709  return nullptr;
710 
711  // If we are not inserting into an undef vector, make sure we've seen an
712  // insert into every element.
713  // TODO: If the base vector is not undef, it might be better to create a splat
714  // and then a select-shuffle (blend) with the base vector.
715  if (!isa<UndefValue>(FirstIE->getOperand(0)))
716  if (any_of(ElementPresent, [](bool Present) { return !Present; }))
717  return nullptr;
718 
719  // Create the insert + shuffle.
720  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
721  UndefValue *UndefVec = UndefValue::get(VecTy);
722  Constant *Zero = ConstantInt::get(Int32Ty, 0);
723  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
724  FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
725 
726  // Splat from element 0, but replace absent elements with undef in the mask.
727  SmallVector<Constant *, 16> Mask(NumElements, Zero);
728  for (unsigned i = 0; i != NumElements; ++i)
729  if (!ElementPresent[i])
730  Mask[i] = UndefValue::get(Int32Ty);
731 
732  return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
733 }
734 
735 /// Try to fold an insert element into an existing splat shuffle by changing
736 /// the shuffle's mask to include the index of this insert element.
737 static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
738  // Check if the vector operand of this insert is a canonical splat shuffle.
739  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
740  if (!Shuf || !Shuf->isZeroEltSplat())
741  return nullptr;
742 
743  // Check for a constant insertion index.
744  uint64_t IdxC;
745  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
746  return nullptr;
747 
748  // Check if the splat shuffle's input is the same as this insert's scalar op.
749  Value *X = InsElt.getOperand(1);
750  Value *Op0 = Shuf->getOperand(0);
751  if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
752  return nullptr;
753 
754  // Replace the shuffle mask element at the index of this insert with a zero.
755  // For example:
756  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
757  // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
758  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
759  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
760  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
761  Constant *Zero = ConstantInt::getNullValue(I32Ty);
762  for (unsigned i = 0; i != NumMaskElts; ++i)
763  NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
764 
765  Constant *NewMask = ConstantVector::get(NewMaskVec);
766  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
767 }
768 
769 /// If we have an insertelement instruction feeding into another insertelement
770 /// and the 2nd is inserting a constant into the vector, canonicalize that
771 /// constant insertion before the insertion of a variable:
772 ///
773 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
774 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
775 ///
776 /// This has the potential of eliminating the 2nd insertelement instruction
777 /// via constant folding of the scalar constant into a vector constant.
778 static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
779  InstCombiner::BuilderTy &Builder) {
780  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
781  if (!InsElt1 || !InsElt1->hasOneUse())
782  return nullptr;
783 
784  Value *X, *Y;
785  Constant *ScalarC;
786  ConstantInt *IdxC1, *IdxC2;
787  if (match(InsElt1->getOperand(0), m_Value(X)) &&
788  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
789  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
790  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
791  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
792  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
793  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
794  }
795 
796  return nullptr;
797 }
798 
799 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
800 /// --> shufflevector X, CVec', Mask'
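// For example (illustrative IR, not from the original file):
//   %s = shufflevector <4 x i32> %X, <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %i = insertelement <4 x i32> %s, i32 9, i32 2
// folds to a single shuffle whose constant operand carries the 9:
//   shufflevector <4 x i32> %X, <4 x i32> <i32 1, i32 2, i32 9, i32 4>,
//                 <4 x i32> <i32 0, i32 5, i32 6, i32 7>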
801 static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
802  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
803  // Bail out if the parent has more than one use. In that case, we'd be
804  // replacing the insertelt with a shuffle, and that's not a clear win.
805  if (!Inst || !Inst->hasOneUse())
806  return nullptr;
807  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
808  // The shuffle must have a constant vector operand. The insertelt must have
809  // a constant scalar being inserted at a constant position in the vector.
810  Constant *ShufConstVec, *InsEltScalar;
811  uint64_t InsEltIndex;
812  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
813  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
814  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
815  return nullptr;
816 
817  // Adding an element to an arbitrary shuffle could be expensive, but a
818  // shuffle that selects elements from vectors without crossing lanes is
819  // assumed cheap.
820  // If we're just adding a constant into that shuffle, it will still be
821  // cheap.
822  if (!isShuffleEquivalentToSelect(*Shuf))
823  return nullptr;
824 
825  // From the above 'select' check, we know that the mask has the same number
826  // of elements as the vector input operands. We also know that each constant
827  // input element is used in its lane and can not be used more than once by
828  // the shuffle. Therefore, replace the constant in the shuffle's constant
829  // vector with the insertelt constant. Replace the constant in the shuffle's
830  // mask vector with the insertelt index plus the length of the vector
831  // (because the constant vector operand of a shuffle is always the 2nd
832  // operand).
833  Constant *Mask = Shuf->getMask();
834  unsigned NumElts = Mask->getType()->getVectorNumElements();
835  SmallVector<Constant *, 16> NewShufElts(NumElts);
836  SmallVector<Constant *, 16> NewMaskElts(NumElts);
837  for (unsigned I = 0; I != NumElts; ++I) {
838  if (I == InsEltIndex) {
839  NewShufElts[I] = InsEltScalar;
840  Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
841  NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
842  } else {
843  // Copy over the existing values.
844  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
845  NewMaskElts[I] = Mask->getAggregateElement(I);
846  }
847  }
848 
849  // Create new operands for a shuffle that includes the constant of the
850  // original insertelt. The old shuffle will be dead now.
851  return new ShuffleVectorInst(Shuf->getOperand(0),
852  ConstantVector::get(NewShufElts),
853  ConstantVector::get(NewMaskElts));
854  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
855  // Transform sequences of insertelements ops with constant data/indexes into
856  // a single shuffle op.
857  unsigned NumElts = InsElt.getType()->getNumElements();
858 
859  uint64_t InsertIdx[2];
860  Constant *Val[2];
861  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
862  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
863  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
864  !match(IEI->getOperand(1), m_Constant(Val[1])))
865  return nullptr;
866  SmallVector<Constant *, 16> Values(NumElts);
867  SmallVector<Constant *, 16> Mask(NumElts);
868  auto ValI = std::begin(Val);
869  // Generate new constant vector and mask.
870  // We have 2 values/masks from the insertelements instructions. Insert them
871  // into new value/mask vectors.
872  for (uint64_t I : InsertIdx) {
873  if (!Values[I]) {
874  assert(!Mask[I]);
875  Values[I] = *ValI;
876  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
877  NumElts + I);
878  }
879  ++ValI;
880  }
881  // Remaining values are filled with 'undef' values.
882  for (unsigned I = 0; I < NumElts; ++I) {
883  if (!Values[I]) {
884  assert(!Mask[I]);
885  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
886  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
887  }
888  }
889  // Create new operands for a shuffle that includes the constant of the
890  // original insertelt.
891  return new ShuffleVectorInst(IEI->getOperand(0),
892  ConstantVector::get(Values),
893  ConstantVector::get(Mask));
894  }
895  return nullptr;
896 }
897 
898 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
899  Value *VecOp = IE.getOperand(0);
900  Value *ScalarOp = IE.getOperand(1);
901  Value *IdxOp = IE.getOperand(2);
902 
903  if (auto *V = SimplifyInsertElementInst(
904  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
905  return replaceInstUsesWith(IE, V);
906 
907  // If the vector and scalar are both bitcast from the same element type, do
908  // the insert in that source type followed by bitcast.
909  Value *VecSrc, *ScalarSrc;
910  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
911  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
912  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
913  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
914  VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
915  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
916  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
917  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
918  return new BitCastInst(NewInsElt, IE.getType());
919  }
920 
921  // If the inserted element was extracted from some other vector and both
922  // indexes are valid constants, try to turn this into a shuffle.
923  uint64_t InsertedIdx, ExtractedIdx;
924  Value *ExtVecOp;
925  if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
926  match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
927  m_ConstantInt(ExtractedIdx))) &&
928  ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
929  // TODO: Looking at the user(s) to determine if this insert is a
930  // fold-to-shuffle opportunity does not match the usual instcombine
931  // constraints. We should decide if the transform is worthy based only
932  // on this instruction and its operands, but that may not work currently.
933  //
934  // Here, we are trying to avoid creating shuffles before reaching
935  // the end of a chain of extract-insert pairs. This is complicated because
936  // we do not generally form arbitrary shuffle masks in instcombine
937  // (because those may codegen poorly), but collectShuffleElements() does
938  // exactly that.
939  //
940  // The rules for determining what is an acceptable target-independent
941  // shuffle mask are fuzzy because they evolve based on the backend's
942  // capabilities and real-world impact.
943  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
944  if (!Insert.hasOneUse())
945  return true;
946  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
947  if (!InsertUser)
948  return true;
949  return false;
950  };
951 
952  // Try to form a shuffle from a chain of extract-insert ops.
953  if (isShuffleRootCandidate(IE)) {
954  SmallVector<Constant*, 16> Mask;
955  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
956 
957  // The proposed shuffle may be trivial, in which case we shouldn't
958  // perform the combine.
959  if (LR.first != &IE && LR.second != &IE) {
960  // We now have a shuffle of LHS, RHS, Mask.
961  if (LR.second == nullptr)
962  LR.second = UndefValue::get(LR.first->getType());
963  return new ShuffleVectorInst(LR.first, LR.second,
964  ConstantVector::get(Mask));
965  }
966  }
967  }
968 
969  unsigned VWidth = VecOp->getType()->getVectorNumElements();
970  APInt UndefElts(VWidth, 0);
971  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
972  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
973  if (V != &IE)
974  return replaceInstUsesWith(IE, V);
975  return &IE;
976  }
977 
978  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
979  return Shuf;
980 
981  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
982  return NewInsElt;
983 
984  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
985  return Broadcast;
986 
987  if (Instruction *Splat = foldInsEltIntoSplat(IE))
988  return Splat;
989 
990  return nullptr;
991 }
992 
993 /// Return true if we can evaluate the specified expression tree if the vector
994 /// elements were shuffled in a different order.
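// For example (illustrative IR, not from the original file): for Mask = <1, 0>,
//   %v = insertelement <2 x i32> undef, i32 %s, i32 0
//   %t = add <2 x i32> %v, <i32 10, i32 20>
// can be evaluated directly in the shuffled order by inserting %s at index 1
// and swapping the constant to <i32 20, i32 10>, so no shufflevector of %t is
// needed.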
995 static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
996  unsigned Depth = 5) {
997  // We can always reorder the elements of a constant.
998  if (isa<Constant>(V))
999  return true;
1000 
1001  // We won't reorder vector arguments. No IPO here.
1002  Instruction *I = dyn_cast<Instruction>(V);
1003  if (!I) return false;
1004 
1005  // Two users may expect different orders of the elements. Don't try it.
1006  if (!I->hasOneUse())
1007  return false;
1008 
1009  if (Depth == 0) return false;
1010 
1011  switch (I->getOpcode()) {
1012  case Instruction::Add:
1013  case Instruction::FAdd:
1014  case Instruction::Sub:
1015  case Instruction::FSub:
1016  case Instruction::Mul:
1017  case Instruction::FMul:
1018  case Instruction::UDiv:
1019  case Instruction::SDiv:
1020  case Instruction::FDiv:
1021  case Instruction::URem:
1022  case Instruction::SRem:
1023  case Instruction::FRem:
1024  case Instruction::Shl:
1025  case Instruction::LShr:
1026  case Instruction::AShr:
1027  case Instruction::And:
1028  case Instruction::Or:
1029  case Instruction::Xor:
1030  case Instruction::ICmp:
1031  case Instruction::FCmp:
1032  case Instruction::Trunc:
1033  case Instruction::ZExt:
1034  case Instruction::SExt:
1035  case Instruction::FPToUI:
1036  case Instruction::FPToSI:
1037  case Instruction::UIToFP:
1038  case Instruction::SIToFP:
1039  case Instruction::FPTrunc:
1040  case Instruction::FPExt:
1041  case Instruction::GetElementPtr: {
1042  // Bail out if we would create longer vector ops. We could allow creating
1043  // longer vector ops, but that may result in more expensive codegen. We
1044  // would also need to limit the transform to avoid undefined behavior for
1045  // integer div/rem.
1046  Type *ITy = I->getType();
1047  if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1048  return false;
1049  for (Value *Operand : I->operands()) {
1050  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1051  return false;
1052  }
1053  return true;
1054  }
1055  case Instruction::InsertElement: {
1056  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1057  if (!CI) return false;
1058  int ElementNumber = CI->getLimitedValue();
1059 
1060  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1061  // can't put an element into multiple indices.
1062  bool SeenOnce = false;
1063  for (int i = 0, e = Mask.size(); i != e; ++i) {
1064  if (Mask[i] == ElementNumber) {
1065  if (SeenOnce)
1066  return false;
1067  SeenOnce = true;
1068  }
1069  }
1070  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1071  }
1072  }
1073  return false;
1074 }
1075 
1076 /// Rebuild a new instruction just like 'I' but with the new operands given.
1077 /// In the event of type mismatch, the type of the operands is correct.
1078 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1079  // We don't want to use the IRBuilder here because we want the replacement
1080  // instructions to appear next to 'I', not the builder's insertion point.
1081  switch (I->getOpcode()) {
1082  case Instruction::Add:
1083  case Instruction::FAdd:
1084  case Instruction::Sub:
1085  case Instruction::FSub:
1086  case Instruction::Mul:
1087  case Instruction::FMul:
1088  case Instruction::UDiv:
1089  case Instruction::SDiv:
1090  case Instruction::FDiv:
1091  case Instruction::URem:
1092  case Instruction::SRem:
1093  case Instruction::FRem:
1094  case Instruction::Shl:
1095  case Instruction::LShr:
1096  case Instruction::AShr:
1097  case Instruction::And:
1098  case Instruction::Or:
1099  case Instruction::Xor: {
1100  BinaryOperator *BO = cast<BinaryOperator>(I);
1101  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1102  BinaryOperator *New =
1103  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1104  NewOps[0], NewOps[1], "", BO);
1105  if (isa<OverflowingBinaryOperator>(BO)) {
1106  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1107  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1108  }
1109  if (isa<PossiblyExactOperator>(BO)) {
1110  New->setIsExact(BO->isExact());
1111  }
1112  if (isa<FPMathOperator>(BO))
1113  New->copyFastMathFlags(I);
1114  return New;
1115  }
1116  case Instruction::ICmp:
1117  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1118  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1119  NewOps[0], NewOps[1]);
1120  case Instruction::FCmp:
1121  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1122  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1123  NewOps[0], NewOps[1]);
1124  case Instruction::Trunc:
1125  case Instruction::ZExt:
1126  case Instruction::SExt:
1127  case Instruction::FPToUI:
1128  case Instruction::FPToSI:
1129  case Instruction::UIToFP:
1130  case Instruction::SIToFP:
1131  case Instruction::FPTrunc:
1132  case Instruction::FPExt: {
1133  // It's possible that the mask has a different number of elements from
1134  // the original cast. We recompute the destination type to match the mask.
1135  Type *DestTy =
1136  VectorType::get(I->getType()->getScalarType(),
1137  NewOps[0]->getType()->getVectorNumElements());
1138  assert(NewOps.size() == 1 && "cast with #ops != 1");
1139  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1140  "", I);
1141  }
1142  case Instruction::GetElementPtr: {
1143  Value *Ptr = NewOps[0];
1144  ArrayRef<Value*> Idx = NewOps.slice(1);
1145  GetElementPtrInst *GEP = GetElementPtrInst::Create(
1146  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1147  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1148  return GEP;
1149  }
1150  }
1151  llvm_unreachable("failed to rebuild vector instructions");
1152 }
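// Rebuild the expression tree rooted at V so that its vector elements appear
// in the order given by Mask, instead of shuffling the final result. Constants
// are reordered directly; other instructions are recreated via buildNew() with
// their vector operands recursively reordered.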
1153 
1154 static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1155  // Mask.size() does not need to be equal to the number of vector elements.
1156 
1157  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1158  Type *EltTy = V->getType()->getScalarType();
1159  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1160  if (isa<UndefValue>(V))
1161  return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1162 
1163  if (isa<ConstantAggregateZero>(V))
1164  return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1165 
1166  if (Constant *C = dyn_cast<Constant>(V)) {
1167  SmallVector<Constant *, 16> MaskValues;
1168  for (int i = 0, e = Mask.size(); i != e; ++i) {
1169  if (Mask[i] == -1)
1170  MaskValues.push_back(UndefValue::get(I32Ty));
1171  else
1172  MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1173  }
1174  return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1175  ConstantVector::get(MaskValues));
1176  }
1177 
1178  Instruction *I = cast<Instruction>(V);
1179  switch (I->getOpcode()) {
1180  case Instruction::Add:
1181  case Instruction::FAdd:
1182  case Instruction::Sub:
1183  case Instruction::FSub:
1184  case Instruction::Mul:
1185  case Instruction::FMul:
1186  case Instruction::UDiv:
1187  case Instruction::SDiv:
1188  case Instruction::FDiv:
1189  case Instruction::URem:
1190  case Instruction::SRem:
1191  case Instruction::FRem:
1192  case Instruction::Shl:
1193  case Instruction::LShr:
1194  case Instruction::AShr:
1195  case Instruction::And:
1196  case Instruction::Or:
1197  case Instruction::Xor:
1198  case Instruction::ICmp:
1199  case Instruction::FCmp:
1200  case Instruction::Trunc:
1201  case Instruction::ZExt:
1202  case Instruction::SExt:
1203  case Instruction::FPToUI:
1204  case Instruction::FPToSI:
1205  case Instruction::UIToFP:
1206  case Instruction::SIToFP:
1207  case Instruction::FPTrunc:
1208  case Instruction::FPExt:
1209  case Instruction::Select:
1210  case Instruction::GetElementPtr: {
1211  SmallVector<Value*, 8> NewOps;
1212  bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1213  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1214  Value *V;
1215  // Recursively call evaluateInDifferentElementOrder on vector arguments
1216  // as well. E.g. GetElementPtr may have scalar operands even if the
1217  // return value is a vector, so we need to examine the operand type.
1218  if (I->getOperand(i)->getType()->isVectorTy())
1219  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1220  else
1221  V = I->getOperand(i);
1222  NewOps.push_back(V);
1223  NeedsRebuild |= (V != I->getOperand(i));
1224  }
1225  if (NeedsRebuild) {
1226  return buildNew(I, NewOps);
1227  }
1228  return I;
1229  }
1230  case Instruction::InsertElement: {
1231  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1232 
1233  // The insertelement was inserting at Element. Figure out which element
1234  // that becomes after shuffling. The answer is guaranteed to be unique
1235  // by CanEvaluateShuffled.
1236  bool Found = false;
1237  int Index = 0;
1238  for (int e = Mask.size(); Index != e; ++Index) {
1239  if (Mask[Index] == Element) {
1240  Found = true;
1241  break;
1242  }
1243  }
1244 
1245  // If element is not in Mask, no need to handle the operand 1 (element to
1246  // be inserted). Just evaluate values in operand 0 according to Mask.
1247  if (!Found)
1248  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1249 
1250  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1251  return InsertElementInst::Create(V, I->getOperand(1),
1252  ConstantInt::get(I32Ty, Index), "", I);
1253  }
1254  }
1255  llvm_unreachable("failed to reorder elements of vector instruction!");
1256 }
1257 
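// Determine whether Mask is an identity mask of the first shuffle operand
// (isLHSID) and/or of the second operand (isRHSID). Undef (negative) mask
// elements are ignored, so e.g. <0, undef, 2, 3> is treated as an identity of
// the LHS of a 4-element shuffle.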
1258 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
1259  bool &isLHSID, bool &isRHSID) {
1260  isLHSID = isRHSID = true;
1261 
1262  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1263  if (Mask[i] < 0) continue; // Ignore undef values.
1264  // Is this an identity shuffle of the LHS value?
1265  isLHSID &= (Mask[i] == (int)i);
1266 
1267  // Is this an identity shuffle of the RHS value?
1268  isRHSID &= (Mask[i]-e == i);
1269  }
1270 }
1271 
1272 // Returns true if the shuffle is extracting a contiguous range of values from
1273 // LHS, for example:
1274 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1275 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1276 // Shuffles to: |EE|FF|GG|HH|
1277 // +--+--+--+--+
1278 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1279  SmallVector<int, 16> &Mask) {
1280  unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1281  unsigned MaskElems = Mask.size();
1282  unsigned BegIdx = Mask.front();
1283  unsigned EndIdx = Mask.back();
1284  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1285  return false;
1286  for (unsigned I = 0; I != MaskElems; ++I)
1287  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1288  return false;
1289  return true;
1290 }
1291 
1292 /// These are the ingredients in an alternate form binary operator as described
1293 /// below.
1294 struct BinopElts {
1295  BinaryOperator::BinaryOps Opcode;
1296  Value *Op0;
1297  Value *Op1;
1298  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1299  Value *V0 = nullptr, Value *V1 = nullptr) :
1300  Opcode(Opc), Op0(V0), Op1(V1) {}
1301  operator bool() const { return Opcode != 0; }
1302 };
1303 
1304 /// Binops may be transformed into binops with different opcodes and operands.
1305 /// Reverse the usual canonicalization to enable folds with the non-canonical
1306 /// form of the binop. If a transform is possible, return the elements of the
1307 /// new binop. If not, return invalid elements.
1308 static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1309  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1310  Type *Ty = BO->getType();
1311  switch (BO->getOpcode()) {
1312  case Instruction::Shl: {
1313  // shl X, C --> mul X, (1 << C)
1314  Constant *C;
1315  if (match(BO1, m_Constant(C))) {
1316  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1317  return { Instruction::Mul, BO0, ShlOne };
1318  }
1319  break;
1320  }
1321  case Instruction::Or: {
1322  // or X, C --> add X, C (when X and C have no common bits set)
1323  const APInt *C;
1324  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1325  return { Instruction::Add, BO0, BO1 };
1326  break;
1327  }
1328  default:
1329  break;
1330  }
1331  return {};
1332 }
1333 
1334 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1335  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1336 
1337  // Are we shuffling together some value and that same value after it has been
1338  // modified by a binop with a constant?
1339  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1340  Constant *C;
1341  bool Op0IsBinop;
1342  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1343  Op0IsBinop = true;
1344  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1345  Op0IsBinop = false;
1346  else
1347  return nullptr;
1348 
1349  // The identity constant for a binop leaves a variable operand unchanged. For
1350  // a vector, this is a splat of something like 0, -1, or 1.
1351  // If there's no identity constant for this binop, we're done.
1352  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1353  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1354  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1355  if (!IdC)
1356  return nullptr;
1357 
1358  // Shuffle identity constants into the lanes that return the original value.
1359  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1360  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1361  // The existing binop constant vector remains in the same operand position.
1362  Constant *Mask = Shuf.getMask();
1363  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1364  ConstantExpr::getShuffleVector(IdC, C, Mask);
1365 
1366  bool MightCreatePoisonOrUB =
1367  Mask->containsUndefElement() &&
1368  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1369  if (MightCreatePoisonOrUB)
1370  NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1371 
1372  // shuf (bop X, C), X, M --> bop X, C'
1373  // shuf X, (bop X, C), M --> bop X, C'
1374  Value *X = Op0IsBinop ? Op1 : Op0;
1375  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1376  NewBO->copyIRFlags(BO);
1377 
1378  // An undef shuffle mask element may propagate as an undef constant element in
1379  // the new binop. That would produce poison where the original code might not.
1380  // If we already made a safe constant, then there's no danger.
1381  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1382  NewBO->dropPoisonGeneratingFlags();
1383  return NewBO;
1384 }
1385 
1386 /// If we have an insert of a scalar to a non-zero element of an undefined
1387 /// vector and then shuffle that value, that's the same as inserting to the zero
1388 /// element and shuffling. Splatting from the zero element is recognized as the
1389 /// canonical form of splat.
1390 static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1391  InstCombiner::BuilderTy &Builder) {
1392  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1393  Constant *Mask = Shuf.getMask();
1394  Value *X;
1395  uint64_t IndexC;
1396 
1397  // Match a shuffle that is a splat to a non-zero element.
1398  if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1399  m_ConstantInt(IndexC)))) ||
1400  !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
1401  return nullptr;
1402 
1403  // Insert into element 0 of an undef vector.
1404  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1405  Constant *Zero = Builder.getInt32(0);
1406  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1407 
1408  // Splat from element 0. Any mask element that is undefined remains undefined.
1409  // For example:
1410  // shuf (inselt undef, X, 2), undef, <2,2,undef>
1411  // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1412  unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1413  SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1414  for (unsigned i = 0; i != NumMaskElts; ++i)
1415  if (isa<UndefValue>(Mask->getAggregateElement(i)))
1416  NewMask[i] = Mask->getAggregateElement(i);
1417 
1418  return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1419 }
1420 
1421 /// Try to fold shuffles that are the equivalent of a vector select.
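// For example (illustrative IR, not from the original file): with a
// select-equivalent mask,
//   %b0 = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//   %b1 = add <4 x i32> %x, <i32 5, i32 6, i32 7, i32 8>
//   %s  = shufflevector <4 x i32> %b0, <4 x i32> %b1,
//                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// the shuffle and one of the adds can be removed:
//   %s = add <4 x i32> %x, <i32 1, i32 6, i32 3, i32 8>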
1422 static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1423  InstCombiner::BuilderTy &Builder,
1424  const DataLayout &DL) {
1425  if (!Shuf.isSelect())
1426  return nullptr;
1427 
1428  // Canonicalize to choose from operand 0 first.
1429  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1430  if (Shuf.getMaskValue(0) >= (int)NumElts) {
1431  // TODO: Can we assert that both operands of a shuffle-select are not undef
1432  // (otherwise, it would have been folded by instsimplify)?
1433  Shuf.commute();
1434  return &Shuf;
1435  }
1436 
1437  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1438  return I;
1439 
1440  BinaryOperator *B0, *B1;
1441  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1442  !match(Shuf.getOperand(1), m_BinOp(B1)))
1443  return nullptr;
1444 
1445  Value *X, *Y;
1446  Constant *C0, *C1;
1447  bool ConstantsAreOp1;
1448  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1449  match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1450  ConstantsAreOp1 = true;
1451  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1452  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1453  ConstantsAreOp1 = false;
1454  else
1455  return nullptr;
1456 
1457  // We need matching binops to fold the lanes together.
1458  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1459  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1460  bool DropNSW = false;
1461  if (ConstantsAreOp1 && Opc0 != Opc1) {
1462  // TODO: We drop "nsw" if shift is converted into multiply because it may
1463  // not be correct when the shift amount is BitWidth - 1. We could examine
1464  // each vector element to determine if it is safe to keep that flag.
1465  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1466  DropNSW = true;
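  // For instance (illustrative constants): getAlternateBinop can treat
  // shl X, <1,2,3,4> as mul X, <2,4,8,16>, which may let the opcodes of the
  // two binops match.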
1467  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1468  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1469  Opc0 = AltB0.Opcode;
1470  C0 = cast<Constant>(AltB0.Op1);
1471  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1472  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1473  Opc1 = AltB1.Opcode;
1474  C1 = cast<Constant>(AltB1.Op1);
1475  }
1476  }
1477 
1478  if (Opc0 != Opc1)
1479  return nullptr;
1480 
1481  // The opcodes must be the same. Use a new name to make that clear.
1482  BinaryOperator::BinaryOps BOpc = Opc0;
1483 
1484  // Select the constant elements needed for the single binop.
1485  Constant *Mask = Shuf.getMask();
1486  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1487 
1488  // We are moving a binop after a shuffle. When a shuffle has an undefined
1489  // mask element, the result is undefined, but it is not poison or undefined
1490  // behavior. That is not necessarily true for div/rem/shift.
1491  bool MightCreatePoisonOrUB =
1492  Mask->containsUndefElement() &&
1493  (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
1494  if (MightCreatePoisonOrUB)
1495  NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1496 
1497  Value *V;
1498  if (X == Y) {
1499  // Remove a binop and the shuffle by rearranging the constant:
1500  // shuffle (op V, C0), (op V, C1), M --> op V, C'
1501  // shuffle (op C0, V), (op C1, V), M --> op C', V
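  // For instance (illustrative constants and mask):
  //   shuffle (mul V, <1,2,3,4>), (mul V, <5,6,7,8>), <0,5,2,7>
  //     --> mul V, <1,6,3,8>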
1502  V = X;
1503  } else {
1504  // If there are 2 different variable operands, we must create a new shuffle
1505  // (select) first, so check uses to ensure that we don't end up with more
1506  // instructions than we started with.
1507  if (!B0->hasOneUse() && !B1->hasOneUse())
1508  return nullptr;
1509 
1510  // If we use the original shuffle mask and op1 is *variable*, we would be
1511  // putting an undef into operand 1 of div/rem/shift. This is either UB or
1512  // poison. We do not have to guard against UB when *constants* are op1
1513  // because safe constants guarantee that we do not overflow sdiv/srem (and
1514  // there's no danger for other opcodes).
1515  // TODO: To allow this case, create a new shuffle mask with no undefs.
1516  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1517  return nullptr;
1518 
1519  // Note: In general, we do not create new shuffles in InstCombine because we
1520  // do not know if a target can lower an arbitrary shuffle optimally. In this
1521  // case, the shuffle uses the existing mask, so there is no additional risk.
1522 
1523  // Select the variable vectors first, then perform the binop:
1524  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1525  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
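  // For instance (illustrative constants and mask):
  //   shuffle (add X, <1,2,3,4>), (add Y, <5,6,7,8>), <0,5,2,7>
  //     --> add (shuffle X, Y, <0,5,2,7>), <1,6,3,8>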
1526  V = Builder.CreateShuffleVector(X, Y, Mask);
1527  }
1528 
1529  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1530  BinaryOperator::Create(BOpc, NewC, V);
1531 
1532  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1533  // 1. If we changed an opcode, poison conditions might have changed.
1534  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1535  // where the original code did not. But if we already made a safe constant,
1536  // then there's no danger.
1537  NewBO->copyIRFlags(B0);
1538  NewBO->andIRFlags(B1);
1539  if (DropNSW)
1540  NewBO->setHasNoSignedWrap(false);
1541  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1542  NewBO->dropPoisonGeneratingFlags();
1543  return NewBO;
1544 }
1545 
1546 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1547 /// narrowing (concatenating with undef and extracting back to the original
1548 /// length). This allows replacing the wide select with a narrow select.
1549 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1550  InstCombiner::BuilderTy &Builder) {
1551  // This must be a narrowing identity shuffle. It extracts the 1st N elements
1552  // of the 1st vector operand of a shuffle.
1553  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1554  return nullptr;
1555 
1556  // The vector being shuffled must be a vector select that we can eliminate.
1557  // TODO: The one-use requirement could be eased if X and/or Y are constants.
1558  Value *Cond, *X, *Y;
1559  if (!match(Shuf.getOperand(0),
1560  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1561  return nullptr;
1562 
1563  // We need a narrow condition value. It must be extended with undef elements
1564  // and have the same number of elements as this shuffle.
1565  unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1566  Value *NarrowCond;
1567  if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1568  m_Constant()))) ||
1569  NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1570  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1571  return nullptr;
1572 
1573  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask -->
1574  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
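  // For instance (illustrative types and masks): a <2 x i1> condition C that
  // was widened to <4 x i1> with <0,1,undef,undef>, used to select between
  // <4 x i32> X and Y, and then narrowed back with mask <0,1>, becomes
  //   select C, (shuf X, undef, <0,1>), (shuf Y, undef, <0,1>)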
1575  Value *Undef = UndefValue::get(X->getType());
1576  Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1577  Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1578  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1579 }
1580 
1581 /// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1582 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1583  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1584  if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1585  return nullptr;
1586 
1587  Value *X, *Y;
1588  Constant *Mask;
1589  if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1590  return nullptr;
1591 
1592  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1593  // then combining may result in worse codegen.
1594  if (!Op0->hasOneUse())
1595  return nullptr;
1596 
1597  // We are extracting a subvector from a shuffle. Remove excess elements from
1598  // the 1st shuffle mask to eliminate the extract.
1599  //
1600  // This transform is conservatively limited to identity extracts because we do
1601  // not allow arbitrary shuffle mask creation as a target-independent transform
1602  // (because we can't guarantee that it will lower efficiently).
1603  //
1604  // If the extracting shuffle has an undef mask element, it transfers to the
1605  // new shuffle mask. Otherwise, copy the original mask element. Example:
1606  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1607  // shuf X, Y, <C0, undef, C2, undef>
1608  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1609  SmallVector<Constant *, 16> NewMask(NumElts);
1610  assert(NumElts < Mask->getType()->getVectorNumElements() &&
1611  "Identity with extract must have less elements than its inputs");
1612 
1613  for (unsigned i = 0; i != NumElts; ++i) {
1614  Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1615  Constant *MaskElt = Mask->getAggregateElement(i);
1616  NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1617  }
1618  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1619 }
1620 
1621 /// Try to replace a shuffle with an insertelement.
1622 static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
1623  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1624  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1625 
1626  // The shuffle must not change vector sizes.
1627  // TODO: This restriction could be removed if the insert has only one use
1628  // (because the transform would require a new length-changing shuffle).
1629  int NumElts = Mask.size();
1630  if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1631  return nullptr;
1632 
1633  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1634  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1635  // We need an insertelement with a constant index.
1636  if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1637  m_ConstantInt(IndexC))))
1638  return false;
1639 
1640  // Test the shuffle mask to see if it splices the inserted scalar into the
1641  // operand 1 vector of the shuffle.
1642  int NewInsIndex = -1;
1643  for (int i = 0; i != NumElts; ++i) {
1644  // Ignore undef mask elements.
1645  if (Mask[i] == -1)
1646  continue;
1647 
1648  // The shuffle takes elements of operand 1 without lane changes.
1649  if (Mask[i] == NumElts + i)
1650  continue;
1651 
1652  // The shuffle must choose the inserted scalar exactly once.
1653  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1654  return false;
1655 
1656  // The shuffle is placing the inserted scalar into element i.
1657  NewInsIndex = i;
1658  }
1659 
1660  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1661 
1662  // Index is updated to the potentially translated insertion lane.
1663  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1664  return true;
1665  };
1666 
1667  // If the shuffle is unnecessary, insert the scalar operand directly into
1668  // operand 1 of the shuffle. Example:
1669  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1670  Value *Scalar;
1671  ConstantInt *IndexC;
1672  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1673  return InsertElementInst::Create(V1, Scalar, IndexC);
1674 
1675  // Try again after commuting shuffle. Example:
1676  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1677  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1678  std::swap(V0, V1);
1679  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1680  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1681  return InsertElementInst::Create(V1, Scalar, IndexC);
1682 
1683  return nullptr;
1684 }
1685 
1686 static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1687  // Match the operands as identity with padding (also known as concatenation
1688  // with undef) shuffles of the same source type. The backend is expected to
1689  // recreate these concatenations from a shuffle of narrow operands.
1690  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1691  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1692  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1693  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1694  return nullptr;
1695 
1696  // We limit this transform to power-of-2 types because we expect that the
1697  // backend can convert the simplified IR patterns into the same nodes it
1698  // would produce for the original IR.
1699  // TODO: If we can verify the same behavior for arbitrary types, the
1700  // power-of-2 checks can be removed.
1701  Value *X = Shuffle0->getOperand(0);
1702  Value *Y = Shuffle1->getOperand(0);
1703  if (X->getType() != Y->getType() ||
1704  !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
1705  !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1706  !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
1707  isa<UndefValue>(X) || isa<UndefValue>(Y))
1708  return nullptr;
1709  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1710  isa<UndefValue>(Shuffle1->getOperand(1)) &&
1711  "Unexpected operand for identity shuffle");
1712 
1713  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1714  // operands directly by adjusting the shuffle mask to account for the narrower
1715  // types:
1716  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
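  // For instance (illustrative widths and masks): if X and Y are <2 x i32>
  // vectors, each widened to <4 x i32> with <0,1,undef,undef>, then
  //   shuf (widen X), (widen Y), <0,1,4,5> --> shuf X, Y, <0,1,2,3>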
1717  int NarrowElts = X->getType()->getVectorNumElements();
1718  int WideElts = Shuffle0->getType()->getVectorNumElements();
1719  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1720 
1721  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1722  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1723  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1724  for (int i = 0, e = Mask.size(); i != e; ++i) {
1725  if (Mask[i] == -1)
1726  continue;
1727 
1728  // If this shuffle is choosing an undef element from 1 of the sources, that
1729  // element is undef.
1730  if (Mask[i] < WideElts) {
1731  if (Shuffle0->getMaskValue(Mask[i]) == -1)
1732  continue;
1733  } else {
1734  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1735  continue;
1736  }
1737 
1738  // If this shuffle is choosing from the 1st narrow op, the mask element is
1739  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1740  // element is offset down to adjust for the narrow vector widths.
1741  if (Mask[i] < WideElts) {
1742  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
1743  NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1744  } else {
1745  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
1746  NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1747  }
1748  }
1749  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1750 }
1751 
1752 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1753  Value *LHS = SVI.getOperand(0);
1754  Value *RHS = SVI.getOperand(1);
1755  if (auto *V = SimplifyShuffleVectorInst(
1756  LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1757  return replaceInstUsesWith(SVI, V);
1758 
1759  // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
1760  // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
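  // For instance (illustrative mask): shuffle X, X, <0,4,1,5> becomes
  // shuffle X, undef, <0,0,1,1> because elements 4 and 5 of the concatenated
  // operands are the same as elements 0 and 1 of X.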
1761  unsigned VWidth = SVI.getType()->getVectorNumElements();
1762  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1763  SmallVector<int, 16> Mask = SVI.getShuffleMask();
1764  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1765  if (LHS == RHS || isa<UndefValue>(LHS)) {
1766  // Remap any references to RHS to use LHS.
1767  SmallVector<Constant*, 16> Elts;
1768  for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
1769  if (Mask[i] < 0) {
1770  Elts.push_back(UndefValue::get(Int32Ty));
1771  continue;
1772  }
1773 
1774  if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
1775  (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
1776  Mask[i] = -1; // Turn into undef.
1777  Elts.push_back(UndefValue::get(Int32Ty));
1778  } else {
1779  Mask[i] = Mask[i] % e; // Force to LHS.
1780  Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1781  }
1782  }
1783  SVI.setOperand(0, SVI.getOperand(1));
1784  SVI.setOperand(1, UndefValue::get(RHS->getType()));
1785  SVI.setOperand(2, ConstantVector::get(Elts));
1786  return &SVI;
1787  }
1788 
1789  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1790  return I;
1791 
1792  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1793  return I;
1794 
1795  if (Instruction *I = narrowVectorSelect(SVI, Builder))
1796  return I;
1797 
1798  APInt UndefElts(VWidth, 0);
1799  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1800  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1801  if (V != &SVI)
1802  return replaceInstUsesWith(SVI, V);
1803  return &SVI;
1804  }
1805 
1806  if (Instruction *I = foldIdentityExtractShuffle(SVI))
1807  return I;
1808 
1809  // These transforms have the potential to lose undef knowledge, so they are
1810  // intentionally placed after SimplifyDemandedVectorElts().
1811  if (Instruction *I = foldShuffleWithInsert(SVI))
1812  return I;
1813  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1814  return I;
1815 
1816  if (VWidth == LHSWidth) {
1817  // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
1818  bool isLHSID, isRHSID;
1819  recognizeIdentityMask(Mask, isLHSID, isRHSID);
1820 
1821  // Eliminate identity shuffles.
1822  if (isLHSID) return replaceInstUsesWith(SVI, LHS);
1823  if (isRHSID) return replaceInstUsesWith(SVI, RHS);
1824  }
1825 
1826  if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
1827  Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1828  return replaceInstUsesWith(SVI, V);
1829  }
1830 
1831  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1832  // a non-vector type. We can instead bitcast the original vector followed by
1833  // an extract of the desired element:
1834  //
1835  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1836  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1837  // %1 = bitcast <4 x i8> %sroa to i32
1838  // Becomes:
1839  // %bc = bitcast <16 x i8> %in to <4 x i32>
1840  // %ext = extractelement <4 x i32> %bc, i32 0
1841  //
1842  // If the shuffle is extracting a contiguous range of values from the input
1843  // vector then each use which is a bitcast of the extracted size can be
1844  // replaced. This will work if the vector types are compatible, and the begin
1845  // index is aligned to a value in the casted vector type. If the begin index
1846  // isn't aligned then we can shuffle the original vector (keeping the same
1847  // vector type) before extracting.
1848  //
1849  // This code will bail out if the target type is fundamentally incompatible
1850  // with vectors of the source type.
1851  //
1852  // Example of <16 x i8>, target type i32:
1853  // Index range [4,8): v-----------v Will work.
1854  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1855  // <16 x i8>: | | | | | | | | | | | | | | | | |
1856  // <4 x i32>: | | | | |
1857  // +-----------+-----------+-----------+-----------+
1858  // Index range [6,10): ^-----------^ Needs an extra shuffle.
1859  // Target type i40: ^--------------^ Won't work, bail.
1860  bool MadeChange = false;
1861  if (isShuffleExtractingFromLHS(SVI, Mask)) {
1862  Value *V = LHS;
1863  unsigned MaskElems = Mask.size();
1864  VectorType *SrcTy = cast<VectorType>(V->getType());
1865  unsigned VecBitWidth = SrcTy->getBitWidth();
1866  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1867  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1868  unsigned SrcNumElems = SrcTy->getNumElements();
1869  SmallVector<BitCastInst *, 8> BCs;
1870  DenseMap<Type *, Value *> NewBCs;
1871  for (User *U : SVI.users())
1872  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
1873  if (!BC->use_empty())
1874  // Only visit bitcasts that weren't previously handled.
1875  BCs.push_back(BC);
1876  for (BitCastInst *BC : BCs) {
1877  unsigned BegIdx = Mask.front();
1878  Type *TgtTy = BC->getDestTy();
1879  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
1880  if (!TgtElemBitWidth)
1881  continue;
1882  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
1883  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
1884  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
1885  if (!VecBitWidthsEqual)
1886  continue;
1887  if (!VectorType::isValidElementType(TgtTy))
1888  continue;
1889  VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
1890  if (!BegIsAligned) {
1891  // Shuffle the input so [0,NumElements) contains the output, and
1892  // [NumElems,SrcNumElems) is undef.
1893  SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
1894  UndefValue::get(Int32Ty));
1895  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
1896  ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
1897  V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
1898  ConstantVector::get(ShuffleMask),
1899  SVI.getName() + ".extract");
1900  BegIdx = 0;
1901  }
1902  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
1903  assert(SrcElemsPerTgtElem);
1904  BegIdx /= SrcElemsPerTgtElem;
1905  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
1906  auto *NewBC =
1907  BCAlreadyExists
1908  ? NewBCs[CastSrcTy]
1909  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
1910  if (!BCAlreadyExists)
1911  NewBCs[CastSrcTy] = NewBC;
1912  auto *Ext = Builder.CreateExtractElement(
1913  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
1914  // The shufflevector isn't being replaced: the bitcast that used it
1915  // is. InstCombine will visit the newly-created instructions.
1916  replaceInstUsesWith(*BC, Ext);
1917  MadeChange = true;
1918  }
1919  }
1920 
1921  // If the LHS is a shufflevector itself, see if we can combine it with this
1922  // one without producing an unusual shuffle.
1923  // Cases that might be simplified:
1924  // 1.
1925  // x1=shuffle(v1,v2,mask1)
1926  // x=shuffle(x1,undef,mask)
1927  // ==>
1928  // x=shuffle(v1,undef,newMask)
1929  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
1930  // 2.
1931  // x1=shuffle(v1,undef,mask1)
1932  // x=shuffle(x1,x2,mask)
1933  // where v1.size() == mask1.size()
1934  // ==>
1935  // x=shuffle(v1,x2,newMask)
1936  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
1937  // 3.
1938  // x2=shuffle(v2,undef,mask2)
1939  // x=shuffle(x1,x2,mask)
1940  // where v2.size() == mask2.size()
1941  // ==>
1942  // x=shuffle(x1,v2,newMask)
1943  // newMask[i] = (mask[i] < x1.size())
1944  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
1945  // 4.
1946  // x1=shuffle(v1,undef,mask1)
1947  // x2=shuffle(v2,undef,mask2)
1948  // x=shuffle(x1,x2,mask)
1949  // where v1.size() == v2.size()
1950  // ==>
1951  // x=shuffle(v1,v2,newMask)
1952  // newMask[i] = (mask[i] < x1.size())
1953  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
1954  //
1955  // Here we are very conservative:
1956  // we avoid producing a shuffle mask that does not appear in the input
1957  // program, because codegen may not be able to turn a merged shuffle back
1958  // into two specific shuffles, and that may produce worse code. As such,
1959  // we only merge two shuffles if the result is either a splat or one of the
1960  // input shuffle masks. In this case, merging the shuffles just removes
1961  // one instruction, which we know is safe. This is good for things like
1962  // turning: (splat(splat)) -> splat, or
1963  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
1964  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
1965  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
1966  if (LHSShuffle)
1967  if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
1968  LHSShuffle = nullptr;
1969  if (RHSShuffle)
1970  if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
1971  RHSShuffle = nullptr;
1972  if (!LHSShuffle && !RHSShuffle)
1973  return MadeChange ? &SVI : nullptr;
1974 
1975  Value* LHSOp0 = nullptr;
1976  Value* LHSOp1 = nullptr;
1977  Value* RHSOp0 = nullptr;
1978  unsigned LHSOp0Width = 0;
1979  unsigned RHSOp0Width = 0;
1980  if (LHSShuffle) {
1981  LHSOp0 = LHSShuffle->getOperand(0);
1982  LHSOp1 = LHSShuffle->getOperand(1);
1983  LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
1984  }
1985  if (RHSShuffle) {
1986  RHSOp0 = RHSShuffle->getOperand(0);
1987  RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
1988  }
1989  Value* newLHS = LHS;
1990  Value* newRHS = RHS;
1991  if (LHSShuffle) {
1992  // case 1
1993  if (isa<UndefValue>(RHS)) {
1994  newLHS = LHSOp0;
1995  newRHS = LHSOp1;
1996  }
1997  // case 2 or 4
1998  else if (LHSOp0Width == LHSWidth) {
1999  newLHS = LHSOp0;
2000  }
2001  }
2002  // case 3 or 4
2003  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2004  newRHS = RHSOp0;
2005  }
2006  // case 4
2007  if (LHSOp0 == RHSOp0) {
2008  newLHS = LHSOp0;
2009  newRHS = nullptr;
2010  }
2011 
2012  if (newLHS == LHS && newRHS == RHS)
2013  return MadeChange ? &SVI : nullptr;
2014 
2015  SmallVector<int, 16> LHSMask;
2016  SmallVector<int, 16> RHSMask;
2017  if (newLHS != LHS)
2018  LHSMask = LHSShuffle->getShuffleMask();
2019  if (RHSShuffle && newRHS != RHS)
2020  RHSMask = RHSShuffle->getShuffleMask();
2021 
2022  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2023  SmallVector<int, 16> newMask;
2024  bool isSplat = true;
2025  int SplatElt = -1;
2026  // Create a new mask for the new ShuffleVectorInst so that the new
2027  // ShuffleVectorInst is equivalent to the original one.
2028  for (unsigned i = 0; i < VWidth; ++i) {
2029  int eltMask;
2030  if (Mask[i] < 0) {
2031  // This element is an undef value.
2032  eltMask = -1;
2033  } else if (Mask[i] < (int)LHSWidth) {
2034  // This element is from left hand side vector operand.
2035  //
2036  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2037  // new mask value for the element.
2038  if (newLHS != LHS) {
2039  eltMask = LHSMask[Mask[i]];
2040  // If the value selected is an undef value, explicitly specify it
2041  // with a -1 mask value.
2042  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2043  eltMask = -1;
2044  } else
2045  eltMask = Mask[i];
2046  } else {
2047  // This element is from right hand side vector operand
2048  //
2049  // If the value selected is an undef value, explicitly specify it
2050  // with a -1 mask value. (case 1)
2051  if (isa<UndefValue>(RHS))
2052  eltMask = -1;
2053  // If RHS is going to be replaced (case 3 or 4), calculate the
2054  // new mask value for the element.
2055  else if (newRHS != RHS) {
2056  eltMask = RHSMask[Mask[i]-LHSWidth];
2057  // If the value selected is an undef value, explicitly specify it
2058  // with a -1 mask value.
2059  if (eltMask >= (int)RHSOp0Width) {
2060  assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2061  && "should have been check above");
2062  eltMask = -1;
2063  }
2064  } else
2065  eltMask = Mask[i]-LHSWidth;
2066 
2067  // If LHS's width is changed, shift the mask value accordingly.
2068  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2069  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2070  // If newRHS == newLHS, we want to remap any references from newRHS to
2071  // newLHS so that we can properly identify splats that may occur due to
2072  // obfuscation across the two vectors.
2073  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2074  eltMask += newLHSWidth;
2075  }
2076 
2077  // Check if this could still be a splat.
2078  if (eltMask >= 0) {
2079  if (SplatElt >= 0 && SplatElt != eltMask)
2080  isSplat = false;
2081  SplatElt = eltMask;
2082  }
2083 
2084  newMask.push_back(eltMask);
2085  }
2086 
2087  // If the result mask is equal to one of the original shuffle masks,
2088  // or is a splat, do the replacement.
2089  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2090  SmallVector<Constant*, 16> Elts;
2091  for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2092  if (newMask[i] < 0) {
2093  Elts.push_back(UndefValue::get(Int32Ty));
2094  } else {
2095  Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2096  }
2097  }
2098  if (!newRHS)
2099  newRHS = UndefValue::get(newLHS->getType());
2100  return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2101  }
2102 
2103  // If the result mask is an identity, replace uses of this instruction with
2104  // the corresponding argument.
2105  bool isLHSID, isRHSID;
2106  recognizeIdentityMask(newMask, isLHSID, isRHSID);
2107  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
2108  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
2109 
2110  return MadeChange ? &SVI : nullptr;
2111 }