LLVM  9.0.0svn
InstCombineVectorOps.cpp
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements instcombine for ExtractElement, InsertElement and
10 // ShuffleVector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/VectorUtils.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Operator.h"
30 #include "llvm/IR/PatternMatch.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/IR/User.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37 #include <cassert>
38 #include <cstdint>
39 #include <iterator>
40 #include <utility>
41 
42 using namespace llvm;
43 using namespace PatternMatch;
44 
45 #define DEBUG_TYPE "instcombine"
46 
47 /// Return true if the value is cheaper to scalarize than it is to leave as a
48 /// vector operation. IsConstantExtractIndex indicates whether we are extracting
49 /// one known element from a vector constant.
50 ///
51 /// FIXME: It's possible to create more instructions than previously existed.
52 static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53  // If we can pick a scalar constant value out of a vector, that is free.
54  if (auto *C = dyn_cast<Constant>(V))
55  return IsConstantExtractIndex || C->getSplatValue();
56 
57  // An insertelement to the same constant index as our extract will simplify
58  // to the scalar inserted element. An insertelement to a different constant
59  // index is irrelevant to our extract.
60  if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61  return IsConstantExtractIndex;
62 
63  if (match(V, m_OneUse(m_Load(m_Value()))))
64  return true;
65 
66  Value *V0, *V1;
67  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69  cheapToScalarize(V1, IsConstantExtractIndex))
70  return true;
71 
72  CmpInst::Predicate UnusedPred;
73  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75  cheapToScalarize(V1, IsConstantExtractIndex))
76  return true;
77 
78  return false;
79 }
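// A minimal, hand-written IR sketch of when cheapToScalarize() applies (not
// taken from the LLVM test suite; the exact instcombine output may differ):
//
//   define i32 @ext_of_add(<4 x i32> %x) {
//     %v = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
//     %e = extractelement <4 x i32> %v, i32 2
//     ret i32 %e
//   }
//
// With a constant extract index, the vector add is cheap to scalarize:
//
//   %x2 = extractelement <4 x i32> %x, i32 2
//   %e  = add i32 %x2, 3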
80 
81 // If we have a PHI node with a vector type that is only used to feed
82 // itself and be an operand of extractelement at a constant location,
83 // try to replace the PHI of the vector type with a PHI of a scalar type.
84 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
85  SmallVector<ExtractElementInst *, 2> Extracts;
86  // The users we want the PHI to have are:
87  // 1) The EI ExtractElement (we already know this)
88  // 2) Possibly more ExtractElements with the same index.
89  // 3) Another operand, which will feed back into the PHI.
90  Instruction *PHIUser = nullptr;
91  for (auto U : PN->users()) {
92  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93  if (EI.getIndexOperand() == EU->getIndexOperand())
94  Extracts.push_back(EU);
95  else
96  return nullptr;
97  } else if (!PHIUser) {
98  PHIUser = cast<Instruction>(U);
99  } else {
100  return nullptr;
101  }
102  }
103 
104  if (!PHIUser)
105  return nullptr;
106 
107  // Verify that this PHI user has one use, which is the PHI itself,
108  // and that it is a binary operation which is cheap to scalarize.
109  // Otherwise return nullptr.
110  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
111  !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
112  return nullptr;
113 
114  // Create a scalar PHI node that will replace the vector PHI node
115  // just before the current PHI node.
116  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118  // Scalarize each PHI operand.
119  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
120  Value *PHIInVal = PN->getIncomingValue(i);
121  BasicBlock *inBB = PN->getIncomingBlock(i);
122  Value *Elt = EI.getIndexOperand();
123  // If the operand is the PHI induction variable:
124  if (PHIInVal == PHIUser) {
125  // Scalarize the binary operation. Its first operand is the
126  // scalar PHI, and the second operand is extracted from the other
127  // vector operand.
128  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
130  Value *Op = InsertNewInstWith(
131  ExtractElementInst::Create(B0->getOperand(opId), Elt,
132  B0->getOperand(opId)->getName() + ".Elt"),
133  *B0);
134  Value *newPHIUser = InsertNewInstWith(
135  BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
136  scalarPHI, Op, B0), *B0);
137  scalarPHI->addIncoming(newPHIUser, inBB);
138  } else {
139  // Scalarize PHI input:
140  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141  // Insert the new instruction into the predecessor basic block.
142  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143  BasicBlock::iterator InsertPos;
144  if (pos && !isa<PHINode>(pos)) {
145  InsertPos = ++pos->getIterator();
146  } else {
147  InsertPos = inBB->getFirstInsertionPt();
148  }
149 
150  InsertNewInstWith(newEI, *InsertPos);
151 
152  scalarPHI->addIncoming(newEI, inBB);
153  }
154  }
155 
156  for (auto E : Extracts)
157  replaceInstUsesWith(*E, scalarPHI);
158 
159  return &EI;
160 }
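// Illustrative IR for scalarizePHI(), hand-written for this note (names and
// ordering of the actual output may differ): a <4 x float> PHI whose only
// users are a feedback fadd and extracts at a constant lane becomes a
// scalar PHI.
//
//   loop:
//     %vec = phi <4 x float> [ zeroinitializer, %entry ], [ %vec.next, %loop ]
//     %vec.next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//     ...
//   exit:
//     %r = extractelement <4 x float> %vec, i32 0
//
// is rewritten so the PHI, the fadd, and the extract all operate on float
// instead of <4 x float>.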
161 
162 static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
163  InstCombiner::BuilderTy &Builder,
164  bool IsBigEndian) {
165  Value *X;
166  uint64_t ExtIndexC;
167  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
168  !X->getType()->isVectorTy() ||
169  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
170  return nullptr;
171 
172  // If this extractelement is using a bitcast from a vector of the same number
173  // of elements, see if we can find the source element from the source vector:
174  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175  Type *SrcTy = X->getType();
176  Type *DestTy = Ext.getType();
177  unsigned NumSrcElts = SrcTy->getVectorNumElements();
178  unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179  if (NumSrcElts == NumElts)
180  if (Value *Elt = findScalarElement(X, ExtIndexC))
181  return new BitCastInst(Elt, DestTy);
182 
183  // If the source elements are wider than the destination, try to shift and
184  // truncate a subset of scalar bits of an insert op.
185  if (NumSrcElts < NumElts) {
186  Value *Scalar;
187  uint64_t InsIndexC;
188  if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
189  m_ConstantInt(InsIndexC))))
190  return nullptr;
191 
192  // The extract must be from the subset of vector elements that we inserted
193  // into. Example: if we inserted element 1 of a <2 x i64> and we are
194  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195  // of elements 4-7 of the bitcasted vector.
196  unsigned NarrowingRatio = NumElts / NumSrcElts;
197  if (ExtIndexC / NarrowingRatio != InsIndexC)
198  return nullptr;
199 
200  // We are extracting part of the original scalar. How that scalar is
201  // inserted into the vector depends on the endian-ness. Example:
202  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
203  // +--+--+--+--+--+--+--+--+
204  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
205  // extelt <4 x i16> V', 3: | |S2|S3|
206  // +--+--+--+--+--+--+--+--+
207  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209  // In this example, we must right-shift little-endian. Big-endian is just a
210  // truncate.
211  unsigned Chunk = ExtIndexC % NarrowingRatio;
212  if (IsBigEndian)
213  Chunk = NarrowingRatio - 1 - Chunk;
214 
215  // Bail out if this is an FP vector to FP vector sequence. That would take
216  // more instructions than we started with unless there is no shift, and it
217  // may not be handled as well in the backend.
218  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219  bool NeedDestBitcast = DestTy->isFloatingPointTy();
220  if (NeedSrcBitcast && NeedDestBitcast)
221  return nullptr;
222 
223  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225  unsigned ShAmt = Chunk * DestWidth;
226 
227  // TODO: This limitation is more strict than necessary. We could sum the
228  // number of new instructions and subtract the number eliminated to know if
229  // we can proceed.
230  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
231  if (NeedSrcBitcast || NeedDestBitcast)
232  return nullptr;
233 
234  if (NeedSrcBitcast) {
235  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237  }
238 
239  if (ShAmt) {
240  // Bail out if we could end with more instructions than we started with.
241  if (!Ext.getVectorOperand()->hasOneUse())
242  return nullptr;
243  Scalar = Builder.CreateLShr(Scalar, ShAmt);
244  }
245 
246  if (NeedDestBitcast) {
247  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249  }
250  return new TruncInst(Scalar, DestTy);
251  }
252 
253  return nullptr;
254 }
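// Hand-written example of the shift+truncate path above (little-endian;
// the exact instcombine output may differ):
//
//   %ins = insertelement <2 x i64> %v, i64 %s, i32 1
//   %bc  = bitcast <2 x i64> %ins to <8 x i16>
//   %e   = extractelement <8 x i16> %bc, i32 6
//
// NarrowingRatio = 4 and Chunk = 6 % 4 = 2, so on little-endian this becomes:
//
//   %sh = lshr i64 %s, 32
//   %e  = trunc i64 %sh to i16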
255 
256 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
257  Value *SrcVec = EI.getVectorOperand();
258  Value *Index = EI.getIndexOperand();
259  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
260  SQ.getWithInstruction(&EI)))
261  return replaceInstUsesWith(EI, V);
262 
263  // If extracting a specified index from the vector, see if we can recursively
264  // find a previously computed scalar that was inserted into the vector.
265  auto *IndexC = dyn_cast<ConstantInt>(Index);
266  if (IndexC) {
267  unsigned NumElts = EI.getVectorOperandType()->getNumElements();
268 
269  // InstSimplify should handle cases where the index is invalid.
270  if (!IndexC->getValue().ule(NumElts))
271  return nullptr;
272 
273  // This instruction only demands the single element from the input vector.
274  // If the input vector has a single use, simplify it based on this use
275  // property.
276  if (SrcVec->hasOneUse() && NumElts != 1) {
277  APInt UndefElts(NumElts, 0);
278  APInt DemandedElts(NumElts, 0);
279  DemandedElts.setBit(IndexC->getZExtValue());
280  if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
281  UndefElts)) {
282  EI.setOperand(0, V);
283  return &EI;
284  }
285  }
286 
287  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
288  return I;
289 
290  // If there's a vector PHI feeding a scalar use through this extractelement
291  // instruction, try to scalarize the PHI.
292  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
293  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
294  return ScalarPHI;
295  }
296 
297  BinaryOperator *BO;
298  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
299  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
300  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
301  Value *E0 = Builder.CreateExtractElement(X, Index);
302  Value *E1 = Builder.CreateExtractElement(Y, Index);
303  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
304  }
305 
306  Value *X, *Y;
307  CmpInst::Predicate Pred;
308  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
309  cheapToScalarize(SrcVec, IndexC)) {
310  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
311  Value *E0 = Builder.CreateExtractElement(X, Index);
312  Value *E1 = Builder.CreateExtractElement(Y, Index);
313  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
314  }
315 
316  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
317  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
318  // Extracting the inserted element?
319  if (IE->getOperand(2) == Index)
320  return replaceInstUsesWith(EI, IE->getOperand(1));
321  // If the inserted and extracted elements are constants, they must not
322  // be the same value; extract from the pre-inserted value instead.
323  if (isa<Constant>(IE->getOperand(2)) && IndexC) {
324  Worklist.AddValue(SrcVec);
325  EI.setOperand(0, IE->getOperand(0));
326  return &EI;
327  }
328  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
329  // If this is extracting an element from a shufflevector, figure out where
330  // it came from and extract from the appropriate input element instead.
331  if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
332  int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
333  Value *Src;
334  unsigned LHSWidth =
335  SVI->getOperand(0)->getType()->getVectorNumElements();
336 
337  if (SrcIdx < 0)
338  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
339  if (SrcIdx < (int)LHSWidth)
340  Src = SVI->getOperand(0);
341  else {
342  SrcIdx -= LHSWidth;
343  Src = SVI->getOperand(1);
344  }
345  Type *Int32Ty = Type::getInt32Ty(EI.getContext());
346  return ExtractElementInst::Create(Src,
347  ConstantInt::get(Int32Ty,
348  SrcIdx, false));
349  }
350  } else if (auto *CI = dyn_cast<CastInst>(I)) {
351  // Canonicalize extractelement(cast) -> cast(extractelement).
352  // Bitcasts can change the number of vector elements, and they cost
353  // nothing.
354  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
355  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
356  Worklist.AddValue(EE);
357  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
358  }
359  }
360  }
361  return nullptr;
362 }
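// Hand-written sketch of the binop/cmp scalarization performed above (the
// actual output may differ):
//
//   %b = mul <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %e = extractelement <4 x i32> %b, i32 1
// -->
//   %x1 = extractelement <4 x i32> %x, i32 1
//   %e  = mul i32 %x1, 3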
363 
364 /// If V is a shuffle of values that ONLY returns elements from either LHS or
365 /// RHS, return the shuffle mask and true. Otherwise, return false.
366 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
367  SmallVectorImpl<Constant*> &Mask) {
368  assert(LHS->getType() == RHS->getType() &&
369  "Invalid CollectSingleShuffleElements");
370  unsigned NumElts = V->getType()->getVectorNumElements();
371 
372  if (isa<UndefValue>(V)) {
373  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
374  return true;
375  }
376 
377  if (V == LHS) {
378  for (unsigned i = 0; i != NumElts; ++i)
379  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
380  return true;
381  }
382 
383  if (V == RHS) {
384  for (unsigned i = 0; i != NumElts; ++i)
385  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
386  i+NumElts));
387  return true;
388  }
389 
390  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
391  // If this is an insert of an extract from some other vector, include it.
392  Value *VecOp = IEI->getOperand(0);
393  Value *ScalarOp = IEI->getOperand(1);
394  Value *IdxOp = IEI->getOperand(2);
395 
396  if (!isa<ConstantInt>(IdxOp))
397  return false;
398  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
399 
400  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
401  // We can handle this if the vector we are inserting into is
402  // transitively ok.
403  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
404  // If so, update the mask to reflect the inserted undef.
405  Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
406  return true;
407  }
408  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
409  if (isa<ConstantInt>(EI->getOperand(1))) {
410  unsigned ExtractedIdx =
411  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
412  unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
413 
414  // This must be extracting from either LHS or RHS.
415  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
416  // We can handle this if the vector we are inserting into is
417  // transitively ok.
418  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
419  // If so, update the mask to reflect the inserted value.
420  if (EI->getOperand(0) == LHS) {
421  Mask[InsertedIdx % NumElts] =
422  ConstantInt::get(Type::getInt32Ty(V->getContext()),
423  ExtractedIdx);
424  } else {
425  assert(EI->getOperand(0) == RHS);
426  Mask[InsertedIdx % NumElts] =
427  ConstantInt::get(Type::getInt32Ty(V->getContext()),
428  ExtractedIdx + NumLHSElts);
429  }
430  return true;
431  }
432  }
433  }
434  }
435  }
436 
437  return false;
438 }
439 
440 /// If we have insertion into a vector that is wider than the vector that we
441 /// are extracting from, try to widen the source vector to allow a single
442 /// shufflevector to replace one or more insert/extract pairs.
443 static void replaceExtractElements(InsertElementInst *InsElt,
444  ExtractElementInst *ExtElt,
445  InstCombiner &IC) {
446  VectorType *InsVecType = InsElt->getType();
447  VectorType *ExtVecType = ExtElt->getVectorOperandType();
448  unsigned NumInsElts = InsVecType->getVectorNumElements();
449  unsigned NumExtElts = ExtVecType->getVectorNumElements();
450 
451  // The inserted-to vector must be wider than the extracted-from vector.
452  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
453  NumExtElts >= NumInsElts)
454  return;
455 
456  // Create a shuffle mask to widen the extended-from vector using undefined
457  // values. The mask selects all of the values of the original vector followed
458  // by as many undefined values as needed to create a vector of the same length
459  // as the inserted-to vector.
460  SmallVector<Constant *, 16> ExtendMask;
461  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
462  for (unsigned i = 0; i < NumExtElts; ++i)
463  ExtendMask.push_back(ConstantInt::get(IntType, i));
464  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
465  ExtendMask.push_back(UndefValue::get(IntType));
466 
467  Value *ExtVecOp = ExtElt->getVectorOperand();
468  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
469  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
470  ? ExtVecOpInst->getParent()
471  : ExtElt->getParent();
472 
473  // TODO: This restriction matches the basic block check below when creating
474  // new extractelement instructions. If that limitation is removed, this one
475  // could also be removed. But for now, we just bail out to ensure that we
476  // will replace the extractelement instruction that is feeding our
477  // insertelement instruction. This allows the insertelement to then be
478  // replaced by a shufflevector. If the insertelement is not replaced, we can
479  // induce infinite looping because there's an optimization for extractelement
480  // that will delete our widening shuffle. This would trigger another attempt
481  // here to create that shuffle, and we spin forever.
482  if (InsertionBlock != InsElt->getParent())
483  return;
484 
485  // TODO: This restriction matches the check in visitInsertElementInst() and
486  // prevents an infinite loop caused by not turning the extract/insert pair
487  // into a shuffle. We really should not need either check, but we're lacking
488  // folds for shufflevectors because we're afraid to generate shuffle masks
489  // that the backend can't handle.
490  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
491  return;
492 
493  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
494  ConstantVector::get(ExtendMask));
495 
496  // Insert the new shuffle after the vector operand of the extract is defined
497  // (as long as it's not a PHI) or at the start of the basic block of the
498  // extract, so any subsequent extracts in the same basic block can use it.
499  // TODO: Insert before the earliest ExtractElementInst that is replaced.
500  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
501  WideVec->insertAfter(ExtVecOpInst);
502  else
503  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
504 
505  // Replace extracts from the original narrow vector with extracts from the new
506  // wide vector.
507  for (User *U : ExtVecOp->users()) {
508  auto *OldExt = dyn_cast<ExtractElementInst>(U);
509  if (!OldExt || OldExt->getParent() != WideVec->getParent())
510  continue;
511  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
512  NewExt->insertAfter(OldExt);
513  IC.replaceInstUsesWith(*OldExt, NewExt);
514  }
515 }
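// Hand-written illustration of the widening shuffle created above (assumed
// names; the actual output may differ):
//
//   %e0 = extractelement <2 x float> %src, i32 0
//   %i0 = insertelement <4 x float> %dst, float %e0, i32 0
//   %e1 = extractelement <2 x float> %src, i32 1
//   %i1 = insertelement <4 x float> %i0, float %e1, i32 1
//
// The narrow source is first widened with undef lanes:
//
//   %wide = shufflevector <2 x float> %src, <2 x float> undef,
//                         <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
//
// so that a later visit can turn the insert/extract chain into one shuffle
// of %wide and %dst.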
516 
517 /// We are building a shuffle to create V, which is a sequence of insertelement,
518 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
519 /// not rely on the second vector source. Return a std::pair containing the
520 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
521 /// parameter as required.
522 ///
523 /// Note: we intentionally don't try to fold earlier shuffles since they have
524 /// often been chosen carefully to be efficiently implementable on the target.
525 using ShuffleOps = std::pair<Value *, Value *>;
526 
527 static ShuffleOps collectShuffleElements(Value *V,
528  SmallVectorImpl<Constant *> &Mask,
529  Value *PermittedRHS,
530  InstCombiner &IC) {
531  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
532  unsigned NumElts = V->getType()->getVectorNumElements();
533 
534  if (isa<UndefValue>(V)) {
535  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
536  return std::make_pair(
537  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
538  }
539 
540  if (isa<ConstantAggregateZero>(V)) {
541  Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
542  return std::make_pair(V, nullptr);
543  }
544 
545  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
546  // If this is an insert of an extract from some other vector, include it.
547  Value *VecOp = IEI->getOperand(0);
548  Value *ScalarOp = IEI->getOperand(1);
549  Value *IdxOp = IEI->getOperand(2);
550 
551  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
552  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
553  unsigned ExtractedIdx =
554  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
555  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
556 
557  // Either the extracted from or inserted into vector must be RHSVec,
558  // otherwise we'd end up with a shuffle of three inputs.
559  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
560  Value *RHS = EI->getOperand(0);
561  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
562  assert(LR.second == nullptr || LR.second == RHS);
563 
564  if (LR.first->getType() != RHS->getType()) {
565  // Although we are giving up for now, see if we can create extracts
566  // that match the inserts for another round of combining.
567  replaceExtractElements(IEI, EI, IC);
568 
569  // We tried our best, but we can't find anything compatible with RHS
570  // further up the chain. Return a trivial shuffle.
571  for (unsigned i = 0; i < NumElts; ++i)
572  Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
573  return std::make_pair(V, nullptr);
574  }
575 
576  unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
577  Mask[InsertedIdx % NumElts] =
578  ConstantInt::get(Type::getInt32Ty(V->getContext()),
579  NumLHSElts+ExtractedIdx);
580  return std::make_pair(LR.first, RHS);
581  }
582 
583  if (VecOp == PermittedRHS) {
584  // We've gone as far as we can: anything on the other side of the
585  // extractelement will already have been converted into a shuffle.
586  unsigned NumLHSElts =
587  EI->getOperand(0)->getType()->getVectorNumElements();
588  for (unsigned i = 0; i != NumElts; ++i)
589  Mask.push_back(
590  ConstantInt::get(Type::getInt32Ty(V->getContext()),
591  i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
592  return std::make_pair(EI->getOperand(0), PermittedRHS);
593  }
594 
595  // If this insertelement is a chain that comes from exactly these two
596  // vectors, return the vector and the effective shuffle.
597  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
598  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
599  Mask))
600  return std::make_pair(EI->getOperand(0), PermittedRHS);
601  }
602  }
603  }
604 
605  // Otherwise, we can't do anything fancy. Return an identity vector.
606  for (unsigned i = 0; i != NumElts; ++i)
607  Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
608  return std::make_pair(V, nullptr);
609 }
610 
611 /// Try to find redundant insertvalue instructions, like the following ones:
612 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
613 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
614 /// Here the second instruction inserts values at the same indices as the
615 /// first one, making the first one redundant.
616 /// It should be transformed to:
617 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
618 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
619  bool IsRedundant = false;
620  ArrayRef<unsigned int> FirstIndices = I.getIndices();
621 
622  // If there is a chain of insertvalue instructions (each of them except the
623  // last one has only one use and it's another insertvalue insn from this
624  // chain), check if any of the 'children' uses the same indices as the first
625  // instruction. In this case, the first one is redundant.
626  Value *V = &I;
627  unsigned Depth = 0;
628  while (V->hasOneUse() && Depth < 10) {
629  User *U = V->user_back();
630  auto UserInsInst = dyn_cast<InsertValueInst>(U);
631  if (!UserInsInst || U->getOperand(0) != V)
632  break;
633  if (UserInsInst->getIndices() == FirstIndices) {
634  IsRedundant = true;
635  break;
636  }
637  V = UserInsInst;
638  Depth++;
639  }
640 
641  if (IsRedundant)
642  return replaceInstUsesWith(I, I.getOperand(0));
643  return nullptr;
644 }
645 
646 static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
647  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
648  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
649 
650  // A vector select does not change the size of the operands.
651  if (MaskSize != VecSize)
652  return false;
653 
654  // Each mask element must be undefined or choose a vector element from one of
655  // the source operands without crossing vector lanes.
656  for (int i = 0; i != MaskSize; ++i) {
657  int Elt = Shuf.getMaskValue(i);
658  if (Elt != -1 && Elt != i && Elt != i + VecSize)
659  return false;
660  }
661 
662  return true;
663 }
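// For example (illustrative only), with 4-element operands this shuffle is
// select-equivalent because lane i always comes from lane i of one input:
//
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 5, i32 6, i32 3>
//
// The mask <1, 5, 6, 3> would not qualify, because lane 0 reads lane 1 of %a.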
664 
665 // Turn a chain of inserts that splats a value into a canonical insert + shuffle
666 // splat. That is:
667 // insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
668 // shufflevector(insertelt(X, %k, 0), undef, zero)
669 static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
670  // We are interested in the last insert in a chain. So, if this insert
671  // has a single user, and that user is an insert, bail.
672  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
673  return nullptr;
674 
675  VectorType *VT = cast<VectorType>(InsElt.getType());
676  int NumElements = VT->getNumElements();
677 
678  // Do not try to do this for a one-element vector, since that's a nop,
679  // and will cause an inf-loop.
680  if (NumElements == 1)
681  return nullptr;
682 
683  Value *SplatVal = InsElt.getOperand(1);
684  InsertElementInst *CurrIE = &InsElt;
685  SmallVector<bool, 16> ElementPresent(NumElements, false);
686  InsertElementInst *FirstIE = nullptr;
687 
688  // Walk the chain backwards, keeping track of which indices we inserted into,
689  // until we hit something that isn't an insert of the splatted value.
690  while (CurrIE) {
691  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
692  if (!Idx || CurrIE->getOperand(1) != SplatVal)
693  return nullptr;
694 
695  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
696  // Check none of the intermediate steps have any additional uses, except
697  // for the root insertelement instruction, which can be re-used, if it
698  // inserts at position 0.
699  if (CurrIE != &InsElt &&
700  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
701  return nullptr;
702 
703  ElementPresent[Idx->getZExtValue()] = true;
704  FirstIE = CurrIE;
705  CurrIE = NextIE;
706  }
707 
708  // Make sure we've seen an insert into every element.
709  if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
710  return nullptr;
711 
712  // All right, create the insert + shuffle.
713  Instruction *InsertFirst;
714  if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
715  InsertFirst = FirstIE;
716  else
717  InsertFirst = InsertElementInst::Create(
718  UndefValue::get(VT), SplatVal,
719  ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
720  "", &InsElt);
721 
722  Constant *ZeroMask = ConstantAggregateZero::get(
723  VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
724 
725  return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
726 }
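// Hand-written example of the splat canonicalization (the actual output may
// differ):
//
//   %i0 = insertelement <4 x i32> undef, i32 %k, i32 0
//   %i1 = insertelement <4 x i32> %i0, i32 %k, i32 1
//   %i2 = insertelement <4 x i32> %i1, i32 %k, i32 2
//   %i3 = insertelement <4 x i32> %i2, i32 %k, i32 3
// -->
//   %i0    = insertelement <4 x i32> undef, i32 %k, i32 0
//   %splat = shufflevector <4 x i32> %i0, <4 x i32> undef,
//                          <4 x i32> zeroinitializer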
727 
728 /// If we have an insertelement instruction feeding into another insertelement
729 /// and the 2nd is inserting a constant into the vector, canonicalize that
730 /// constant insertion before the insertion of a variable:
731 ///
732 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
733 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
734 ///
735 /// This has the potential of eliminating the 2nd insertelement instruction
736 /// via constant folding of the scalar constant into a vector constant.
737 static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
738  InstCombiner::BuilderTy &Builder) {
739  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
740  if (!InsElt1 || !InsElt1->hasOneUse())
741  return nullptr;
742 
743  Value *X, *Y;
744  Constant *ScalarC;
745  ConstantInt *IdxC1, *IdxC2;
746  if (match(InsElt1->getOperand(0), m_Value(X)) &&
747  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
748  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
749  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
750  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
751  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
752  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
753  }
754 
755  return nullptr;
756 }
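// A small hand-written instance of the hoisting described above (assumed
// values; the actual output may differ):
//
//   %i1 = insertelement <4 x i32> %x, i32 %y, i32 0
//   %i2 = insertelement <4 x i32> %i1, i32 42, i32 1
// -->
//   %c  = insertelement <4 x i32> %x, i32 42, i32 1
//   %i2 = insertelement <4 x i32> %c, i32 %y, i32 0
//
// If %x is a constant vector, the constant insert folds away entirely.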
757 
758 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
759 /// --> shufflevector X, CVec', Mask'
760 static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
761  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
762  // Bail out if the parent has more than one use. In that case, we'd be
763  // replacing the insertelt with a shuffle, and that's not a clear win.
764  if (!Inst || !Inst->hasOneUse())
765  return nullptr;
766  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
767  // The shuffle must have a constant vector operand. The insertelt must have
768  // a constant scalar being inserted at a constant position in the vector.
769  Constant *ShufConstVec, *InsEltScalar;
770  uint64_t InsEltIndex;
771  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
772  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
773  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
774  return nullptr;
775 
776  // Adding an element to an arbitrary shuffle could be expensive, but a
777  // shuffle that selects elements from vectors without crossing lanes is
778  // assumed cheap.
779  // If we're just adding a constant into that shuffle, it will still be
780  // cheap.
781  if (!isShuffleEquivalentToSelect(*Shuf))
782  return nullptr;
783 
784  // From the above 'select' check, we know that the mask has the same number
785  // of elements as the vector input operands. We also know that each constant
786  // input element is used in its lane and can not be used more than once by
787  // the shuffle. Therefore, replace the constant in the shuffle's constant
788  // vector with the insertelt constant. Replace the constant in the shuffle's
789  // mask vector with the insertelt index plus the length of the vector
790  // (because the constant vector operand of a shuffle is always the 2nd
791  // operand).
792  Constant *Mask = Shuf->getMask();
793  unsigned NumElts = Mask->getType()->getVectorNumElements();
794  SmallVector<Constant *, 16> NewShufElts(NumElts);
795  SmallVector<Constant *, 16> NewMaskElts(NumElts);
796  for (unsigned I = 0; I != NumElts; ++I) {
797  if (I == InsEltIndex) {
798  NewShufElts[I] = InsEltScalar;
799  Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
800  NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
801  } else {
802  // Copy over the existing values.
803  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
804  NewMaskElts[I] = Mask->getAggregateElement(I);
805  }
806  }
807 
808  // Create new operands for a shuffle that includes the constant of the
809  // original insertelt. The old shuffle will be dead now.
810  return new ShuffleVectorInst(Shuf->getOperand(0),
811  ConstantVector::get(NewShufElts),
812  ConstantVector::get(NewMaskElts));
813  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
814  // Transform sequences of insertelements ops with constant data/indexes into
815  // a single shuffle op.
816  unsigned NumElts = InsElt.getType()->getNumElements();
817 
818  uint64_t InsertIdx[2];
819  Constant *Val[2];
820  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
821  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
822  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
823  !match(IEI->getOperand(1), m_Constant(Val[1])))
824  return nullptr;
825  SmallVector<Constant *, 16> Values(NumElts);
826  SmallVector<Constant *, 16> Mask(NumElts);
827  auto ValI = std::begin(Val);
828  // Generate new constant vector and mask.
829  // We have 2 values/masks from the insertelements instructions. Insert them
830  // into new value/mask vectors.
831  for (uint64_t I : InsertIdx) {
832  if (!Values[I]) {
833  assert(!Mask[I]);
834  Values[I] = *ValI;
835  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
836  NumElts + I);
837  }
838  ++ValI;
839  }
840  // Remaining values are filled with 'undef' values.
841  for (unsigned I = 0; I < NumElts; ++I) {
842  if (!Values[I]) {
843  assert(!Mask[I]);
844  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
845  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
846  }
847  }
848  // Create new operands for a shuffle that includes the constant of the
849  // original insertelt.
850  return new ShuffleVectorInst(IEI->getOperand(0),
851  ConstantVector::get(Values),
852  ConstantVector::get(Mask));
853  }
854  return nullptr;
855 }
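// Hand-written example of the shuffle-constant rewrite above (the actual
// output may differ):
//
//   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 11, i32 12, i32 13>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %i = insertelement <4 x i32> %s, i32 99, i32 2
// -->
//   %i = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 11, i32 99, i32 13>,
//                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>
//
// (lane 2 of the new mask now selects the inserted constant from the second
// operand; lane 0 of the constant vector is unused).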
856 
857 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
858  Value *VecOp = IE.getOperand(0);
859  Value *ScalarOp = IE.getOperand(1);
860  Value *IdxOp = IE.getOperand(2);
861 
862  if (auto *V = SimplifyInsertElementInst(
863  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
864  return replaceInstUsesWith(IE, V);
865 
866  // Inserting an undef or into an undefined place, remove this.
867  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
868  replaceInstUsesWith(IE, VecOp);
869 
870  // If the vector and scalar are both bitcast from the same element type, do
871  // the insert in that source type followed by bitcast.
872  Value *VecSrc, *ScalarSrc;
873  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
874  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
875  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
876  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
877  VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
878  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
879  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
880  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
881  return new BitCastInst(NewInsElt, IE.getType());
882  }
883 
884  // If the inserted element was extracted from some other vector and both
885  // indexes are constant, try to turn this into a shuffle.
886  uint64_t InsertedIdx, ExtractedIdx;
887  Value *ExtVecOp;
888  if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
889  match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
890  m_ConstantInt(ExtractedIdx)))) {
891  unsigned NumInsertVectorElts = IE.getType()->getNumElements();
892  unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
893  if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
894  return replaceInstUsesWith(IE, VecOp);
895 
896  if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
897  return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
898 
899  // If we are extracting a value from a vector, then inserting it right
900  // back into the same place, just use the input vector.
901  if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
902  return replaceInstUsesWith(IE, VecOp);
903 
904  // TODO: Looking at the user(s) to determine if this insert is a
905  // fold-to-shuffle opportunity does not match the usual instcombine
906  // constraints. We should decide if the transform is worthy based only
907  // on this instruction and its operands, but that may not work currently.
908  //
909  // Here, we are trying to avoid creating shuffles before reaching
910  // the end of a chain of extract-insert pairs. This is complicated because
911  // we do not generally form arbitrary shuffle masks in instcombine
912  // (because those may codegen poorly), but collectShuffleElements() does
913  // exactly that.
914  //
915  // The rules for determining what is an acceptable target-independent
916  // shuffle mask are fuzzy because they evolve based on the backend's
917  // capabilities and real-world impact.
918  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
919  if (!Insert.hasOneUse())
920  return true;
921  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
922  if (!InsertUser)
923  return true;
924  return false;
925  };
926 
927  // Try to form a shuffle from a chain of extract-insert ops.
928  if (isShuffleRootCandidate(IE)) {
929  SmallVector<Constant*, 16> Mask;
930  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
931 
932  // The proposed shuffle may be trivial, in which case we shouldn't
933  // perform the combine.
934  if (LR.first != &IE && LR.second != &IE) {
935  // We now have a shuffle of LHS, RHS, Mask.
936  if (LR.second == nullptr)
937  LR.second = UndefValue::get(LR.first->getType());
938  return new ShuffleVectorInst(LR.first, LR.second,
939  ConstantVector::get(Mask));
940  }
941  }
942  }
943 
944  unsigned VWidth = VecOp->getType()->getVectorNumElements();
945  APInt UndefElts(VWidth, 0);
946  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
947  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
948  if (V != &IE)
949  return replaceInstUsesWith(IE, V);
950  return &IE;
951  }
952 
953  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
954  return Shuf;
955 
956  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
957  return NewInsElt;
958 
959  // Turn a sequence of inserts that broadcasts a scalar into a single
960  // insert + shufflevector.
961  if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
962  return Broadcast;
963 
964  return nullptr;
965 }
966 
967 /// Return true if we can evaluate the specified expression tree if the vector
968 /// elements were shuffled in a different order.
969 static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
970  unsigned Depth = 5) {
971  // We can always reorder the elements of a constant.
972  if (isa<Constant>(V))
973  return true;
974 
975  // We won't reorder vector arguments. No IPO here.
976  Instruction *I = dyn_cast<Instruction>(V);
977  if (!I) return false;
978 
979  // Two users may expect different orders of the elements. Don't try it.
980  if (!I->hasOneUse())
981  return false;
982 
983  if (Depth == 0) return false;
984 
985  switch (I->getOpcode()) {
986  case Instruction::Add:
987  case Instruction::FAdd:
988  case Instruction::Sub:
989  case Instruction::FSub:
990  case Instruction::Mul:
991  case Instruction::FMul:
992  case Instruction::UDiv:
993  case Instruction::SDiv:
994  case Instruction::FDiv:
995  case Instruction::URem:
996  case Instruction::SRem:
997  case Instruction::FRem:
998  case Instruction::Shl:
999  case Instruction::LShr:
1000  case Instruction::AShr:
1001  case Instruction::And:
1002  case Instruction::Or:
1003  case Instruction::Xor:
1004  case Instruction::ICmp:
1005  case Instruction::FCmp:
1006  case Instruction::Trunc:
1007  case Instruction::ZExt:
1008  case Instruction::SExt:
1009  case Instruction::FPToUI:
1010  case Instruction::FPToSI:
1011  case Instruction::UIToFP:
1012  case Instruction::SIToFP:
1013  case Instruction::FPTrunc:
1014  case Instruction::FPExt:
1015  case Instruction::GetElementPtr: {
1016  // Bail out if we would create longer vector ops. We could allow creating
1017  // longer vector ops, but that may result in more expensive codegen. We
1018  // would also need to limit the transform to avoid undefined behavior for
1019  // integer div/rem.
1020  Type *ITy = I->getType();
1021  if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1022  return false;
1023  for (Value *Operand : I->operands()) {
1024  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1025  return false;
1026  }
1027  return true;
1028  }
1029  case Instruction::InsertElement: {
1030  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1031  if (!CI) return false;
1032  int ElementNumber = CI->getLimitedValue();
1033 
1034  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1035  // can't put an element into multiple indices.
1036  bool SeenOnce = false;
1037  for (int i = 0, e = Mask.size(); i != e; ++i) {
1038  if (Mask[i] == ElementNumber) {
1039  if (SeenOnce)
1040  return false;
1041  SeenOnce = true;
1042  }
1043  }
1044  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1045  }
1046  }
1047  return false;
1048 }
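// A hand-written sketch of the kind of rewrite this helper (together with
// evaluateInDifferentElementOrder below) enables; the actual output may
// differ:
//
//   %v = insertelement <4 x i32> undef, i32 %k, i32 0
//   %a = add <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
//   %s = shufflevector <4 x i32> %a, <4 x i32> undef,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//
// can be evaluated with the elements already reordered:
//
//   %v2 = insertelement <4 x i32> undef, i32 %k, i32 3
//   %s  = add <4 x i32> %v2, <i32 4, i32 3, i32 2, i32 1>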
1049 
1050 /// Rebuild a new instruction just like 'I' but with the new operands given.
1051 /// In the event of type mismatch, the type of the operands is correct.
1052 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1053  // We don't want to use the IRBuilder here because we want the replacement
1054  // instructions to appear next to 'I', not the builder's insertion point.
1055  switch (I->getOpcode()) {
1056  case Instruction::Add:
1057  case Instruction::FAdd:
1058  case Instruction::Sub:
1059  case Instruction::FSub:
1060  case Instruction::Mul:
1061  case Instruction::FMul:
1062  case Instruction::UDiv:
1063  case Instruction::SDiv:
1064  case Instruction::FDiv:
1065  case Instruction::URem:
1066  case Instruction::SRem:
1067  case Instruction::FRem:
1068  case Instruction::Shl:
1069  case Instruction::LShr:
1070  case Instruction::AShr:
1071  case Instruction::And:
1072  case Instruction::Or:
1073  case Instruction::Xor: {
1074  BinaryOperator *BO = cast<BinaryOperator>(I);
1075  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1076  BinaryOperator *New =
1077  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1078  NewOps[0], NewOps[1], "", BO);
1079  if (isa<OverflowingBinaryOperator>(BO)) {
1080  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1081  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1082  }
1083  if (isa<PossiblyExactOperator>(BO)) {
1084  New->setIsExact(BO->isExact());
1085  }
1086  if (isa<FPMathOperator>(BO))
1087  New->copyFastMathFlags(I);
1088  return New;
1089  }
1090  case Instruction::ICmp:
1091  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1092  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1093  NewOps[0], NewOps[1]);
1094  case Instruction::FCmp:
1095  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1096  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1097  NewOps[0], NewOps[1]);
1098  case Instruction::Trunc:
1099  case Instruction::ZExt:
1100  case Instruction::SExt:
1101  case Instruction::FPToUI:
1102  case Instruction::FPToSI:
1103  case Instruction::UIToFP:
1104  case Instruction::SIToFP:
1105  case Instruction::FPTrunc:
1106  case Instruction::FPExt: {
1107  // It's possible that the mask has a different number of elements from
1108  // the original cast. We recompute the destination type to match the mask.
1109  Type *DestTy =
1110  VectorType::get(I->getType()->getScalarType(),
1111  NewOps[0]->getType()->getVectorNumElements());
1112  assert(NewOps.size() == 1 && "cast with #ops != 1");
1113  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1114  "", I);
1115  }
1116  case Instruction::GetElementPtr: {
1117  Value *Ptr = NewOps[0];
1118  ArrayRef<Value*> Idx = NewOps.slice(1);
1119  GetElementPtrInst *GEP = GetElementPtrInst::Create(
1120  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1121  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1122  return GEP;
1123  }
1124  }
1125  llvm_unreachable("failed to rebuild vector instructions");
1126 }
1127 
1128 static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1129  // Mask.size() does not need to be equal to the number of vector elements.
1130 
1131  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1132  Type *EltTy = V->getType()->getScalarType();
1133  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1134  if (isa<UndefValue>(V))
1135  return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1136 
1137  if (isa<ConstantAggregateZero>(V))
1138  return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1139 
1140  if (Constant *C = dyn_cast<Constant>(V)) {
1141  SmallVector<Constant *, 16> MaskValues;
1142  for (int i = 0, e = Mask.size(); i != e; ++i) {
1143  if (Mask[i] == -1)
1144  MaskValues.push_back(UndefValue::get(I32Ty));
1145  else
1146  MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1147  }
1148  return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1149  ConstantVector::get(MaskValues));
1150  }
1151 
1152  Instruction *I = cast<Instruction>(V);
1153  switch (I->getOpcode()) {
1154  case Instruction::Add:
1155  case Instruction::FAdd:
1156  case Instruction::Sub:
1157  case Instruction::FSub:
1158  case Instruction::Mul:
1159  case Instruction::FMul:
1160  case Instruction::UDiv:
1161  case Instruction::SDiv:
1162  case Instruction::FDiv:
1163  case Instruction::URem:
1164  case Instruction::SRem:
1165  case Instruction::FRem:
1166  case Instruction::Shl:
1167  case Instruction::LShr:
1168  case Instruction::AShr:
1169  case Instruction::And:
1170  case Instruction::Or:
1171  case Instruction::Xor:
1172  case Instruction::ICmp:
1173  case Instruction::FCmp:
1174  case Instruction::Trunc:
1175  case Instruction::ZExt:
1176  case Instruction::SExt:
1177  case Instruction::FPToUI:
1178  case Instruction::FPToSI:
1179  case Instruction::UIToFP:
1180  case Instruction::SIToFP:
1181  case Instruction::FPTrunc:
1182  case Instruction::FPExt:
1183  case Instruction::Select:
1184  case Instruction::GetElementPtr: {
1185  SmallVector<Value*, 8> NewOps;
1186  bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1187  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1188  Value *V;
1189  // Recursively call evaluateInDifferentElementOrder on vector arguments
1190  // as well. E.g. GetElementPtr may have scalar operands even if the
1191  // return value is a vector, so we need to examine the operand type.
1192  if (I->getOperand(i)->getType()->isVectorTy())
1193  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1194  else
1195  V = I->getOperand(i);
1196  NewOps.push_back(V);
1197  NeedsRebuild |= (V != I->getOperand(i));
1198  }
1199  if (NeedsRebuild) {
1200  return buildNew(I, NewOps);
1201  }
1202  return I;
1203  }
1204  case Instruction::InsertElement: {
1205  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1206 
1207  // The insertelement was inserting at Element. Figure out which element
1208  // that becomes after shuffling. The answer is guaranteed to be unique
1209  // by CanEvaluateShuffled.
1210  bool Found = false;
1211  int Index = 0;
1212  for (int e = Mask.size(); Index != e; ++Index) {
1213  if (Mask[Index] == Element) {
1214  Found = true;
1215  break;
1216  }
1217  }
1218 
1219  // If element is not in Mask, no need to handle the operand 1 (element to
1220  // be inserted). Just evaluate values in operand 0 according to Mask.
1221  if (!Found)
1222  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1223 
1224  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1225  return InsertElementInst::Create(V, I->getOperand(1),
1226  ConstantInt::get(I32Ty, Index), "", I);
1227  }
1228  }
1229  llvm_unreachable("failed to reorder elements of vector instruction!");
1230 }
1231 
1232 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
1233  bool &isLHSID, bool &isRHSID) {
1234  isLHSID = isRHSID = true;
1235 
1236  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1237  if (Mask[i] < 0) continue; // Ignore undef values.
1238  // Is this an identity shuffle of the LHS value?
1239  isLHSID &= (Mask[i] == (int)i);
1240 
1241  // Is this an identity shuffle of the RHS value?
1242  isRHSID &= (Mask[i]-e == i);
1243  }
1244 }
1245 
1246 // Returns true if the shuffle is extracting a contiguous range of values from
1247 // LHS, for example:
1248 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1249 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1250 // Shuffles to: |EE|FF|GG|HH|
1251 // +--+--+--+--+
1252 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1253  SmallVector<int, 16> &Mask) {
1254  unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1255  unsigned MaskElems = Mask.size();
1256  unsigned BegIdx = Mask.front();
1257  unsigned EndIdx = Mask.back();
1258  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1259  return false;
1260  for (unsigned I = 0; I != MaskElems; ++I)
1261  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1262  return false;
1263  return true;
1264 }
1265 
1266 /// These are the ingredients in an alternate form binary operator as described
1267 /// below.
1268 struct BinopElts {
1269  BinaryOperator::BinaryOps Opcode;
1270  Value *Op0;
1271  Value *Op1;
1272  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1273  Value *V0 = nullptr, Value *V1 = nullptr) :
1274  Opcode(Opc), Op0(V0), Op1(V1) {}
1275  operator bool() const { return Opcode != 0; }
1276 };
1277 
1278 /// Binops may be transformed into binops with different opcodes and operands.
1279 /// Reverse the usual canonicalization to enable folds with the non-canonical
1280 /// form of the binop. If a transform is possible, return the elements of the
1281 /// new binop. If not, return invalid elements.
1282 static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1283  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1284  Type *Ty = BO->getType();
1285  switch (BO->getOpcode()) {
1286  case Instruction::Shl: {
1287  // shl X, C --> mul X, (1 << C)
1288  Constant *C;
1289  if (match(BO1, m_Constant(C))) {
1290  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1291  return { Instruction::Mul, BO0, ShlOne };
1292  }
1293  break;
1294  }
1295  case Instruction::Or: {
1296  // or X, C --> add X, C (when X and C have no common bits set)
1297  const APInt *C;
1298  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1299  return { Instruction::Add, BO0, BO1 };
1300  break;
1301  }
1302  default:
1303  break;
1304  }
1305  return {};
1306 }
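// Illustrative only: getAlternateBinop() lets a later fold treat
//
//   %a = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//
// as if it were  mul %x, <i32 8, i32 8, i32 8, i32 8>,  and treat
// or %x, C  as  add %x, C  when %x is known to share no bits with C.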
1307 
1308 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1309  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1310 
1311  // Are we shuffling together some value and that same value after it has been
1312  // modified by a binop with a constant?
1313  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1314  Constant *C;
1315  bool Op0IsBinop;
1316  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1317  Op0IsBinop = true;
1318  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1319  Op0IsBinop = false;
1320  else
1321  return nullptr;
1322 
1323  // The identity constant for a binop leaves a variable operand unchanged. For
1324  // a vector, this is a splat of something like 0, -1, or 1.
1325  // If there's no identity constant for this binop, we're done.
1326  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1327  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1328  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1329  if (!IdC)
1330  return nullptr;
1331 
1332  // Shuffle identity constants into the lanes that return the original value.
1333  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1334  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1335  // The existing binop constant vector remains in the same operand position.
1336  Constant *Mask = Shuf.getMask();
1337  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1338  ConstantExpr::getShuffleVector(IdC, C, Mask);
1339 
1340  bool MightCreatePoisonOrUB =
1341  Mask->containsUndefElement() &&
1342  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1343  if (MightCreatePoisonOrUB)
1344  NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1345 
1346  // shuf (bop X, C), X, M --> bop X, C'
1347  // shuf X, (bop X, C), M --> bop X, C'
1348  Value *X = Op0IsBinop ? Op1 : Op0;
1349  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1350  NewBO->copyIRFlags(BO);
1351 
1352  // An undef shuffle mask element may propagate as an undef constant element in
1353  // the new binop. That would produce poison where the original code might not.
1354  // If we already made a safe constant, then there's no danger.
1355  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1356  NewBO->dropPoisonGeneratingFlags();
1357  return NewBO;
1358 }
1359 
1360 /// Try to fold shuffles that are the equivalent of a vector select.
1361 static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1362  InstCombiner::BuilderTy &Builder,
1363  const DataLayout &DL) {
1364  if (!Shuf.isSelect())
1365  return nullptr;
1366 
1367  // Canonicalize to choose from operand 0 first.
1368  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1369  if (Shuf.getMaskValue(0) >= (int)NumElts) {
1370  // TODO: Can we assert that both operands of a shuffle-select are not undef
1371  // (otherwise, it would have been folded by instsimplify)?
1372  Shuf.commute();
1373  return &Shuf;
1374  }
1375 
1376  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1377  return I;
1378 
1379  BinaryOperator *B0, *B1;
1380  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1381  !match(Shuf.getOperand(1), m_BinOp(B1)))
1382  return nullptr;
1383 
1384  Value *X, *Y;
1385  Constant *C0, *C1;
1386  bool ConstantsAreOp1;
1387  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1388  match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1389  ConstantsAreOp1 = true;
1390  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1391  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1392  ConstantsAreOp1 = false;
1393  else
1394  return nullptr;
1395 
1396  // We need matching binops to fold the lanes together.
1397  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1398  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1399  bool DropNSW = false;
1400  if (ConstantsAreOp1 && Opc0 != Opc1) {
1401  // TODO: We drop "nsw" if shift is converted into multiply because it may
1402  // not be correct when the shift amount is BitWidth - 1. We could examine
1403  // each vector element to determine if it is safe to keep that flag.
1404  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1405  DropNSW = true;
1406  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1407  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1408  Opc0 = AltB0.Opcode;
1409  C0 = cast<Constant>(AltB0.Op1);
1410  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1411  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1412  Opc1 = AltB1.Opcode;
1413  C1 = cast<Constant>(AltB1.Op1);
1414  }
1415  }
1416 
1417  if (Opc0 != Opc1)
1418  return nullptr;
1419 
1420  // The opcodes must be the same. Use a new name to make that clear.
1421  BinaryOperator::BinaryOps BOpc = Opc0;
1422 
1423  // Select the constant elements needed for the single binop.
1424  Constant *Mask = Shuf.getMask();
1425  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1426 
1427  // We are moving a binop after a shuffle. When a shuffle has an undefined
1428  // mask element, the result is undefined, but it is not poison or undefined
1429  // behavior. That is not necessarily true for div/rem/shift.
1430  bool MightCreatePoisonOrUB =
1431  Mask->containsUndefElement() &&
1432  (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
1433  if (MightCreatePoisonOrUB)
1434  NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1435 
1436  Value *V;
1437  if (X == Y) {
1438  // Remove a binop and the shuffle by rearranging the constant:
1439  // shuffle (op V, C0), (op V, C1), M --> op V, C'
1440  // shuffle (op C0, V), (op C1, V), M --> op C', V
1441  V = X;
1442  } else {
1443  // If there are 2 different variable operands, we must create a new shuffle
1444  // (select) first, so check uses to ensure that we don't end up with more
1445  // instructions than we started with.
1446  if (!B0->hasOneUse() && !B1->hasOneUse())
1447  return nullptr;
1448 
1449  // If we use the original shuffle mask and op1 is *variable*, we would be
1450  // putting an undef into operand 1 of div/rem/shift. This is either UB or
1451  // poison. We do not have to guard against UB when *constants* are op1
1452  // because safe constants guarantee that we do not overflow sdiv/srem (and
1453  // there's no danger for other opcodes).
1454  // TODO: To allow this case, create a new shuffle mask with no undefs.
1455  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1456  return nullptr;
1457 
1458  // Note: In general, we do not create new shuffles in InstCombine because we
1459  // do not know if a target can lower an arbitrary shuffle optimally. In this
1460  // case, the shuffle uses the existing mask, so there is no additional risk.
1461 
1462  // Select the variable vectors first, then perform the binop:
1463  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1464  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1465  V = Builder.CreateShuffleVector(X, Y, Mask);
1466  }
1467 
1468  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1469  BinaryOperator::Create(BOpc, NewC, V);
1470 
1471  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1472  // 1. If we changed an opcode, poison conditions might have changed.
1473  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1474  // where the original code did not. But if we already made a safe constant,
1475  // then there's no danger.
1476  NewBO->copyIRFlags(B0);
1477  NewBO->andIRFlags(B1);
1478  if (DropNSW)
1479  NewBO->setHasNoSignedWrap(false);
1480  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1481  NewBO->dropPoisonGeneratingFlags();
1482  return NewBO;
1483 }
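// For illustration only (hypothetical values, not from the original source),
// the X == Y case removes both the shuffle and one of the binops:
//   %b0 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
//   %b1 = add <4 x i32> %v, <i32 2, i32 2, i32 2, i32 2>
//   %s  = shufflevector <4 x i32> %b0, <4 x i32> %b1,
//                       <4 x i32> <i32 0, i32 1, i32 6, i32 7>
// is expected to fold to a single binop with a reassembled constant:
//   %s  = add <4 x i32> %v, <i32 1, i32 1, i32 2, i32 2>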
1484 
1485 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1486 /// narrowing (concatenating with undef and extracting back to the original
1487 /// length). This allows replacing the wide select with a narrow select.
1488 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1489                                        InstCombiner::BuilderTy &Builder) {
1490  // This must be a narrowing identity shuffle. It extracts the 1st N elements
1491  // of the 1st vector operand of a shuffle.
1492  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1493  return nullptr;
1494 
1495  // The vector being shuffled must be a vector select that we can eliminate.
1496  // TODO: The one-use requirement could be eased if X and/or Y are constants.
1497  Value *Cond, *X, *Y;
1498  if (!match(Shuf.getOperand(0),
1499  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1500  return nullptr;
1501 
1502  // We need a narrow condition value. It must be extended with undef elements
1503  // and have the same number of elements as this shuffle.
1504  unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1505  Value *NarrowCond;
1506  if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1507  m_Constant()))) ||
1508  NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1509  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1510  return nullptr;
1511 
1512  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask -->
1513  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1514  Value *Undef = UndefValue::get(X->getType());
1515  Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1516  Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1517  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1518 }
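// A hypothetical IR instance of this narrowing (names and types invented for
// illustration): a <2 x i1> condition widened to select <4 x i32> values and
// then narrowed back
//   %widecond = shufflevector <2 x i1> %cond, <2 x i1> undef,
//                             <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
//   %widesel  = select <4 x i1> %widecond, <4 x i32> %x, <4 x i32> %y
//   %narrow   = shufflevector <4 x i32> %widesel, <4 x i32> undef,
//                             <2 x i32> <i32 0, i32 1>
// should become a narrow select of narrowed operands:
//   %nx     = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
//   %ny     = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
//   %narrow = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny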
1519 
1520 /// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1521 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1522  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1523  if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1524  return nullptr;
1525 
1526  Value *X, *Y;
1527  Constant *Mask;
1528  if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1529  return nullptr;
1530 
1531  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1532  // then combining may result in worse codegen.
1533  if (!Op0->hasOneUse())
1534  return nullptr;
1535 
1536  // We are extracting a subvector from a shuffle. Remove excess elements from
1537  // the 1st shuffle mask to eliminate the extract.
1538  //
1539  // This transform is conservatively limited to identity extracts because we do
1540  // not allow arbitrary shuffle mask creation as a target-independent transform
1541  // (because we can't guarantee that will lower efficiently).
1542  //
1543  // If the extracting shuffle has an undef mask element, it transfers to the
1544  // new shuffle mask. Otherwise, copy the original mask element. Example:
1545  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1546  // shuf X, Y, <C0, undef, C2, undef>
1547  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1548  SmallVector<Constant *, 16> NewMask(NumElts);
1549  assert(NumElts < Mask->getType()->getVectorNumElements() &&
1550  "Identity with extract must have fewer elements than its inputs");
1551 
1552  for (unsigned i = 0; i != NumElts; ++i) {
1553  Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1554  Constant *MaskElt = Mask->getAggregateElement(i);
1555  NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1556  }
1557  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1558 }
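// A hypothetical example (values invented for illustration): an interleave
// followed by an identity extract of its first half
//   %wide = shufflevector <4 x i32> %x, <4 x i32> %y,
//                         <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
//   %sub  = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// is expected to fold to a single shuffle of the original operands:
//   %sub  = shufflevector <4 x i32> %x, <4 x i32> %y,
//                         <4 x i32> <i32 0, i32 4, i32 1, i32 5>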
1559 
1560 /// Try to replace a shuffle with an insertelement.
1561 static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
1562  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1563  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1564 
1565  // The shuffle must not change vector sizes.
1566  // TODO: This restriction could be removed if the insert has only one use
1567  // (because the transform would require a new length-changing shuffle).
1568  int NumElts = Mask.size();
1569  if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1570  return nullptr;
1571 
1572  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1573  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1574  // We need an insertelement with a constant index.
1575  if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1576  m_ConstantInt(IndexC))))
1577  return false;
1578 
1579  // Test the shuffle mask to see if it splices the inserted scalar into the
1580  // operand 1 vector of the shuffle.
1581  int NewInsIndex = -1;
1582  for (int i = 0; i != NumElts; ++i) {
1583  // Ignore undef mask elements.
1584  if (Mask[i] == -1)
1585  continue;
1586 
1587  // The shuffle takes elements of operand 1 without lane changes.
1588  if (Mask[i] == NumElts + i)
1589  continue;
1590 
1591  // The shuffle must choose the inserted scalar exactly once.
1592  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1593  return false;
1594 
1595  // The shuffle is placing the inserted scalar into element i.
1596  NewInsIndex = i;
1597  }
1598 
1599  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1600 
1601  // Index is updated to the potentially translated insertion lane.
1602  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1603  return true;
1604  };
1605 
1606  // If the shuffle is unnecessary, insert the scalar operand directly into
1607  // operand 1 of the shuffle. Example:
1608  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1609  Value *Scalar;
1610  ConstantInt *IndexC;
1611  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1612  return InsertElementInst::Create(V1, Scalar, IndexC);
1613 
1614  // Try again after commuting shuffle. Example:
1615  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1616  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1617  std::swap(V0, V1);
1618  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1619  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1620  return InsertElementInst::Create(V1, Scalar, IndexC);
1621 
1622  return nullptr;
1623 }
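// A concrete (hypothetical) instance of the first example above:
//   %ins  = insertelement <4 x i32> undef, i32 %s, i32 1
//   %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v1,
//                         <4 x i32> <i32 1, i32 5, i32 6, i32 7>
// is expected to become:
//   %shuf = insertelement <4 x i32> %v1, i32 %s, i32 0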
1624 
1625 static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1626  // Match the operands as identity with padding (also known as concatenation
1627  // with undef) shuffles of the same source type. The backend is expected to
1628  // recreate these concatenations from a shuffle of narrow operands.
1629  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1630  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1631  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1632  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1633  return nullptr;
1634 
1635  // We limit this transform to power-of-2 types because we expect that the
1636  // backend can convert the simplified IR patterns to identical nodes as the
1637  // original IR.
1638  // TODO: If we can verify that behavior for arbitrary types, the power-of-2
1639  // checks can be removed.
1640  Value *X = Shuffle0->getOperand(0);
1641  Value *Y = Shuffle1->getOperand(0);
1642  if (X->getType() != Y->getType() ||
1643      !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
1644      !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1645      !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
1646      isa<UndefValue>(X) || isa<UndefValue>(Y))
1647  return nullptr;
1648  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1649  isa<UndefValue>(Shuffle1->getOperand(1)) &&
1650  "Unexpected operand for identity shuffle");
1651 
1652  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1653  // operands directly by adjusting the shuffle mask to account for the narrower
1654  // types:
1655  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1656  int NarrowElts = X->getType()->getVectorNumElements();
1657  int WideElts = Shuffle0->getType()->getVectorNumElements();
1658  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1659 
1660  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1661  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1662  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1663  for (int i = 0, e = Mask.size(); i != e; ++i) {
1664  if (Mask[i] == -1)
1665  continue;
1666  if (Mask[i] < WideElts)
1667  NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1668  else
1669  NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1670  }
1671  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1672 }
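// A hypothetical IR example (values invented for illustration): a shuffle of
// two identity-with-padding (widened) operands has its mask remapped to the
// narrow element count
//   %wx = shufflevector <2 x i32> %x, <2 x i32> undef,
//                       <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
//   %wy = shufflevector <2 x i32> %y, <2 x i32> undef,
//                       <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
//   %s  = shufflevector <4 x i32> %wx, <4 x i32> %wy,
//                       <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// should become a single shuffle of the narrow sources:
//   %s  = shufflevector <2 x i32> %x, <2 x i32> %y,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>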
1673 
1674 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1675  Value *LHS = SVI.getOperand(0);
1676  Value *RHS = SVI.getOperand(1);
1677  if (auto *V = SimplifyShuffleVectorInst(
1678  LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1679  return replaceInstUsesWith(SVI, V);
1680 
1681  // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask').
1682  // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
1683  unsigned VWidth = SVI.getType()->getVectorNumElements();
1684  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1685  SmallVector<int, 16> Mask = SVI.getShuffleMask();
1686  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1687  if (LHS == RHS || isa<UndefValue>(LHS)) {
1688  // Remap any references to RHS to use LHS.
1689  SmallVector<Constant*, 16> Elts;
1690  for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
1691  if (Mask[i] < 0) {
1692  Elts.push_back(UndefValue::get(Int32Ty));
1693  continue;
1694  }
1695 
1696  if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
1697  (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
1698  Mask[i] = -1; // Turn into undef.
1699  Elts.push_back(UndefValue::get(Int32Ty));
1700  } else {
1701  Mask[i] = Mask[i] % e; // Force to LHS.
1702  Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1703  }
1704  }
1705  SVI.setOperand(0, SVI.getOperand(1));
1706  SVI.setOperand(1, UndefValue::get(RHS->getType()));
1707  SVI.setOperand(2, ConstantVector::get(Elts));
1708  return &SVI;
1709  }
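  // As a hypothetical example of the canonicalization above:
  //   shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 6, i32 1, i32 4>
  // is rewritten in place to:
  //   shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>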
1710 
1711  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1712  return I;
1713 
1714  if (Instruction *I = narrowVectorSelect(SVI, Builder))
1715  return I;
1716 
1717  APInt UndefElts(VWidth, 0);
1718  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1719  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1720  if (V != &SVI)
1721  return replaceInstUsesWith(SVI, V);
1722  return &SVI;
1723  }
1724 
1725  if (Instruction *I = foldIdentityExtractShuffle(SVI))
1726  return I;
1727 
1728  // These transforms have the potential to lose undef knowledge, so they are
1729  // intentionally placed after SimplifyDemandedVectorElts().
1730  if (Instruction *I = foldShuffleWithInsert(SVI))
1731  return I;
1732  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1733  return I;
1734 
1735  if (VWidth == LHSWidth) {
1736  // Analyze the shuffle: is the LHS or RHS an identity shuffle?
1737  bool isLHSID, isRHSID;
1738  recognizeIdentityMask(Mask, isLHSID, isRHSID);
1739 
1740  // Eliminate identity shuffles.
1741  if (isLHSID) return replaceInstUsesWith(SVI, LHS);
1742  if (isRHSID) return replaceInstUsesWith(SVI, RHS);
1743  }
1744 
1745  if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
1746  Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1747  return replaceInstUsesWith(SVI, V);
1748  }
1749 
1750  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1751  // a non-vector type. We can instead bitcast the original vector followed by
1752  // an extract of the desired element:
1753  //
1754  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1755  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1756  // %1 = bitcast <4 x i8> %sroa to i32
1757  // Becomes:
1758  // %bc = bitcast <16 x i8> %in to <4 x i32>
1759  // %ext = extractelement <4 x i32> %bc, i32 0
1760  //
1761  // If the shuffle is extracting a contiguous range of values from the input
1762  // vector then each use which is a bitcast of the extracted size can be
1763  // replaced. This will work if the vector types are compatible, and the begin
1764  // index is aligned to a value in the casted vector type. If the begin index
1765  // isn't aligned then we can shuffle the original vector (keeping the same
1766  // vector type) before extracting.
1767  //
1768  // This code will bail out if the target type is fundamentally incompatible
1769  // with vectors of the source type.
1770  //
1771  // Example of <16 x i8>, target type i32:
1772  // Index range [4,8): v-----------v Will work.
1773  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1774  // <16 x i8>: | | | | | | | | | | | | | | | | |
1775  // <4 x i32>: | | | | |
1776  // +-----------+-----------+-----------+-----------+
1777  // Index range [6,10): ^-----------^ Needs an extra shuffle.
1778  // Target type i40: ^--------------^ Won't work, bail.
1779  bool MadeChange = false;
1780  if (isShuffleExtractingFromLHS(SVI, Mask)) {
1781  Value *V = LHS;
1782  unsigned MaskElems = Mask.size();
1783  VectorType *SrcTy = cast<VectorType>(V->getType());
1784  unsigned VecBitWidth = SrcTy->getBitWidth();
1785  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1786  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1787  unsigned SrcNumElems = SrcTy->getNumElements();
1788  SmallVector<BitCastInst *, 8> BCs;
1789  DenseMap<Type *, Value *> NewBCs;
1790  for (User *U : SVI.users())
1791  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
1792  if (!BC->use_empty())
1793  // Only visit bitcasts that weren't previously handled.
1794  BCs.push_back(BC);
1795  for (BitCastInst *BC : BCs) {
1796  unsigned BegIdx = Mask.front();
1797  Type *TgtTy = BC->getDestTy();
1798  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
1799  if (!TgtElemBitWidth)
1800  continue;
1801  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
1802  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
1803  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
1804  if (!VecBitWidthsEqual)
1805  continue;
1806  if (!VectorType::isValidElementType(TgtTy))
1807  continue;
1808  VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
1809  if (!BegIsAligned) {
1810  // Shuffle the input so [0,NumElements) contains the output, and
1811  // [NumElems,SrcNumElems) is undef.
1812  SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
1813  UndefValue::get(Int32Ty));
1814  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
1815  ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
1816  V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
1817  ConstantVector::get(ShuffleMask),
1818  SVI.getName() + ".extract");
1819  BegIdx = 0;
1820  }
1821  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
1822  assert(SrcElemsPerTgtElem);
1823  BegIdx /= SrcElemsPerTgtElem;
1824  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
1825  auto *NewBC =
1826  BCAlreadyExists
1827  ? NewBCs[CastSrcTy]
1828  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
1829  if (!BCAlreadyExists)
1830  NewBCs[CastSrcTy] = NewBC;
1831  auto *Ext = Builder.CreateExtractElement(
1832  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
1833  // The shufflevector isn't being replaced: the bitcast that used it
1834  // is. InstCombine will visit the newly-created instructions.
1835  replaceInstUsesWith(*BC, Ext);
1836  MadeChange = true;
1837  }
1838  }
1839 
1840  // If the LHS is a shufflevector itself, see if we can combine it with this
1841  // one without producing an unusual shuffle.
1842  // Cases that might be simplified:
1843  // 1.
1844  // x1=shuffle(v1,v2,mask1)
1845  // x=shuffle(x1,undef,mask)
1846  // ==>
1847  // x=shuffle(v1,undef,newMask)
1848  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
1849  // 2.
1850  // x1=shuffle(v1,undef,mask1)
1851  // x=shuffle(x1,x2,mask)
1852  // where v1.size() == mask1.size()
1853  // ==>
1854  // x=shuffle(v1,x2,newMask)
1855  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
1856  // 3.
1857  // x2=shuffle(v2,undef,mask2)
1858  // x=shuffle(x1,x2,mask)
1859  // where v2.size() == mask2.size()
1860  // ==>
1861  // x=shuffle(x1,v2,newMask)
1862  // newMask[i] = (mask[i] < x1.size())
1863  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
1864  // 4.
1865  // x1=shuffle(v1,undef,mask1)
1866  // x2=shuffle(v2,undef,mask2)
1867  // x=shuffle(x1,x2,mask)
1868  // where v1.size() == v2.size()
1869  // ==>
1870  // x=shuffle(v1,v2,newMask)
1871  // newMask[i] = (mask[i] < x1.size())
1872  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
1873  //
1874  // Here we are very conservative:
1875  // we avoid producing a shuffle mask that did not appear in the input
1876  // program, because codegen may not be smart enough to turn a merged
1877  // shuffle into two specific shuffles, and may produce worse code. As such,
1878  // we only merge two shuffles if the result is either a splat or one of the
1879  // input shuffle masks. In this case, merging the shuffles just removes
1880  // one instruction, which we know is safe. This is good for things like
1881  // turning: (splat(splat)) -> splat, or
1882  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
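  // As a hypothetical instance of case 1, a splat of a splat collapses to a
  // single splat of the original vector:
  //   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  //   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  // can be merged into:
  //   %s2 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>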
1883  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
1884  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
1885  if (LHSShuffle)
1886  if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
1887  LHSShuffle = nullptr;
1888  if (RHSShuffle)
1889  if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
1890  RHSShuffle = nullptr;
1891  if (!LHSShuffle && !RHSShuffle)
1892  return MadeChange ? &SVI : nullptr;
1893 
1894  Value* LHSOp0 = nullptr;
1895  Value* LHSOp1 = nullptr;
1896  Value* RHSOp0 = nullptr;
1897  unsigned LHSOp0Width = 0;
1898  unsigned RHSOp0Width = 0;
1899  if (LHSShuffle) {
1900  LHSOp0 = LHSShuffle->getOperand(0);
1901  LHSOp1 = LHSShuffle->getOperand(1);
1902  LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
1903  }
1904  if (RHSShuffle) {
1905  RHSOp0 = RHSShuffle->getOperand(0);
1906  RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
1907  }
1908  Value* newLHS = LHS;
1909  Value* newRHS = RHS;
1910  if (LHSShuffle) {
1911  // case 1
1912  if (isa<UndefValue>(RHS)) {
1913  newLHS = LHSOp0;
1914  newRHS = LHSOp1;
1915  }
1916  // case 2 or 4
1917  else if (LHSOp0Width == LHSWidth) {
1918  newLHS = LHSOp0;
1919  }
1920  }
1921  // case 3 or 4
1922  if (RHSShuffle && RHSOp0Width == LHSWidth) {
1923  newRHS = RHSOp0;
1924  }
1925  // case 4
1926  if (LHSOp0 == RHSOp0) {
1927  newLHS = LHSOp0;
1928  newRHS = nullptr;
1929  }
1930 
1931  if (newLHS == LHS && newRHS == RHS)
1932  return MadeChange ? &SVI : nullptr;
1933 
1934  SmallVector<int, 16> LHSMask;
1935  SmallVector<int, 16> RHSMask;
1936  if (newLHS != LHS)
1937  LHSMask = LHSShuffle->getShuffleMask();
1938  if (RHSShuffle && newRHS != RHS)
1939  RHSMask = RHSShuffle->getShuffleMask();
1940 
1941  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
1942  SmallVector<int, 16> newMask;
1943  bool isSplat = true;
1944  int SplatElt = -1;
1945  // Create a new mask for the new ShuffleVectorInst so that the new
1946  // ShuffleVectorInst is equivalent to the original one.
1947  for (unsigned i = 0; i < VWidth; ++i) {
1948  int eltMask;
1949  if (Mask[i] < 0) {
1950  // This element is an undef value.
1951  eltMask = -1;
1952  } else if (Mask[i] < (int)LHSWidth) {
1953  // This element is from left hand side vector operand.
1954  //
1955  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
1956  // new mask value for the element.
1957  if (newLHS != LHS) {
1958  eltMask = LHSMask[Mask[i]];
1959  // If the value selected is an undef value, explicitly specify it
1960  // with a -1 mask value.
1961  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
1962  eltMask = -1;
1963  } else
1964  eltMask = Mask[i];
1965  } else {
1966  // This element is from right hand side vector operand
1967  //
1968  // If the value selected is an undef value, explicitly specify it
1969  // with a -1 mask value. (case 1)
1970  if (isa<UndefValue>(RHS))
1971  eltMask = -1;
1972  // If RHS is going to be replaced (case 3 or 4), calculate the
1973  // new mask value for the element.
1974  else if (newRHS != RHS) {
1975  eltMask = RHSMask[Mask[i]-LHSWidth];
1976  // If the value selected is an undef value, explicitly specify it
1977  // with a -1 mask value.
1978  if (eltMask >= (int)RHSOp0Width) {
1979  assert(isa<UndefValue>(RHSShuffle->getOperand(1))
1980  && "should have been checked above");
1981  eltMask = -1;
1982  }
1983  } else
1984  eltMask = Mask[i]-LHSWidth;
1985 
1986  // If LHS's width is changed, shift the mask value accordingly.
1987  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
1988  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
1989  // If newRHS == newLHS, we want to remap any references from newRHS to
1990  // newLHS so that we can properly identify splats that may occur due to
1991  // obfuscation across the two vectors.
1992  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
1993  eltMask += newLHSWidth;
1994  }
1995 
1996  // Check if this could still be a splat.
1997  if (eltMask >= 0) {
1998  if (SplatElt >= 0 && SplatElt != eltMask)
1999  isSplat = false;
2000  SplatElt = eltMask;
2001  }
2002 
2003  newMask.push_back(eltMask);
2004  }
2005 
2006  // If the result mask is equal to one of the original shuffle masks,
2007  // or is a splat, do the replacement.
2008  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2009  SmallVector<Constant*, 16> Elts;
2010  for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2011  if (newMask[i] < 0) {
2012  Elts.push_back(UndefValue::get(Int32Ty));
2013  } else {
2014  Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2015  }
2016  }
2017  if (!newRHS)
2018  newRHS = UndefValue::get(newLHS->getType());
2019  return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2020  }
2021 
2022  // If the result mask is an identity, replace uses of this instruction with
2023  // corresponding argument.
2024  bool isLHSID, isRHSID;
2025  recognizeIdentityMask(newMask, isLHSID, isRHSID);
2026  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
2027  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
2028 
2029  return MadeChange ? &SVI : nullptr;
2030 }