LLVM  8.0.0svn
InstCombineVectorOps.cpp
Go to the documentation of this file.
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements instcombine for ExtractElement, InsertElement and
11 // ShuffleVector.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "InstCombineInternal.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/Constant.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Operator.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/User.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <iterator>
41 #include <utility>
42 
43 using namespace llvm;
44 using namespace PatternMatch;
45 
46 #define DEBUG_TYPE "instcombine"
47 
48 /// Return true if the value is cheaper to scalarize than it is to leave as a
49 /// vector operation. isConstant indicates whether we're extracting one known
50 /// element. If false we're extracting a variable index.
51 static bool cheapToScalarize(Value *V, bool isConstant) {
52  if (Constant *C = dyn_cast<Constant>(V)) {
53  if (isConstant) return true;
54 
55  // If all elts are the same, we can extract it and use any of the values.
56  if (Constant *Op0 = C->getAggregateElement(0U)) {
57  for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
58  ++i)
59  if (C->getAggregateElement(i) != Op0)
60  return false;
61  return true;
62  }
63  }
65  if (!I) return false;
66 
67  // Insert element gets simplified to the inserted element or is deleted if
68  // this is constant idx extract element and its a constant idx insertelt.
69  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
70  isa<ConstantInt>(I->getOperand(2)))
71  return true;
72  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
73  return true;
74  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
75  if (BO->hasOneUse() &&
76  (cheapToScalarize(BO->getOperand(0), isConstant) ||
77  cheapToScalarize(BO->getOperand(1), isConstant)))
78  return true;
79  if (CmpInst *CI = dyn_cast<CmpInst>(I))
80  if (CI->hasOneUse() &&
81  (cheapToScalarize(CI->getOperand(0), isConstant) ||
82  cheapToScalarize(CI->getOperand(1), isConstant)))
83  return true;
84 
85  return false;
86 }
87 
88 // If we have a PHI node with a vector type that is only used to feed
89 // itself and be an operand of extractelement at a constant location,
90 // try to replace the PHI of the vector type with a PHI of a scalar type.
91 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
93  // The users we want the PHI to have are:
94  // 1) The EI ExtractElement (we already know this)
95  // 2) Possibly more ExtractElements with the same index.
96  // 3) Another operand, which will feed back into the PHI.
97  Instruction *PHIUser = nullptr;
98  for (auto U : PN->users()) {
99  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
100  if (EI.getIndexOperand() == EU->getIndexOperand())
101  Extracts.push_back(EU);
102  else
103  return nullptr;
104  } else if (!PHIUser) {
105  PHIUser = cast<Instruction>(U);
106  } else {
107  return nullptr;
108  }
109  }
110 
111  if (!PHIUser)
112  return nullptr;
113 
114  // Verify that this PHI user has one use, which is the PHI itself,
115  // and that it is a binary operation which is cheap to scalarize.
116  // otherwise return nullptr.
117  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
118  !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
119  return nullptr;
120 
121  // Create a scalar PHI node that will replace the vector PHI node
122  // just before the current PHI node.
123  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
124  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
125  // Scalarize each PHI operand.
126  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
127  Value *PHIInVal = PN->getIncomingValue(i);
128  BasicBlock *inBB = PN->getIncomingBlock(i);
129  Value *Elt = EI.getIndexOperand();
130  // If the operand is the PHI induction variable:
131  if (PHIInVal == PHIUser) {
132  // Scalarize the binary operation. Its first operand is the
133  // scalar PHI, and the second operand is extracted from the other
134  // vector operand.
135  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
136  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
137  Value *Op = InsertNewInstWith(
138  ExtractElementInst::Create(B0->getOperand(opId), Elt,
139  B0->getOperand(opId)->getName() + ".Elt"),
140  *B0);
141  Value *newPHIUser = InsertNewInstWith(
143  scalarPHI, Op, B0), *B0);
144  scalarPHI->addIncoming(newPHIUser, inBB);
145  } else {
146  // Scalarize PHI input:
147  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
148  // Insert the new instruction into the predecessor basic block.
149  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
150  BasicBlock::iterator InsertPos;
151  if (pos && !isa<PHINode>(pos)) {
152  InsertPos = ++pos->getIterator();
153  } else {
154  InsertPos = inBB->getFirstInsertionPt();
155  }
156 
157  InsertNewInstWith(newEI, *InsertPos);
158 
159  scalarPHI->addIncoming(newEI, inBB);
160  }
161  }
162 
163  for (auto E : Extracts)
164  replaceInstUsesWith(*E, scalarPHI);
165 
166  return &EI;
167 }
168 
170  InstCombiner::BuilderTy &Builder) {
171  Value *X;
172  uint64_t ExtIndexC;
173  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
174  !X->getType()->isVectorTy() ||
175  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
176  return nullptr;
177 
178  // If this extractelement is using a bitcast from a vector of the same number
179  // of elements, see if we can find the source element from the source vector:
180  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
181  Type *SrcTy = X->getType();
182  Type *DestTy = Ext.getType();
183  unsigned NumSrcElts = SrcTy->getVectorNumElements();
184  unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
185  if (NumSrcElts == NumElts)
186  if (Value *Elt = findScalarElement(X, ExtIndexC))
187  return new BitCastInst(Elt, DestTy);
188 
189  return nullptr;
190 }
191 
194  EI.getIndexOperand(),
195  SQ.getWithInstruction(&EI)))
196  return replaceInstUsesWith(EI, V);
197 
198  // If vector val is constant with all elements the same, replace EI with
199  // that element. We handle a known element # below.
200  if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
201  if (cheapToScalarize(C, false))
202  return replaceInstUsesWith(EI, C->getAggregateElement(0U));
203 
204  // If extracting a specified index from the vector, see if we can recursively
205  // find a previously computed scalar that was inserted into the vector.
206  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
207  unsigned NumElts = EI.getVectorOperandType()->getNumElements();
208 
209  // InstSimplify should handle cases where the index is invalid.
210  if (!IdxC->getValue().ule(NumElts))
211  return nullptr;
212 
213  // This instruction only demands the single element from the input vector.
214  // If the input vector has a single use, simplify it based on this use
215  // property.
216  if (EI.getOperand(0)->hasOneUse() && NumElts != 1) {
217  APInt UndefElts(NumElts, 0);
218  APInt DemandedMask(NumElts, 0);
219  DemandedMask.setBit(IdxC->getZExtValue());
220  if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
221  UndefElts)) {
222  EI.setOperand(0, V);
223  return &EI;
224  }
225  }
226 
227  if (Instruction *I = foldBitcastExtElt(EI, Builder))
228  return I;
229 
230  // If there's a vector PHI feeding a scalar use through this extractelement
231  // instruction, try to scalarize the PHI.
232  if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
233  Instruction *scalarPHI = scalarizePHI(EI, PN);
234  if (scalarPHI)
235  return scalarPHI;
236  }
237  }
238 
239  if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
240  // Push extractelement into predecessor operation if legal and
241  // profitable to do so.
242  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
243  if (I->hasOneUse() &&
244  cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
245  Value *newEI0 =
246  Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
247  EI.getName()+".lhs");
248  Value *newEI1 =
249  Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
250  EI.getName()+".rhs");
251  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
252  newEI0, newEI1, BO);
253  }
254  } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
255  // Extracting the inserted element?
256  if (IE->getOperand(2) == EI.getOperand(1))
257  return replaceInstUsesWith(EI, IE->getOperand(1));
258  // If the inserted and extracted elements are constants, they must not
259  // be the same value, extract from the pre-inserted value instead.
260  if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
261  Worklist.AddValue(EI.getOperand(0));
262  EI.setOperand(0, IE->getOperand(0));
263  return &EI;
264  }
265  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
266  // If this is extracting an element from a shufflevector, figure out where
267  // it came from and extract from the appropriate input element instead.
268  if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
269  int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
270  Value *Src;
271  unsigned LHSWidth =
272  SVI->getOperand(0)->getType()->getVectorNumElements();
273 
274  if (SrcIdx < 0)
275  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
276  if (SrcIdx < (int)LHSWidth)
277  Src = SVI->getOperand(0);
278  else {
279  SrcIdx -= LHSWidth;
280  Src = SVI->getOperand(1);
281  }
283  return ExtractElementInst::Create(Src,
284  ConstantInt::get(Int32Ty,
285  SrcIdx, false));
286  }
287  } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
288  // Canonicalize extractelement(cast) -> cast(extractelement).
289  // Bitcasts can change the number of vector elements, and they cost
290  // nothing.
291  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
292  Value *EE = Builder.CreateExtractElement(CI->getOperand(0),
293  EI.getIndexOperand());
294  Worklist.AddValue(EE);
295  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
296  }
297  }
298  }
299  return nullptr;
300 }
301 
302 /// If V is a shuffle of values that ONLY returns elements from either LHS or
303 /// RHS, return the shuffle mask and true. Otherwise, return false.
304 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
306  assert(LHS->getType() == RHS->getType() &&
307  "Invalid CollectSingleShuffleElements");
308  unsigned NumElts = V->getType()->getVectorNumElements();
309 
310  if (isa<UndefValue>(V)) {
311  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
312  return true;
313  }
314 
315  if (V == LHS) {
316  for (unsigned i = 0; i != NumElts; ++i)
318  return true;
319  }
320 
321  if (V == RHS) {
322  for (unsigned i = 0; i != NumElts; ++i)
324  i+NumElts));
325  return true;
326  }
327 
328  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
329  // If this is an insert of an extract from some other vector, include it.
330  Value *VecOp = IEI->getOperand(0);
331  Value *ScalarOp = IEI->getOperand(1);
332  Value *IdxOp = IEI->getOperand(2);
333 
334  if (!isa<ConstantInt>(IdxOp))
335  return false;
336  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
337 
338  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
339  // We can handle this if the vector we are inserting into is
340  // transitively ok.
341  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
342  // If so, update the mask to reflect the inserted undef.
343  Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
344  return true;
345  }
346  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
347  if (isa<ConstantInt>(EI->getOperand(1))) {
348  unsigned ExtractedIdx =
349  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
350  unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
351 
352  // This must be extracting from either LHS or RHS.
353  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
354  // We can handle this if the vector we are inserting into is
355  // transitively ok.
356  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
357  // If so, update the mask to reflect the inserted value.
358  if (EI->getOperand(0) == LHS) {
359  Mask[InsertedIdx % NumElts] =
361  ExtractedIdx);
362  } else {
363  assert(EI->getOperand(0) == RHS);
364  Mask[InsertedIdx % NumElts] =
366  ExtractedIdx + NumLHSElts);
367  }
368  return true;
369  }
370  }
371  }
372  }
373  }
374 
375  return false;
376 }
377 
378 /// If we have insertion into a vector that is wider than the vector that we
379 /// are extracting from, try to widen the source vector to allow a single
380 /// shufflevector to replace one or more insert/extract pairs.
382  ExtractElementInst *ExtElt,
383  InstCombiner &IC) {
384  VectorType *InsVecType = InsElt->getType();
385  VectorType *ExtVecType = ExtElt->getVectorOperandType();
386  unsigned NumInsElts = InsVecType->getVectorNumElements();
387  unsigned NumExtElts = ExtVecType->getVectorNumElements();
388 
389  // The inserted-to vector must be wider than the extracted-from vector.
390  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
391  NumExtElts >= NumInsElts)
392  return;
393 
394  // Create a shuffle mask to widen the extended-from vector using undefined
395  // values. The mask selects all of the values of the original vector followed
396  // by as many undefined values as needed to create a vector of the same length
397  // as the inserted-to vector.
398  SmallVector<Constant *, 16> ExtendMask;
399  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
400  for (unsigned i = 0; i < NumExtElts; ++i)
401  ExtendMask.push_back(ConstantInt::get(IntType, i));
402  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
403  ExtendMask.push_back(UndefValue::get(IntType));
404 
405  Value *ExtVecOp = ExtElt->getVectorOperand();
406  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
407  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
408  ? ExtVecOpInst->getParent()
409  : ExtElt->getParent();
410 
411  // TODO: This restriction matches the basic block check below when creating
412  // new extractelement instructions. If that limitation is removed, this one
413  // could also be removed. But for now, we just bail out to ensure that we
414  // will replace the extractelement instruction that is feeding our
415  // insertelement instruction. This allows the insertelement to then be
416  // replaced by a shufflevector. If the insertelement is not replaced, we can
417  // induce infinite looping because there's an optimization for extractelement
418  // that will delete our widening shuffle. This would trigger another attempt
419  // here to create that shuffle, and we spin forever.
420  if (InsertionBlock != InsElt->getParent())
421  return;
422 
423  // TODO: This restriction matches the check in visitInsertElementInst() and
424  // prevents an infinite loop caused by not turning the extract/insert pair
425  // into a shuffle. We really should not need either check, but we're lacking
426  // folds for shufflevectors because we're afraid to generate shuffle masks
427  // that the backend can't handle.
428  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
429  return;
430 
431  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
432  ConstantVector::get(ExtendMask));
433 
434  // Insert the new shuffle after the vector operand of the extract is defined
435  // (as long as it's not a PHI) or at the start of the basic block of the
436  // extract, so any subsequent extracts in the same basic block can use it.
437  // TODO: Insert before the earliest ExtractElementInst that is replaced.
438  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
439  WideVec->insertAfter(ExtVecOpInst);
440  else
441  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
442 
443  // Replace extracts from the original narrow vector with extracts from the new
444  // wide vector.
445  for (User *U : ExtVecOp->users()) {
447  if (!OldExt || OldExt->getParent() != WideVec->getParent())
448  continue;
449  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
450  NewExt->insertAfter(OldExt);
451  IC.replaceInstUsesWith(*OldExt, NewExt);
452  }
453 }
454 
455 /// We are building a shuffle to create V, which is a sequence of insertelement,
456 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
457 /// not rely on the second vector source. Return a std::pair containing the
458 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
459 /// parameter as required.
460 ///
461 /// Note: we intentionally don't try to fold earlier shuffles since they have
462 /// often been chosen carefully to be efficiently implementable on the target.
463 using ShuffleOps = std::pair<Value *, Value *>;
464 
467  Value *PermittedRHS,
468  InstCombiner &IC) {
469  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
470  unsigned NumElts = V->getType()->getVectorNumElements();
471 
472  if (isa<UndefValue>(V)) {
473  Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
474  return std::make_pair(
475  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
476  }
477 
478  if (isa<ConstantAggregateZero>(V)) {
479  Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
480  return std::make_pair(V, nullptr);
481  }
482 
483  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
484  // If this is an insert of an extract from some other vector, include it.
485  Value *VecOp = IEI->getOperand(0);
486  Value *ScalarOp = IEI->getOperand(1);
487  Value *IdxOp = IEI->getOperand(2);
488 
489  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
490  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
491  unsigned ExtractedIdx =
492  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
493  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
494 
495  // Either the extracted from or inserted into vector must be RHSVec,
496  // otherwise we'd end up with a shuffle of three inputs.
497  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
498  Value *RHS = EI->getOperand(0);
499  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
500  assert(LR.second == nullptr || LR.second == RHS);
501 
502  if (LR.first->getType() != RHS->getType()) {
503  // Although we are giving up for now, see if we can create extracts
504  // that match the inserts for another round of combining.
505  replaceExtractElements(IEI, EI, IC);
506 
507  // We tried our best, but we can't find anything compatible with RHS
508  // further up the chain. Return a trivial shuffle.
509  for (unsigned i = 0; i < NumElts; ++i)
510  Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
511  return std::make_pair(V, nullptr);
512  }
513 
514  unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
515  Mask[InsertedIdx % NumElts] =
517  NumLHSElts+ExtractedIdx);
518  return std::make_pair(LR.first, RHS);
519  }
520 
521  if (VecOp == PermittedRHS) {
522  // We've gone as far as we can: anything on the other side of the
523  // extractelement will already have been converted into a shuffle.
524  unsigned NumLHSElts =
526  for (unsigned i = 0; i != NumElts; ++i)
529  i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
530  return std::make_pair(EI->getOperand(0), PermittedRHS);
531  }
532 
533  // If this insertelement is a chain that comes from exactly these two
534  // vectors, return the vector and the effective shuffle.
535  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
536  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
537  Mask))
538  return std::make_pair(EI->getOperand(0), PermittedRHS);
539  }
540  }
541  }
542 
543  // Otherwise, we can't do anything fancy. Return an identity vector.
544  for (unsigned i = 0; i != NumElts; ++i)
546  return std::make_pair(V, nullptr);
547 }
548 
549 /// Try to find redundant insertvalue instructions, like the following ones:
550 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
551 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
552 /// Here the second instruction inserts values at the same indices, as the
553 /// first one, making the first one redundant.
554 /// It should be transformed to:
555 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
557  bool IsRedundant = false;
558  ArrayRef<unsigned int> FirstIndices = I.getIndices();
559 
560  // If there is a chain of insertvalue instructions (each of them except the
561  // last one has only one use and it's another insertvalue insn from this
562  // chain), check if any of the 'children' uses the same indices as the first
563  // instruction. In this case, the first one is redundant.
564  Value *V = &I;
565  unsigned Depth = 0;
566  while (V->hasOneUse() && Depth < 10) {
567  User *U = V->user_back();
568  auto UserInsInst = dyn_cast<InsertValueInst>(U);
569  if (!UserInsInst || U->getOperand(0) != V)
570  break;
571  if (UserInsInst->getIndices() == FirstIndices) {
572  IsRedundant = true;
573  break;
574  }
575  V = UserInsInst;
576  Depth++;
577  }
578 
579  if (IsRedundant)
580  return replaceInstUsesWith(I, I.getOperand(0));
581  return nullptr;
582 }
583 
585  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
586  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
587 
588  // A vector select does not change the size of the operands.
589  if (MaskSize != VecSize)
590  return false;
591 
592  // Each mask element must be undefined or choose a vector element from one of
593  // the source operands without crossing vector lanes.
594  for (int i = 0; i != MaskSize; ++i) {
595  int Elt = Shuf.getMaskValue(i);
596  if (Elt != -1 && Elt != i && Elt != i + VecSize)
597  return false;
598  }
599 
600  return true;
601 }
602 
603 // Turn a chain of inserts that splats a value into a canonical insert + shuffle
604 // splat. That is:
605 // insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
606 // shufflevector(insertelt(X, %k, 0), undef, zero)
608  // We are interested in the last insert in a chain. So, if this insert
609  // has a single user, and that user is an insert, bail.
610  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
611  return nullptr;
612 
613  VectorType *VT = cast<VectorType>(InsElt.getType());
614  int NumElements = VT->getNumElements();
615 
616  // Do not try to do this for a one-element vector, since that's a nop,
617  // and will cause an inf-loop.
618  if (NumElements == 1)
619  return nullptr;
620 
621  Value *SplatVal = InsElt.getOperand(1);
622  InsertElementInst *CurrIE = &InsElt;
623  SmallVector<bool, 16> ElementPresent(NumElements, false);
624  InsertElementInst *FirstIE = nullptr;
625 
626  // Walk the chain backwards, keeping track of which indices we inserted into,
627  // until we hit something that isn't an insert of the splatted value.
628  while (CurrIE) {
629  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
630  if (!Idx || CurrIE->getOperand(1) != SplatVal)
631  return nullptr;
632 
633  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
634  // Check none of the intermediate steps have any additional uses, except
635  // for the root insertelement instruction, which can be re-used, if it
636  // inserts at position 0.
637  if (CurrIE != &InsElt &&
638  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
639  return nullptr;
640 
641  ElementPresent[Idx->getZExtValue()] = true;
642  FirstIE = CurrIE;
643  CurrIE = NextIE;
644  }
645 
646  // Make sure we've seen an insert into every element.
647  if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
648  return nullptr;
649 
650  // All right, create the insert + shuffle.
651  Instruction *InsertFirst;
652  if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
653  InsertFirst = FirstIE;
654  else
655  InsertFirst = InsertElementInst::Create(
656  UndefValue::get(VT), SplatVal,
658  "", &InsElt);
659 
661  VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
662 
663  return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
664 }
665 
666 /// If we have an insertelement instruction feeding into another insertelement
667 /// and the 2nd is inserting a constant into the vector, canonicalize that
668 /// constant insertion before the insertion of a variable:
669 ///
670 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
671 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
672 ///
673 /// This has the potential of eliminating the 2nd insertelement instruction
674 /// via constant folding of the scalar constant into a vector constant.
676  InstCombiner::BuilderTy &Builder) {
677  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
678  if (!InsElt1 || !InsElt1->hasOneUse())
679  return nullptr;
680 
681  Value *X, *Y;
682  Constant *ScalarC;
683  ConstantInt *IdxC1, *IdxC2;
684  if (match(InsElt1->getOperand(0), m_Value(X)) &&
685  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
686  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
687  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
688  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
689  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
690  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
691  }
692 
693  return nullptr;
694 }
695 
696 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
697 /// --> shufflevector X, CVec', Mask'
699  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
700  // Bail out if the parent has more than one use. In that case, we'd be
701  // replacing the insertelt with a shuffle, and that's not a clear win.
702  if (!Inst || !Inst->hasOneUse())
703  return nullptr;
704  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
705  // The shuffle must have a constant vector operand. The insertelt must have
706  // a constant scalar being inserted at a constant position in the vector.
707  Constant *ShufConstVec, *InsEltScalar;
708  uint64_t InsEltIndex;
709  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
710  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
711  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
712  return nullptr;
713 
714  // Adding an element to an arbitrary shuffle could be expensive, but a
715  // shuffle that selects elements from vectors without crossing lanes is
716  // assumed cheap.
717  // If we're just adding a constant into that shuffle, it will still be
718  // cheap.
719  if (!isShuffleEquivalentToSelect(*Shuf))
720  return nullptr;
721 
722  // From the above 'select' check, we know that the mask has the same number
723  // of elements as the vector input operands. We also know that each constant
724  // input element is used in its lane and can not be used more than once by
725  // the shuffle. Therefore, replace the constant in the shuffle's constant
726  // vector with the insertelt constant. Replace the constant in the shuffle's
727  // mask vector with the insertelt index plus the length of the vector
728  // (because the constant vector operand of a shuffle is always the 2nd
729  // operand).
730  Constant *Mask = Shuf->getMask();
731  unsigned NumElts = Mask->getType()->getVectorNumElements();
732  SmallVector<Constant *, 16> NewShufElts(NumElts);
733  SmallVector<Constant *, 16> NewMaskElts(NumElts);
734  for (unsigned I = 0; I != NumElts; ++I) {
735  if (I == InsEltIndex) {
736  NewShufElts[I] = InsEltScalar;
737  Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
738  NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
739  } else {
740  // Copy over the existing values.
741  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
742  NewMaskElts[I] = Mask->getAggregateElement(I);
743  }
744  }
745 
746  // Create new operands for a shuffle that includes the constant of the
747  // original insertelt. The old shuffle will be dead now.
748  return new ShuffleVectorInst(Shuf->getOperand(0),
749  ConstantVector::get(NewShufElts),
750  ConstantVector::get(NewMaskElts));
751  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
752  // Transform sequences of insertelements ops with constant data/indexes into
753  // a single shuffle op.
754  unsigned NumElts = InsElt.getType()->getNumElements();
755 
756  uint64_t InsertIdx[2];
757  Constant *Val[2];
758  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
759  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
760  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
761  !match(IEI->getOperand(1), m_Constant(Val[1])))
762  return nullptr;
763  SmallVector<Constant *, 16> Values(NumElts);
765  auto ValI = std::begin(Val);
766  // Generate new constant vector and mask.
767  // We have 2 values/masks from the insertelements instructions. Insert them
768  // into new value/mask vectors.
769  for (uint64_t I : InsertIdx) {
770  if (!Values[I]) {
771  assert(!Mask[I]);
772  Values[I] = *ValI;
773  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
774  NumElts + I);
775  }
776  ++ValI;
777  }
778  // Remaining values are filled with 'undef' values.
779  for (unsigned I = 0; I < NumElts; ++I) {
780  if (!Values[I]) {
781  assert(!Mask[I]);
782  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
783  Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
784  }
785  }
786  // Create new operands for a shuffle that includes the constant of the
787  // original insertelt.
788  return new ShuffleVectorInst(IEI->getOperand(0),
789  ConstantVector::get(Values),
790  ConstantVector::get(Mask));
791  }
792  return nullptr;
793 }
794 
796  Value *VecOp = IE.getOperand(0);
797  Value *ScalarOp = IE.getOperand(1);
798  Value *IdxOp = IE.getOperand(2);
799 
800  if (auto *V = SimplifyInsertElementInst(
801  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
802  return replaceInstUsesWith(IE, V);
803 
804  // Inserting an undef or into an undefined place, remove this.
805  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
806  replaceInstUsesWith(IE, VecOp);
807 
808  // If the inserted element was extracted from some other vector, and if the
809  // indexes are constant, try to turn this into a shufflevector operation.
810  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
811  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
812  unsigned NumInsertVectorElts = IE.getType()->getNumElements();
813  unsigned NumExtractVectorElts =
815  unsigned ExtractedIdx =
816  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
817  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
818 
819  if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
820  return replaceInstUsesWith(IE, VecOp);
821 
822  if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
823  return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
824 
825  // If we are extracting a value from a vector, then inserting it right
826  // back into the same place, just use the input vector.
827  if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
828  return replaceInstUsesWith(IE, VecOp);
829 
830  // If this insertelement isn't used by some other insertelement, turn it
831  // (and any insertelements it points to), into one big shuffle.
832  if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
834  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
835 
836  // The proposed shuffle may be trivial, in which case we shouldn't
837  // perform the combine.
838  if (LR.first != &IE && LR.second != &IE) {
839  // We now have a shuffle of LHS, RHS, Mask.
840  if (LR.second == nullptr)
841  LR.second = UndefValue::get(LR.first->getType());
842  return new ShuffleVectorInst(LR.first, LR.second,
843  ConstantVector::get(Mask));
844  }
845  }
846  }
847  }
848 
849  unsigned VWidth = VecOp->getType()->getVectorNumElements();
850  APInt UndefElts(VWidth, 0);
851  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
852  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
853  if (V != &IE)
854  return replaceInstUsesWith(IE, V);
855  return &IE;
856  }
857 
859  return Shuf;
860 
861  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
862  return NewInsElt;
863 
864  // Turn a sequence of inserts that broadcasts a scalar into a single
865  // insert + shufflevector.
866  if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
867  return Broadcast;
868 
869  return nullptr;
870 }
871 
872 /// Return true if we can evaluate the specified expression tree if the vector
873 /// elements were shuffled in a different order.
875  unsigned Depth = 5) {
876  // We can always reorder the elements of a constant.
877  if (isa<Constant>(V))
878  return true;
879 
880  // We won't reorder vector arguments. No IPO here.
882  if (!I) return false;
883 
884  // Two users may expect different orders of the elements. Don't try it.
885  if (!I->hasOneUse())
886  return false;
887 
888  if (Depth == 0) return false;
889 
890  switch (I->getOpcode()) {
891  case Instruction::Add:
892  case Instruction::FAdd:
893  case Instruction::Sub:
894  case Instruction::FSub:
895  case Instruction::Mul:
896  case Instruction::FMul:
897  case Instruction::UDiv:
898  case Instruction::SDiv:
899  case Instruction::FDiv:
900  case Instruction::URem:
901  case Instruction::SRem:
902  case Instruction::FRem:
903  case Instruction::Shl:
904  case Instruction::LShr:
905  case Instruction::AShr:
906  case Instruction::And:
907  case Instruction::Or:
908  case Instruction::Xor:
909  case Instruction::ICmp:
910  case Instruction::FCmp:
911  case Instruction::Trunc:
912  case Instruction::ZExt:
913  case Instruction::SExt:
914  case Instruction::FPToUI:
915  case Instruction::FPToSI:
916  case Instruction::UIToFP:
917  case Instruction::SIToFP:
918  case Instruction::FPTrunc:
919  case Instruction::FPExt:
920  case Instruction::GetElementPtr: {
921  for (Value *Operand : I->operands()) {
922  if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
923  return false;
924  }
925  return true;
926  }
927  case Instruction::InsertElement: {
929  if (!CI) return false;
930  int ElementNumber = CI->getLimitedValue();
931 
932  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
933  // can't put an element into multiple indices.
934  bool SeenOnce = false;
935  for (int i = 0, e = Mask.size(); i != e; ++i) {
936  if (Mask[i] == ElementNumber) {
937  if (SeenOnce)
938  return false;
939  SeenOnce = true;
940  }
941  }
942  return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
943  }
944  }
945  return false;
946 }
947 
948 /// Rebuild a new instruction just like 'I' but with the new operands given.
949 /// In the event of type mismatch, the type of the operands is correct.
951  // We don't want to use the IRBuilder here because we want the replacement
952  // instructions to appear next to 'I', not the builder's insertion point.
953  switch (I->getOpcode()) {
954  case Instruction::Add:
955  case Instruction::FAdd:
956  case Instruction::Sub:
957  case Instruction::FSub:
958  case Instruction::Mul:
959  case Instruction::FMul:
960  case Instruction::UDiv:
961  case Instruction::SDiv:
962  case Instruction::FDiv:
963  case Instruction::URem:
964  case Instruction::SRem:
965  case Instruction::FRem:
966  case Instruction::Shl:
967  case Instruction::LShr:
968  case Instruction::AShr:
969  case Instruction::And:
970  case Instruction::Or:
971  case Instruction::Xor: {
972  BinaryOperator *BO = cast<BinaryOperator>(I);
973  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
974  BinaryOperator *New =
975  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
976  NewOps[0], NewOps[1], "", BO);
977  if (isa<OverflowingBinaryOperator>(BO)) {
978  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
979  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
980  }
981  if (isa<PossiblyExactOperator>(BO)) {
982  New->setIsExact(BO->isExact());
983  }
984  if (isa<FPMathOperator>(BO))
985  New->copyFastMathFlags(I);
986  return New;
987  }
988  case Instruction::ICmp:
989  assert(NewOps.size() == 2 && "icmp with #ops != 2");
990  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
991  NewOps[0], NewOps[1]);
992  case Instruction::FCmp:
993  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
994  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
995  NewOps[0], NewOps[1]);
996  case Instruction::Trunc:
997  case Instruction::ZExt:
998  case Instruction::SExt:
999  case Instruction::FPToUI:
1000  case Instruction::FPToSI:
1001  case Instruction::UIToFP:
1002  case Instruction::SIToFP:
1003  case Instruction::FPTrunc:
1004  case Instruction::FPExt: {
1005  // It's possible that the mask has a different number of elements from
1006  // the original cast. We recompute the destination type to match the mask.
1007  Type *DestTy =
1009  NewOps[0]->getType()->getVectorNumElements());
1010  assert(NewOps.size() == 1 && "cast with #ops != 1");
1011  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1012  "", I);
1013  }
1014  case Instruction::GetElementPtr: {
1015  Value *Ptr = NewOps[0];
1016  ArrayRef<Value*> Idx = NewOps.slice(1);
1018  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1019  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1020  return GEP;
1021  }
1022  }
1023  llvm_unreachable("failed to rebuild vector instructions");
1024 }
1025 
1026 Value *
1027 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1028  // Mask.size() does not need to be equal to the number of vector elements.
1029 
1030  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1031  Type *EltTy = V->getType()->getScalarType();
1032  if (isa<UndefValue>(V))
1033  return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1034 
1035  if (isa<ConstantAggregateZero>(V))
1036  return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1037 
1038  if (Constant *C = dyn_cast<Constant>(V)) {
1039  SmallVector<Constant *, 16> MaskValues;
1040  for (int i = 0, e = Mask.size(); i != e; ++i) {
1041  if (Mask[i] == -1)
1042  MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
1043  else
1044  MaskValues.push_back(Builder.getInt32(Mask[i]));
1045  }
1047  ConstantVector::get(MaskValues));
1048  }
1049 
1050  Instruction *I = cast<Instruction>(V);
1051  switch (I->getOpcode()) {
1052  case Instruction::Add:
1053  case Instruction::FAdd:
1054  case Instruction::Sub:
1055  case Instruction::FSub:
1056  case Instruction::Mul:
1057  case Instruction::FMul:
1058  case Instruction::UDiv:
1059  case Instruction::SDiv:
1060  case Instruction::FDiv:
1061  case Instruction::URem:
1062  case Instruction::SRem:
1063  case Instruction::FRem:
1064  case Instruction::Shl:
1065  case Instruction::LShr:
1066  case Instruction::AShr:
1067  case Instruction::And:
1068  case Instruction::Or:
1069  case Instruction::Xor:
1070  case Instruction::ICmp:
1071  case Instruction::FCmp:
1072  case Instruction::Trunc:
1073  case Instruction::ZExt:
1074  case Instruction::SExt:
1075  case Instruction::FPToUI:
1076  case Instruction::FPToSI:
1077  case Instruction::UIToFP:
1078  case Instruction::SIToFP:
1079  case Instruction::FPTrunc:
1080  case Instruction::FPExt:
1081  case Instruction::Select:
1082  case Instruction::GetElementPtr: {
1083  SmallVector<Value*, 8> NewOps;
1084  bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1085  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1086  Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
1087  NewOps.push_back(V);
1088  NeedsRebuild |= (V != I->getOperand(i));
1089  }
1090  if (NeedsRebuild) {
1091  return buildNew(I, NewOps);
1092  }
1093  return I;
1094  }
1095  case Instruction::InsertElement: {
1096  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1097 
1098  // The insertelement was inserting at Element. Figure out which element
1099  // that becomes after shuffling. The answer is guaranteed to be unique
1100  // by CanEvaluateShuffled.
1101  bool Found = false;
1102  int Index = 0;
1103  for (int e = Mask.size(); Index != e; ++Index) {
1104  if (Mask[Index] == Element) {
1105  Found = true;
1106  break;
1107  }
1108  }
1109 
1110  // If element is not in Mask, no need to handle the operand 1 (element to
1111  // be inserted). Just evaluate values in operand 0 according to Mask.
1112  if (!Found)
1113  return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
1114 
1115  Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
1116  return InsertElementInst::Create(V, I->getOperand(1),
1117  Builder.getInt32(Index), "", I);
1118  }
1119  }
1120  llvm_unreachable("failed to reorder elements of vector instruction!");
1121 }
1122 
1124  bool &isLHSID, bool &isRHSID) {
1125  isLHSID = isRHSID = true;
1126 
1127  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1128  if (Mask[i] < 0) continue; // Ignore undef values.
1129  // Is this an identity shuffle of the LHS value?
1130  isLHSID &= (Mask[i] == (int)i);
1131 
1132  // Is this an identity shuffle of the RHS value?
1133  isRHSID &= (Mask[i]-e == i);
1134  }
1135 }
1136 
1137 // Returns true if the shuffle is extracting a contiguous range of values from
1138 // LHS, for example:
1139 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1140 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1141 // Shuffles to: |EE|FF|GG|HH|
1142 // +--+--+--+--+
1144  SmallVector<int, 16> &Mask) {
1145  unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1146  unsigned MaskElems = Mask.size();
1147  unsigned BegIdx = Mask.front();
1148  unsigned EndIdx = Mask.back();
1149  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1150  return false;
1151  for (unsigned I = 0; I != MaskElems; ++I)
1152  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1153  return false;
1154  return true;
1155 }
1156 
1157 /// These are the ingredients in an alternate form binary operator as described
1158 /// below.
1159 struct BinopElts {
1164  Value *V0 = nullptr, Value *V1 = nullptr) :
1165  Opcode(Opc), Op0(V0), Op1(V1) {}
1166  operator bool() const { return Opcode != 0; }
1167 };
1168 
1169 /// Binops may be transformed into binops with different opcodes and operands.
1170 /// Reverse the usual canonicalization to enable folds with the non-canonical
1171 /// form of the binop. If a transform is possible, return the elements of the
1172 /// new binop. If not, return invalid elements.
1174  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1175  Type *Ty = BO->getType();
1176  switch (BO->getOpcode()) {
1177  case Instruction::Shl: {
1178  // shl X, C --> mul X, (1 << C)
1179  Constant *C;
1180  if (match(BO1, m_Constant(C))) {
1181  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1182  return { Instruction::Mul, BO0, ShlOne };
1183  }
1184  break;
1185  }
1186  case Instruction::Or: {
1187  // or X, C --> add X, C (when X and C have no common bits set)
1188  const APInt *C;
1189  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1190  return { Instruction::Add, BO0, BO1 };
1191  break;
1192  }
1193  default:
1194  break;
1195  }
1196  return {};
1197 }
1198 
1200  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1201 
1202  // Are we shuffling together some value and that same value after it has been
1203  // modified by a binop with a constant?
1204  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1205  Constant *C;
1206  bool Op0IsBinop;
1207  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1208  Op0IsBinop = true;
1209  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1210  Op0IsBinop = false;
1211  else
1212  return nullptr;
1213 
1214  // The identity constant for a binop leaves a variable operand unchanged. For
1215  // a vector, this is a splat of something like 0, -1, or 1.
1216  // If there's no identity constant for this binop, we're done.
1217  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1218  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1219  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1220  if (!IdC)
1221  return nullptr;
1222 
1223  // Shuffle identity constants into the lanes that return the original value.
1224  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1225  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1226  // The existing binop constant vector remains in the same operand position.
1227  Constant *Mask = Shuf.getMask();
1228  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1229  ConstantExpr::getShuffleVector(IdC, C, Mask);
1230 
1231  bool MightCreatePoisonOrUB =
1232  Mask->containsUndefElement() &&
1233  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1234  if (MightCreatePoisonOrUB)
1235  NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1236 
1237  // shuf (bop X, C), X, M --> bop X, C'
1238  // shuf X, (bop X, C), M --> bop X, C'
1239  Value *X = Op0IsBinop ? Op1 : Op0;
1240  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1241  NewBO->copyIRFlags(BO);
1242 
1243  // An undef shuffle mask element may propagate as an undef constant element in
1244  // the new binop. That would produce poison where the original code might not.
1245  // If we already made a safe constant, then there's no danger.
1246  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1247  NewBO->dropPoisonGeneratingFlags();
1248  return NewBO;
1249 }
1250 
1251 /// Try to fold shuffles that are the equivalent of a vector select.
1253  InstCombiner::BuilderTy &Builder,
1254  const DataLayout &DL) {
1255  if (!Shuf.isSelect())
1256  return nullptr;
1257 
1259  return I;
1260 
1261  BinaryOperator *B0, *B1;
1262  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1263  !match(Shuf.getOperand(1), m_BinOp(B1)))
1264  return nullptr;
1265 
1266  Value *X, *Y;
1267  Constant *C0, *C1;
1268  bool ConstantsAreOp1;
1269  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1270  match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1271  ConstantsAreOp1 = true;
1272  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1273  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1274  ConstantsAreOp1 = false;
1275  else
1276  return nullptr;
1277 
1278  // We need matching binops to fold the lanes together.
1279  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1280  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1281  bool DropNSW = false;
1282  if (ConstantsAreOp1 && Opc0 != Opc1) {
1283  // TODO: We drop "nsw" if shift is converted into multiply because it may
1284  // not be correct when the shift amount is BitWidth - 1. We could examine
1285  // each vector element to determine if it is safe to keep that flag.
1286  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1287  DropNSW = true;
1288  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1289  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1290  Opc0 = AltB0.Opcode;
1291  C0 = cast<Constant>(AltB0.Op1);
1292  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1293  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1294  Opc1 = AltB1.Opcode;
1295  C1 = cast<Constant>(AltB1.Op1);
1296  }
1297  }
1298 
1299  if (Opc0 != Opc1)
1300  return nullptr;
1301 
1302  // The opcodes must be the same. Use a new name to make that clear.
1303  BinaryOperator::BinaryOps BOpc = Opc0;
1304 
1305  // Select the constant elements needed for the single binop.
1306  Constant *Mask = Shuf.getMask();
1307  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1308 
1309  // We are moving a binop after a shuffle. When a shuffle has an undefined
1310  // mask element, the result is undefined, but it is not poison or undefined
1311  // behavior. That is not necessarily true for div/rem/shift.
1312  bool MightCreatePoisonOrUB =
1313  Mask->containsUndefElement() &&
1315  if (MightCreatePoisonOrUB)
1316  NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1317 
1318  Value *V;
1319  if (X == Y) {
1320  // Remove a binop and the shuffle by rearranging the constant:
1321  // shuffle (op V, C0), (op V, C1), M --> op V, C'
1322  // shuffle (op C0, V), (op C1, V), M --> op C', V
1323  V = X;
1324  } else {
1325  // If there are 2 different variable operands, we must create a new shuffle
1326  // (select) first, so check uses to ensure that we don't end up with more
1327  // instructions than we started with.
1328  if (!B0->hasOneUse() && !B1->hasOneUse())
1329  return nullptr;
1330 
1331  // If we use the original shuffle mask and op1 is *variable*, we would be
1332  // putting an undef into operand 1 of div/rem/shift. This is either UB or
1333  // poison. We do not have to guard against UB when *constants* are op1
1334  // because safe constants guarantee that we do not overflow sdiv/srem (and
1335  // there's no danger for other opcodes).
1336  // TODO: To allow this case, create a new shuffle mask with no undefs.
1337  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1338  return nullptr;
1339 
1340  // Note: In general, we do not create new shuffles in InstCombine because we
1341  // do not know if a target can lower an arbitrary shuffle optimally. In this
1342  // case, the shuffle uses the existing mask, so there is no additional risk.
1343 
1344  // Select the variable vectors first, then perform the binop:
1345  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1346  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1347  V = Builder.CreateShuffleVector(X, Y, Mask);
1348  }
1349 
1350  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1351  BinaryOperator::Create(BOpc, NewC, V);
1352 
1353  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1354  // 1. If we changed an opcode, poison conditions might have changed.
1355  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1356  // where the original code did not. But if we already made a safe constant,
1357  // then there's no danger.
1358  NewBO->copyIRFlags(B0);
1359  NewBO->andIRFlags(B1);
1360  if (DropNSW)
1361  NewBO->setHasNoSignedWrap(false);
1362  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1363  NewBO->dropPoisonGeneratingFlags();
1364  return NewBO;
1365 }
1366 
1367 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1368 /// narrowing (concatenating with undef and extracting back to the original
1369 /// length). This allows replacing the wide select with a narrow select.
1371  InstCombiner::BuilderTy &Builder) {
1372  // This must be a narrowing identity shuffle. It extracts the 1st N elements
1373  // of the 1st vector operand of a shuffle.
1374  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1375  return nullptr;
1376 
1377  // The vector being shuffled must be a vector select that we can eliminate.
1378  // TODO: The one-use requirement could be eased if X and/or Y are constants.
1379  Value *Cond, *X, *Y;
1380  if (!match(Shuf.getOperand(0),
1381  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1382  return nullptr;
1383 
1384  // We need a narrow condition value. It must be extended with undef elements
1385  // and have the same number of elements as this shuffle.
1386  unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1387  Value *NarrowCond;
1388  if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1389  m_Constant()))) ||
1390  NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1391  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1392  return nullptr;
1393 
1394  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1395  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1397  Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1398  Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1399  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1400 }
1401 
1403  Value *LHS = SVI.getOperand(0);
1404  Value *RHS = SVI.getOperand(1);
1405  if (auto *V = SimplifyShuffleVectorInst(
1406  LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1407  return replaceInstUsesWith(SVI, V);
1408 
1409  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1410  return I;
1411 
1412  if (Instruction *I = narrowVectorSelect(SVI, Builder))
1413  return I;
1414 
1415  unsigned VWidth = SVI.getType()->getVectorNumElements();
1416  APInt UndefElts(VWidth, 0);
1417  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1418  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1419  if (V != &SVI)
1420  return replaceInstUsesWith(SVI, V);
1421  return &SVI;
1422  }
1423 
1424  SmallVector<int, 16> Mask = SVI.getShuffleMask();
1426  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1427  bool MadeChange = false;
1428 
1429  // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
1430  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
1431  if (LHS == RHS || isa<UndefValue>(LHS)) {
1432  // Remap any references to RHS to use LHS.
1434  for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
1435  if (Mask[i] < 0) {
1436  Elts.push_back(UndefValue::get(Int32Ty));
1437  continue;
1438  }
1439 
1440  if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
1441  (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
1442  Mask[i] = -1; // Turn into undef.
1443  Elts.push_back(UndefValue::get(Int32Ty));
1444  } else {
1445  Mask[i] = Mask[i] % e; // Force to LHS.
1446  Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1447  }
1448  }
1449  SVI.setOperand(0, SVI.getOperand(1));
1450  SVI.setOperand(1, UndefValue::get(RHS->getType()));
1451  SVI.setOperand(2, ConstantVector::get(Elts));
1452  LHS = SVI.getOperand(0);
1453  RHS = SVI.getOperand(1);
1454  MadeChange = true;
1455  }
1456 
1457  if (VWidth == LHSWidth) {
1458  // Analyze the shuffle, are the LHS or RHS and identity shuffles?
1459  bool isLHSID, isRHSID;
1460  recognizeIdentityMask(Mask, isLHSID, isRHSID);
1461 
1462  // Eliminate identity shuffles.
1463  if (isLHSID) return replaceInstUsesWith(SVI, LHS);
1464  if (isRHSID) return replaceInstUsesWith(SVI, RHS);
1465  }
1466 
1467  if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
1468  Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
1469  return replaceInstUsesWith(SVI, V);
1470  }
1471 
1472  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1473  // a non-vector type. We can instead bitcast the original vector followed by
1474  // an extract of the desired element:
1475  //
1476  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1477  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1478  // %1 = bitcast <4 x i8> %sroa to i32
1479  // Becomes:
1480  // %bc = bitcast <16 x i8> %in to <4 x i32>
1481  // %ext = extractelement <4 x i32> %bc, i32 0
1482  //
1483  // If the shuffle is extracting a contiguous range of values from the input
1484  // vector then each use which is a bitcast of the extracted size can be
1485  // replaced. This will work if the vector types are compatible, and the begin
1486  // index is aligned to a value in the casted vector type. If the begin index
1487  // isn't aligned then we can shuffle the original vector (keeping the same
1488  // vector type) before extracting.
1489  //
1490  // This code will bail out if the target type is fundamentally incompatible
1491  // with vectors of the source type.
1492  //
1493  // Example of <16 x i8>, target type i32:
1494  // Index range [4,8): v-----------v Will work.
1495  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1496  // <16 x i8>: | | | | | | | | | | | | | | | | |
1497  // <4 x i32>: | | | | |
1498  // +-----------+-----------+-----------+-----------+
1499  // Index range [6,10): ^-----------^ Needs an extra shuffle.
1500  // Target type i40: ^--------------^ Won't work, bail.
1501  if (isShuffleExtractingFromLHS(SVI, Mask)) {
1502  Value *V = LHS;
1503  unsigned MaskElems = Mask.size();
1504  VectorType *SrcTy = cast<VectorType>(V->getType());
1505  unsigned VecBitWidth = SrcTy->getBitWidth();
1506  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1507  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1508  unsigned SrcNumElems = SrcTy->getNumElements();
1511  for (User *U : SVI.users())
1512  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
1513  if (!BC->use_empty())
1514  // Only visit bitcasts that weren't previously handled.
1515  BCs.push_back(BC);
1516  for (BitCastInst *BC : BCs) {
1517  unsigned BegIdx = Mask.front();
1518  Type *TgtTy = BC->getDestTy();
1519  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
1520  if (!TgtElemBitWidth)
1521  continue;
1522  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
1523  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
1524  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
1525  if (!VecBitWidthsEqual)
1526  continue;
1527  if (!VectorType::isValidElementType(TgtTy))
1528  continue;
1529  VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
1530  if (!BegIsAligned) {
1531  // Shuffle the input so [0,NumElements) contains the output, and
1532  // [NumElems,SrcNumElems) is undef.
1533  SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
1534  UndefValue::get(Int32Ty));
1535  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
1536  ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
1537  V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
1538  ConstantVector::get(ShuffleMask),
1539  SVI.getName() + ".extract");
1540  BegIdx = 0;
1541  }
1542  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
1543  assert(SrcElemsPerTgtElem);
1544  BegIdx /= SrcElemsPerTgtElem;
1545  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
1546  auto *NewBC =
1547  BCAlreadyExists
1548  ? NewBCs[CastSrcTy]
1549  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
1550  if (!BCAlreadyExists)
1551  NewBCs[CastSrcTy] = NewBC;
1552  auto *Ext = Builder.CreateExtractElement(
1553  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
1554  // The shufflevector isn't being replaced: the bitcast that used it
1555  // is. InstCombine will visit the newly-created instructions.
1556  replaceInstUsesWith(*BC, Ext);
1557  MadeChange = true;
1558  }
1559  }
1560 
1561  // If the LHS is a shufflevector itself, see if we can combine it with this
1562  // one without producing an unusual shuffle.
1563  // Cases that might be simplified:
1564  // 1.
1565  // x1=shuffle(v1,v2,mask1)
1566  // x=shuffle(x1,undef,mask)
1567  // ==>
1568  // x=shuffle(v1,undef,newMask)
1569  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
1570  // 2.
1571  // x1=shuffle(v1,undef,mask1)
1572  // x=shuffle(x1,x2,mask)
1573  // where v1.size() == mask1.size()
1574  // ==>
1575  // x=shuffle(v1,x2,newMask)
1576  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
1577  // 3.
1578  // x2=shuffle(v2,undef,mask2)
1579  // x=shuffle(x1,x2,mask)
1580  // where v2.size() == mask2.size()
1581  // ==>
1582  // x=shuffle(x1,v2,newMask)
1583  // newMask[i] = (mask[i] < x1.size())
1584  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
1585  // 4.
1586  // x1=shuffle(v1,undef,mask1)
1587  // x2=shuffle(v2,undef,mask2)
1588  // x=shuffle(x1,x2,mask)
1589  // where v1.size() == v2.size()
1590  // ==>
1591  // x=shuffle(v1,v2,newMask)
1592  // newMask[i] = (mask[i] < x1.size())
1593  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
1594  //
1595  // Here we are really conservative:
1596  // we are absolutely afraid of producing a shuffle mask not in the input
1597  // program, because the code gen may not be smart enough to turn a merged
1598  // shuffle into two specific shuffles: it may produce worse code. As such,
1599  // we only merge two shuffles if the result is either a splat or one of the
1600  // input shuffle masks. In this case, merging the shuffles just removes
1601  // one instruction, which we know is safe. This is good for things like
1602  // turning: (splat(splat)) -> splat, or
1603  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
1604  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
1605  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
1606  if (LHSShuffle)
1607  if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
1608  LHSShuffle = nullptr;
1609  if (RHSShuffle)
1610  if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
1611  RHSShuffle = nullptr;
1612  if (!LHSShuffle && !RHSShuffle)
1613  return MadeChange ? &SVI : nullptr;
1614 
1615  Value* LHSOp0 = nullptr;
1616  Value* LHSOp1 = nullptr;
1617  Value* RHSOp0 = nullptr;
1618  unsigned LHSOp0Width = 0;
1619  unsigned RHSOp0Width = 0;
1620  if (LHSShuffle) {
1621  LHSOp0 = LHSShuffle->getOperand(0);
1622  LHSOp1 = LHSShuffle->getOperand(1);
1623  LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
1624  }
1625  if (RHSShuffle) {
1626  RHSOp0 = RHSShuffle->getOperand(0);
1627  RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
1628  }
1629  Value* newLHS = LHS;
1630  Value* newRHS = RHS;
1631  if (LHSShuffle) {
1632  // case 1
1633  if (isa<UndefValue>(RHS)) {
1634  newLHS = LHSOp0;
1635  newRHS = LHSOp1;
1636  }
1637  // case 2 or 4
1638  else if (LHSOp0Width == LHSWidth) {
1639  newLHS = LHSOp0;
1640  }
1641  }
1642  // case 3 or 4
1643  if (RHSShuffle && RHSOp0Width == LHSWidth) {
1644  newRHS = RHSOp0;
1645  }
1646  // case 4
1647  if (LHSOp0 == RHSOp0) {
1648  newLHS = LHSOp0;
1649  newRHS = nullptr;
1650  }
1651 
1652  if (newLHS == LHS && newRHS == RHS)
1653  return MadeChange ? &SVI : nullptr;
1654 
1655  SmallVector<int, 16> LHSMask;
1656  SmallVector<int, 16> RHSMask;
1657  if (newLHS != LHS)
1658  LHSMask = LHSShuffle->getShuffleMask();
1659  if (RHSShuffle && newRHS != RHS)
1660  RHSMask = RHSShuffle->getShuffleMask();
1661 
1662  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
1663  SmallVector<int, 16> newMask;
1664  bool isSplat = true;
1665  int SplatElt = -1;
1666  // Create a new mask for the new ShuffleVectorInst so that the new
1667  // ShuffleVectorInst is equivalent to the original one.
1668  for (unsigned i = 0; i < VWidth; ++i) {
1669  int eltMask;
1670  if (Mask[i] < 0) {
1671  // This element is an undef value.
1672  eltMask = -1;
1673  } else if (Mask[i] < (int)LHSWidth) {
1674  // This element is from left hand side vector operand.
1675  //
1676  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
1677  // new mask value for the element.
1678  if (newLHS != LHS) {
1679  eltMask = LHSMask[Mask[i]];
1680  // If the value selected is an undef value, explicitly specify it
1681  // with a -1 mask value.
1682  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
1683  eltMask = -1;
1684  } else
1685  eltMask = Mask[i];
1686  } else {
1687  // This element is from right hand side vector operand
1688  //
1689  // If the value selected is an undef value, explicitly specify it
1690  // with a -1 mask value. (case 1)
1691  if (isa<UndefValue>(RHS))
1692  eltMask = -1;
1693  // If RHS is going to be replaced (case 3 or 4), calculate the
1694  // new mask value for the element.
1695  else if (newRHS != RHS) {
1696  eltMask = RHSMask[Mask[i]-LHSWidth];
1697  // If the value selected is an undef value, explicitly specify it
1698  // with a -1 mask value.
1699  if (eltMask >= (int)RHSOp0Width) {
1700  assert(isa<UndefValue>(RHSShuffle->getOperand(1))
1701  && "should have been check above");
1702  eltMask = -1;
1703  }
1704  } else
1705  eltMask = Mask[i]-LHSWidth;
1706 
1707  // If LHS's width is changed, shift the mask value accordingly.
1708  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
1709  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
1710  // If newRHS == newLHS, we want to remap any references from newRHS to
1711  // newLHS so that we can properly identify splats that may occur due to
1712  // obfuscation across the two vectors.
1713  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
1714  eltMask += newLHSWidth;
1715  }
1716 
1717  // Check if this could still be a splat.
1718  if (eltMask >= 0) {
1719  if (SplatElt >= 0 && SplatElt != eltMask)
1720  isSplat = false;
1721  SplatElt = eltMask;
1722  }
1723 
1724  newMask.push_back(eltMask);
1725  }
1726 
1727  // If the result mask is equal to one of the original shuffle masks,
1728  // or is a splat, do the replacement.
1729  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
1731  for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
1732  if (newMask[i] < 0) {
1733  Elts.push_back(UndefValue::get(Int32Ty));
1734  } else {
1735  Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
1736  }
1737  }
1738  if (!newRHS)
1739  newRHS = UndefValue::get(newLHS->getType());
1740  return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
1741  }
1742 
1743  // If the result mask is an identity, replace uses of this instruction with
1744  // corresponding argument.
1745  bool isLHSID, isRHSID;
1746  recognizeIdentityMask(newMask, isLHSID, isRHSID);
1747  if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
1748  if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
1749 
1750  return MadeChange ? &SVI : nullptr;
1751 }
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8...
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
uint64_t CallInst * C
Instruction * InsertNewInstWith(Instruction *New, Instruction &Old)
Same as InsertNewInstBefore, but also sets the debug loc.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:72
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:675
class_match< UndefValue > m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:88
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isConstant(const MachineInstr &MI)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:562
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
static Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder, const DataLayout &DL)
Try to fold shuffles that are the equivalent of a vector select.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register...
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:355
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex –> shufflevector X...
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1309
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:867
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
This instruction constructs a fixed permutation of two input vectors.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:714
ThreeOps_match< V1_t, V2_t, Mask_t, Instruction::ShuffleVector > m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m)
Matches ShuffleVectorInst.
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, SmallVector< int, 16 > &Mask)
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Constant * getMask() const
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
unsigned getBitWidth() const
Return the number of bits in the Vector type.
Definition: DerivedTypes.h:452
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1397
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:392
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
The core instruction combiner logic.
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs...
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if &#39;V & Mask&#39; is known to be zero.
static bool CanEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
This file implements a class to represent arbitrary precision integral constant values and operations...
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:423
static Instruction * foldInsSequenceIntoBroadcast(InsertElementInst &InsElt)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:608
This instruction compares its operands according to the predicate given to the constructor.
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
This class represents a no-op cast from one type to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
Instruction * visitExtractElementInst(ExtractElementInst &EI)
These are the ingredients in an alternate form binary operator as described below.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< Constant *> &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Instruction * narrowVectorSelect(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Match a shuffle-select-shuffle pattern where the shuffles are widening and narrowing (concatenating w...
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
VectorType * getType() const
Overload to return most specific vector type.
Value * getOperand(unsigned i) const
Definition: User.h:170
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:338
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
Instruction * visitInsertElementInst(InsertElementInst &IE)
Value * SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:841
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
This instruction inserts a single (scalar) element into a VectorType value.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt...
Definition: PatternMatch.h:177
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:218
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:499
static Constant * getShuffleVector(Constant *V1, Constant *V2, Constant *Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2096
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1049
This instruction compares its operands according to the predicate given to the constructor.
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2274
op_range operands()
Definition: User.h:238
self_iterator getIterator()
Definition: ilist_node.h:82
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:75
Class to represent integer types.
Definition: DerivedTypes.h:40
VectorType * getVectorOperandType() const
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombiner &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
size_t size() const
Definition: SmallVector.h:53
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
bool isExact() const
Determine whether the exact flag is set.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag...
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:88
Iterator for intrusive lists based on ilist_node.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
BinaryOperator::BinaryOps Opcode
bool containsUndefElement() const
Return true if this is a vector constant that includes any undefined elements.
Definition: Constants.cpp:257
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:64
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1934
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1948
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void setOperand(unsigned i, Value *Val)
Definition: User.h:175
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
Class for arbitrary precision integers.
Definition: APInt.h:70
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:400
static Instruction * foldBitcastExtElt(ExtractElementInst &Ext, InstCombiner::BuilderTy &Builder)
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:179
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass&#39;s ...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, BinaryOperator *CopyBO, const Twine &Name="")
Definition: InstrTypes.h:186
static bool cheapToScalarize(Value *V, bool isConstant)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:80
#define I(x, y, z)
Definition: MD5.cpp:58
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
This instruction extracts a single (scalar) element from a VectorType value.
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2257
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
ArrayRef< unsigned > getIndices() const
LLVM Value Representation.
Definition: Value.h:73
This file provides internal interfaces used to implement the InstCombine.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
Value * SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
static Value * buildNew(Instruction *I, ArrayRef< Value *> NewOps)
Rebuild a new instruction just like &#39;I&#39; but with the new operands given.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
Type * getElementType() const
Definition: DerivedTypes.h:360
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
bool isIntDivRem() const
Definition: Instruction.h:131
static bool isSplat(ArrayRef< Value *> VL)
static void recognizeIdentityMask(const SmallVectorImpl< int > &Mask, bool &isLHSID, bool &isRHSID)
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< Constant *> &Mask, Value *PermittedRHS, InstCombiner &IC)
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
VectorType * getType() const
Overload to return most specific vector type.
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1056
IntegerType * Int32Ty
User * user_back()
Definition: Value.h:386
const BasicBlock * getParent() const
Definition: Instruction.h:67
This instruction inserts a struct field of array element value into an aggregate value.