//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
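///
/// Illustrative IR, not from the original source: a single-use vector load
/// feeding an extract is considered cheap, because the load/extract pair can
/// later become a single scalar load.
///   %v = load <4 x i32>, ptr %p            ; one use
///   %e = extractelement <4 x i32> %v, i64 0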
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for a scalable vector the actual size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
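//
// Illustrative IR, not from the original source, showing the shape of the
// pattern this targets:
//   %vphi  = phi <4 x i32> [ %vinit, %entry ], [ %vnext, %loop ]
//   %vnext = add <4 x i32> %vphi, %vstep
//   %e     = extractelement <4 x i32> %vphi, i64 0
// The PHI's only users are the feedback operation (%vnext) and extracts at
// one constant index, so a scalar PHI of that lane suffices.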
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from
    // one of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove the insertelement if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
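///
/// For example (illustrative, not from the original source), if the single
/// user is 'extractelement <4 x i32> %V, i64 2', only element 2 is demanded,
/// so the returned mask is 0b0100.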
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
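///
/// For example (illustrative, not from the original source):
///   extractelement <4 x float> %v, i16 1
/// is canonicalized to use 'i64 1', so otherwise-identical extracts with
/// differently typed indices can CSE.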
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If, for example, the extractelement would do an out-of-bounds access
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // Index needs to be lower than the minimum size of the vector, because
      // for a scalable vector the actual size is only known at run time.
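      // Illustrative, not from the original source:
      //   extractelement (stepvector <vscale x 4 x i32>), i64 2 --> i32 2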
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex || isSafeToSpeculativelyExecute(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. So if both indices are constants here, they must
      // differ, and we can extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        // 1. We have a scalar pointer and a vector of indices, or
        // 2. We have a vector of pointers and a scalar index, or
        // 3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.
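        //
        // Illustrative IR, not from the original source:
        //   %gep = getelementptr i32, ptr %base, <2 x i64> %idxs
        //   %e   = extractelement <2 x ptr> %gep, i64 0
        // -->
        //   %i   = extractelement <2 x i64> %idxs, i64 0
        //   %e   = getelementptr i32, ptr %base, i64 %i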

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable type, the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
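///
/// For example (illustrative, not from the original source), with
/// LHS = <4 x i32> %a and RHS = <4 x i32> %b:
///   %s = extractelement <4 x i32> %b, i64 0
///   %v = insertelement <4 x i32> %a, i32 %s, i64 3
/// yields Mask = <0, 1, 2, 4>, i.e. element 3 comes from lane 0 of RHS.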
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
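///
/// For example (illustrative, not from the original source), when inserts of
/// elements extracted from a <2 x i8> build a <4 x i8>, the narrow source is
/// first widened:
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
///                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// and the extracts are then rewritten to pull from %wide.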
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extended-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or 0), and
/// set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

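// Illustrative example, not from the original source: for
//   %s = extractelement <4 x i32> %b, i64 1
//   %v = insertelement <4 x i32> %a, i32 %s, i64 0
// with PermittedRHS == %b, the result is the pair (%a, %b) with
// Mask = <5, 1, 2, 3>: lane 0 is taken from lane 1 of the right-hand vector.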
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted; see if they were all extracted (via extractvalue)
/// from the same source aggregate at the exact same element indices.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
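///
/// For example (illustrative, not from the original source):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// Here %i1 reconstructs %agg exactly, so %i1 can be replaced by %agg.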
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. The motivation for a limit of 2 is to
  // be able to handle the clang C++ exception struct (which is hardcoded as
  // {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                 ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record
    // it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was done
    /// didn't match the aggregate type into which the insertion was done,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find appropriate source aggregate for each of the elements,
  // and see that it's the same aggregate for each element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, pass through whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look at in which blocks each of the AggElts is defined;
  // they should all be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate,
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // When coming from predecessor Pred, all elements of the aggregate
    // produced by OrigIVI must have been originally extracted from the same
    // aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
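///
/// For example (illustrative, not from the original source):
///   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 7, i32 8, i32 9, i32 10>,
///                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
///   %i = insertelement <4 x i32> %s, i32 42, i64 2
/// -->
///   %i = shufflevector <4 x i32> %x, <4 x i32> <i32 7, i32 8, i32 42, i32 10>,
///                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>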
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
  // Bail out if the parent has more than one use. In that case, we'd be
  // replacing the insertelt with a shuffle, and that's not a clear win.
  if (!Inst || !Inst->hasOneUse())
    return nullptr;
1412 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1413 // The shuffle must have a constant vector operand. The insertelt must have
1414 // a constant scalar being inserted at a constant position in the vector.
1415 Constant *ShufConstVec, *InsEltScalar;
1416 uint64_t InsEltIndex;
1417 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1418 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1419 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1420 return nullptr;
1421
1422 // Adding an element to an arbitrary shuffle could be expensive, but a
1423 // shuffle that selects elements from vectors without crossing lanes is
1424 // assumed cheap.
1425 // If we're just adding a constant into that shuffle, it will still be
1426 // cheap.
1427 if (!isShuffleEquivalentToSelect(*Shuf))
1428 return nullptr;
1429
1430 // From the above 'select' check, we know that the mask has the same number
1431 // of elements as the vector input operands. We also know that each constant
1432 // input element is used in its lane and cannot be used more than once by
1433 // the shuffle. Therefore, replace the constant in the shuffle's constant
1434 // vector with the insertelt constant. Replace the constant in the shuffle's
1435 // mask vector with the insertelt index plus the length of the vector
1436 // (because the constant vector operand of a shuffle is always the 2nd
1437 // operand).
1438 ArrayRef<int> Mask = Shuf->getShuffleMask();
1439 unsigned NumElts = Mask.size();
1440 SmallVector<Constant *, 16> NewShufElts(NumElts);
1441 SmallVector<int, 16> NewMaskElts(NumElts);
1442 for (unsigned I = 0; I != NumElts; ++I) {
1443 if (I == InsEltIndex) {
1444 NewShufElts[I] = InsEltScalar;
1445 NewMaskElts[I] = InsEltIndex + NumElts;
1446 } else {
1447 // Copy over the existing values.
1448 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1449 NewMaskElts[I] = Mask[I];
1450 }
1451
1452 // Bail if we failed to find an element.
1453 if (!NewShufElts[I])
1454 return nullptr;
1455 }
1456
1457 // Create new operands for a shuffle that includes the constant of the
1458 // original insertelt. The old shuffle will be dead now.
1459 return new ShuffleVectorInst(Shuf->getOperand(0),
1460 ConstantVector::get(NewShufElts), NewMaskElts);
1461 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1462 // Transform sequences of insertelement ops with constant data/indices into
1463 // a single shuffle op.
1464 // Cannot handle scalable types; the number of elements needed to create a
1465 // shuffle mask is not a compile-time constant.
1466 if (isa<ScalableVectorType>(InsElt.getType()))
1467 return nullptr;
1468 unsigned NumElts =
1469 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1470
1471 uint64_t InsertIdx[2];
1472 Constant *Val[2];
1473 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1474 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1475 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1476 !match(IEI->getOperand(1), m_Constant(Val[1])))
1477 return nullptr;
1478 SmallVector<Constant *, 16> Values(NumElts);
1479 SmallVector<int, 16> Mask(NumElts);
1480 auto ValI = std::begin(Val);
1481 // Generate new constant vector and mask.
1482 // We have 2 values/masks from the insertelements instructions. Insert them
1483 // into new value/mask vectors.
1484 for (uint64_t I : InsertIdx) {
1485 if (!Values[I]) {
1486 Values[I] = *ValI;
1487 Mask[I] = NumElts + I;
1488 }
1489 ++ValI;
1490 }
1491 // Remaining values are filled with 'poison' values.
1492 for (unsigned I = 0; I < NumElts; ++I) {
1493 if (!Values[I]) {
1494 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1495 Mask[I] = I;
1496 }
1497 }
1498 // Create new operands for a shuffle that includes the constant of the
1499 // original insertelt.
1500 return new ShuffleVectorInst(IEI->getOperand(0),
1501 ConstantVector::get(Values), Mask);
1502 }
1503 return nullptr;
1504}
1505
1506/// If both the base vector and the inserted element are extended from the same
1507/// type, do the insert element in the narrow source type followed by extend.
1508/// TODO: This can be extended to include other cast opcodes, but particularly
1509/// if we create a wider insertelement, make sure codegen is not harmed.
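/// For illustration (editorial example, not in the source; names are
/// hypothetical):
///   %v = fpext <2 x float> %x to <2 x double>
///   %s = fpext float %y to double
///   %r = insertelement <2 x double> %v, double %s, i64 0
/// becomes:
///   %i = insertelement <2 x float> %x, float %y, i64 0
///   %r = fpext <2 x float> %i to <2 x double>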
1510static Instruction *narrowInsElt(InsertElementInst &InsElt,
1511 InstCombiner::BuilderTy &Builder) {
1512 // We are creating a vector extend. If the original vector extend has another
1513 // use, that would mean we end up with 2 vector extends, so avoid that.
1514 // TODO: We could ease the use-clause to "if at least one op has one use"
1515 // (assuming that the source types match - see next TODO comment).
1516 Value *Vec = InsElt.getOperand(0);
1517 if (!Vec->hasOneUse())
1518 return nullptr;
1519
1520 Value *Scalar = InsElt.getOperand(1);
1521 Value *X, *Y;
1522 CastInst::CastOps CastOpcode;
1523 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1524 CastOpcode = Instruction::FPExt;
1525 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1526 CastOpcode = Instruction::SExt;
1527 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1528 CastOpcode = Instruction::ZExt;
1529 else
1530 return nullptr;
1531
1532 // TODO: We can allow mismatched types by creating an intermediate cast.
1533 if (X->getType()->getScalarType() != Y->getType())
1534 return nullptr;
1535
1536 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1537 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1538 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1539}
1540
1541/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1542/// try to convert to a single insert with appropriate bitcasts.
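/// Little-endian illustration (editorial example, not in the source; assumes
/// an i32 %x inserted as two i16 halves into an undef <4 x i16> base):
///   %lo = trunc i32 %x to i16
///   %sh = lshr i32 %x, 16
///   %hi = trunc i32 %sh to i16
///   %v0 = insertelement <4 x i16> undef, i16 %lo, i64 0
///   %v1 = insertelement <4 x i16> %v0, i16 %hi, i64 1
/// becomes roughly:
///   %bc = bitcast <4 x i16> undef to <2 x i32>
///   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
///   %v1 = bitcast <2 x i32> %in to <4 x i16>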
1543static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1544 bool IsBigEndian,
1545 InstCombiner::BuilderTy &Builder) {
1546 Value *VecOp = InsElt.getOperand(0);
1547 Value *ScalarOp = InsElt.getOperand(1);
1548 Value *IndexOp = InsElt.getOperand(2);
1549
1550 // The pattern depends on endianness because the lower index is expected to be inserted first.
1551 // Big endian:
1552 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1553 // Little endian:
1554 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1555 // Note: It is not safe to do this transform with an arbitrary base vector
1556 // because the bitcast of that vector to fewer/larger elements could
1557 // allow poison to spill into an element that was not poison before.
1558 // TODO: Detect smaller fractions of the scalar.
1559 // TODO: One-use checks are conservative.
1560 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1561 Value *Scalar0, *BaseVec;
1562 uint64_t Index0, Index1;
1563 if (!VTy || (VTy->getNumElements() & 1) ||
1564 !match(IndexOp, m_ConstantInt(Index1)) ||
1565 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1566 m_ConstantInt(Index0))) ||
1567 !match(BaseVec, m_Undef()))
1568 return nullptr;
1569
1570 // The first insert must be to an even index, and this insert must be to
1571 // the index directly above it.
1572 if (Index0 + 1 != Index1 || Index0 & 1)
1573 return nullptr;
1574
1575 // For big endian, the high half of the value should be inserted first.
1576 // For little endian, the low half of the value should be inserted first.
1577 Value *X;
1578 uint64_t ShAmt;
1579 if (IsBigEndian) {
1580 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1581 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1582 return nullptr;
1583 } else {
1584 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1585 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1586 return nullptr;
1587 }
1588
1589 Type *SrcTy = X->getType();
1590 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1591 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1592 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1593 return nullptr;
1594
1595 // Bitcast the base vector to a vector type with the source element type.
1596 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1597 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1598
1599 // Scale the insert index for a vector with half as many elements.
1600 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1601 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1602 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1603 return new BitCastInst(NewInsert, VTy);
1604}
1605
1606Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1607 Value *VecOp = IE.getOperand(0);
1608 Value *ScalarOp = IE.getOperand(1);
1609 Value *IdxOp = IE.getOperand(2);
1610
1611 if (auto *V = simplifyInsertElementInst(
1612 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1613 return replaceInstUsesWith(IE, V);
1614
1615 // Canonicalize type of constant indices to i64 to simplify CSE
1616 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1617 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1618 return replaceOperand(IE, 2, NewIdx);
1619
1620 Value *BaseVec, *OtherScalar;
1621 uint64_t OtherIndexVal;
1622 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1623 m_Value(OtherScalar),
1624 m_ConstantInt(OtherIndexVal)))) &&
1625 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1626 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1627 return InsertElementInst::Create(NewIns, OtherScalar,
1628 Builder.getInt64(OtherIndexVal));
1629 }
1630 }
1631
1632 // If the scalar is bitcast and inserted into undef, do the insert in the
1633 // source type followed by bitcast.
1634 // TODO: Generalize for insert into any constant, not just undef?
1635 Value *ScalarSrc;
1636 if (match(VecOp, m_Undef()) &&
1637 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1638 (ScalarSrc->getType()->isIntegerTy() ||
1639 ScalarSrc->getType()->isFloatingPointTy())) {
1640 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1641 // bitcast (inselt undef, ScalarSrc, IdxOp)
1642 Type *ScalarTy = ScalarSrc->getType();
1643 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1644 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1645 : UndefValue::get(VecTy);
1646 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1647 return new BitCastInst(NewInsElt, IE.getType());
1648 }
1649
1650 // If the vector and scalar are both bitcast from the same element type, do
1651 // the insert in that source type followed by bitcast.
1652 Value *VecSrc;
1653 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1654 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1655 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1656 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1657 cast<VectorType>(VecSrc->getType())->getElementType() ==
1658 ScalarSrc->getType()) {
1659 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1660 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1661 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1662 return new BitCastInst(NewInsElt, IE.getType());
1663 }
1664
1665 // If the inserted element was extracted from some other fixed-length vector
1666 // and both indexes are valid constants, try to turn this into a shuffle.
1667 // Cannot handle scalable vector types; the number of elements needed to
1668 // create the shuffle mask is not a compile-time constant.
1669 uint64_t InsertedIdx, ExtractedIdx;
1670 Value *ExtVecOp;
1671 if (isa<FixedVectorType>(IE.getType()) &&
1672 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1673 match(ScalarOp,
1674 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1675 isa<FixedVectorType>(ExtVecOp->getType()) &&
1676 ExtractedIdx <
1677 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1678 // TODO: Looking at the user(s) to determine if this insert is a
1679 // fold-to-shuffle opportunity does not match the usual instcombine
1680 // constraints. We should decide if the transform is worthy based only
1681 // on this instruction and its operands, but that may not work currently.
1682 //
1683 // Here, we are trying to avoid creating shuffles before reaching
1684 // the end of a chain of extract-insert pairs. This is complicated because
1685 // we do not generally form arbitrary shuffle masks in instcombine
1686 // (because those may codegen poorly), but collectShuffleElements() does
1687 // exactly that.
1688 //
1689 // The rules for determining what is an acceptable target-independent
1690 // shuffle mask are fuzzy because they evolve based on the backend's
1691 // capabilities and real-world impact.
1692 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1693 if (!Insert.hasOneUse())
1694 return true;
1695 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1696 if (!InsertUser)
1697 return true;
1698 return false;
1699 };
1700
1701 // Try to form a shuffle from a chain of extract-insert ops.
1702 if (isShuffleRootCandidate(IE)) {
1703 bool Rerun = true;
1704 while (Rerun) {
1705 Rerun = false;
1706
1707 SmallVector<int, 16> Mask;
1708 ShuffleOps LR =
1709 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1710
1711 // The proposed shuffle may be trivial, in which case we shouldn't
1712 // perform the combine.
1713 if (LR.first != &IE && LR.second != &IE) {
1714 // We now have a shuffle of LHS, RHS, Mask.
1715 if (LR.second == nullptr)
1716 LR.second = PoisonValue::get(LR.first->getType());
1717 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1718 }
1719 }
1720 }
1721 }
1722
1723 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1724 unsigned VWidth = VecTy->getNumElements();
1725 APInt PoisonElts(VWidth, 0);
1726 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1727 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1728 PoisonElts)) {
1729 if (V != &IE)
1730 return replaceInstUsesWith(IE, V);
1731 return &IE;
1732 }
1733 }
1734
1735 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1736 return Shuf;
1737
1738 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1739 return NewInsElt;
1740
1741 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1742 return Broadcast;
1743
1744 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1745 return Splat;
1746
1747 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1748 return IdentityShuf;
1749
1750 if (Instruction *Ext = narrowInsElt(IE, Builder))
1751 return Ext;
1752
1753 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1754 return Ext;
1755
1756 return nullptr;
1757}
1758
1759/// Return true if we can evaluate the specified expression tree if the vector
1760/// elements were shuffled in a different order.
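/// For example (editorial note, not from the source): the lanes of
/// `add <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>` can be evaluated in a
/// different order by shuffling %v and reversing the constant operand, so a
/// mask like <3,2,1,0> is acceptable here.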
1761static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1762 unsigned Depth = 5) {
1763 // We can always reorder the elements of a constant.
1764 if (isa<Constant>(V))
1765 return true;
1766
1767 // We won't reorder vector arguments. No IPO here.
1768 Instruction *I = dyn_cast<Instruction>(V);
1769 if (!I) return false;
1770
1771 // Two users may expect different orders of the elements. Don't try it.
1772 if (!I->hasOneUse())
1773 return false;
1774
1775 if (Depth == 0) return false;
1776
1777 switch (I->getOpcode()) {
1778 case Instruction::UDiv:
1779 case Instruction::SDiv:
1780 case Instruction::URem:
1781 case Instruction::SRem:
1782 // Propagating an undefined shuffle mask element to integer div/rem is not
1783 // allowed because those opcodes can create immediate undefined behavior
1784 // from an undefined element in an operand.
1785 if (llvm::is_contained(Mask, -1))
1786 return false;
1787 [[fallthrough]];
1788 case Instruction::Add:
1789 case Instruction::FAdd:
1790 case Instruction::Sub:
1791 case Instruction::FSub:
1792 case Instruction::Mul:
1793 case Instruction::FMul:
1794 case Instruction::FDiv:
1795 case Instruction::FRem:
1796 case Instruction::Shl:
1797 case Instruction::LShr:
1798 case Instruction::AShr:
1799 case Instruction::And:
1800 case Instruction::Or:
1801 case Instruction::Xor:
1802 case Instruction::ICmp:
1803 case Instruction::FCmp:
1804 case Instruction::Trunc:
1805 case Instruction::ZExt:
1806 case Instruction::SExt:
1807 case Instruction::FPToUI:
1808 case Instruction::FPToSI:
1809 case Instruction::UIToFP:
1810 case Instruction::SIToFP:
1811 case Instruction::FPTrunc:
1812 case Instruction::FPExt:
1813 case Instruction::GetElementPtr: {
1814 // Bail out if we would create longer vector ops. We could allow creating
1815 // longer vector ops, but that may result in more expensive codegen.
1816 Type *ITy = I->getType();
1817 if (ITy->isVectorTy() &&
1818 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1819 return false;
1820 for (Value *Operand : I->operands()) {
1821 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1822 return false;
1823 }
1824 return true;
1825 }
1826 case Instruction::InsertElement: {
1827 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1828 if (!CI) return false;
1829 int ElementNumber = CI->getLimitedValue();
1830
1831 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1832 // can't put an element into multiple indices.
1833 bool SeenOnce = false;
1834 for (int I : Mask) {
1835 if (I == ElementNumber) {
1836 if (SeenOnce)
1837 return false;
1838 SeenOnce = true;
1839 }
1840 }
1841 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1842 }
1843 }
1844 return false;
1845}
1846
1847/// Rebuild a new instruction just like 'I' but with the new operands given.
1848/// In the event of a type mismatch, the new operands' types take precedence.
1849static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1850 IRBuilderBase &Builder) {
1851 Builder.SetInsertPoint(I);
1852 switch (I->getOpcode()) {
1853 case Instruction::Add:
1854 case Instruction::FAdd:
1855 case Instruction::Sub:
1856 case Instruction::FSub:
1857 case Instruction::Mul:
1858 case Instruction::FMul:
1859 case Instruction::UDiv:
1860 case Instruction::SDiv:
1861 case Instruction::FDiv:
1862 case Instruction::URem:
1863 case Instruction::SRem:
1864 case Instruction::FRem:
1865 case Instruction::Shl:
1866 case Instruction::LShr:
1867 case Instruction::AShr:
1868 case Instruction::And:
1869 case Instruction::Or:
1870 case Instruction::Xor: {
1871 BinaryOperator *BO = cast<BinaryOperator>(I);
1872 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1873 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1874 NewOps[0], NewOps[1]);
1875 if (auto *NewI = dyn_cast<Instruction>(New)) {
1876 if (isa<OverflowingBinaryOperator>(BO)) {
1877 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1878 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1879 }
1880 if (isa<PossiblyExactOperator>(BO)) {
1881 NewI->setIsExact(BO->isExact());
1882 }
1883 if (isa<FPMathOperator>(BO))
1884 NewI->copyFastMathFlags(I);
1885 }
1886 return New;
1887 }
1888 case Instruction::ICmp:
1889 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1890 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1891 NewOps[1]);
1892 case Instruction::FCmp:
1893 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1894 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1895 NewOps[1]);
1896 case Instruction::Trunc:
1897 case Instruction::ZExt:
1898 case Instruction::SExt:
1899 case Instruction::FPToUI:
1900 case Instruction::FPToSI:
1901 case Instruction::UIToFP:
1902 case Instruction::SIToFP:
1903 case Instruction::FPTrunc:
1904 case Instruction::FPExt: {
1905 // It's possible that the mask has a different number of elements from
1906 // the original cast. We recompute the destination type to match the mask.
1907 Type *DestTy = VectorType::get(
1908 I->getType()->getScalarType(),
1909 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1910 assert(NewOps.size() == 1 && "cast with #ops != 1");
1911 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1912 DestTy);
1913 }
1914 case Instruction::GetElementPtr: {
1915 Value *Ptr = NewOps[0];
1916 ArrayRef<Value*> Idx = NewOps.slice(1);
1917 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1918 Ptr, Idx, "",
1919 cast<GEPOperator>(I)->isInBounds());
1920 }
1921 }
1922 llvm_unreachable("failed to rebuild vector instructions");
1923}
1924
1925static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1926 IRBuilderBase &Builder) {
1927 // Mask.size() does not need to be equal to the number of vector elements.
1928
1929 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1930 Type *EltTy = V->getType()->getScalarType();
1931
1932 if (isa<PoisonValue>(V))
1933 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
1934
1935 if (match(V, m_Undef()))
1936 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1937
1938 if (isa<ConstantAggregateZero>(V))
1939 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1940
1941 if (Constant *C = dyn_cast<Constant>(V))
1942 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1943 Mask);
1944
1945 Instruction *I = cast<Instruction>(V);
1946 switch (I->getOpcode()) {
1947 case Instruction::Add:
1948 case Instruction::FAdd:
1949 case Instruction::Sub:
1950 case Instruction::FSub:
1951 case Instruction::Mul:
1952 case Instruction::FMul:
1953 case Instruction::UDiv:
1954 case Instruction::SDiv:
1955 case Instruction::FDiv:
1956 case Instruction::URem:
1957 case Instruction::SRem:
1958 case Instruction::FRem:
1959 case Instruction::Shl:
1960 case Instruction::LShr:
1961 case Instruction::AShr:
1962 case Instruction::And:
1963 case Instruction::Or:
1964 case Instruction::Xor:
1965 case Instruction::ICmp:
1966 case Instruction::FCmp:
1967 case Instruction::Trunc:
1968 case Instruction::ZExt:
1969 case Instruction::SExt:
1970 case Instruction::FPToUI:
1971 case Instruction::FPToSI:
1972 case Instruction::UIToFP:
1973 case Instruction::SIToFP:
1974 case Instruction::FPTrunc:
1975 case Instruction::FPExt:
1976 case Instruction::Select:
1977 case Instruction::GetElementPtr: {
1978 SmallVector<Value*, 8> NewOps;
1979 bool NeedsRebuild =
1980 (Mask.size() !=
1981 cast<FixedVectorType>(I->getType())->getNumElements());
1982 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1983 Value *V;
1984 // Recursively call evaluateInDifferentElementOrder on vector arguments
1985 // as well. E.g. GetElementPtr may have scalar operands even if the
1986 // return value is a vector, so we need to examine the operand type.
1987 if (I->getOperand(i)->getType()->isVectorTy())
1988 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
1989 else
1990 V = I->getOperand(i);
1991 NewOps.push_back(V);
1992 NeedsRebuild |= (V != I->getOperand(i));
1993 }
1994 if (NeedsRebuild)
1995 return buildNew(I, NewOps, Builder);
1996 return I;
1997 }
1998 case Instruction::InsertElement: {
1999 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2000
2001 // The insertelement was inserting at Element. Figure out which element
2002 // that becomes after shuffling. The answer is guaranteed to be unique
2003 // by CanEvaluateShuffled.
2004 bool Found = false;
2005 int Index = 0;
2006 for (int e = Mask.size(); Index != e; ++Index) {
2007 if (Mask[Index] == Element) {
2008 Found = true;
2009 break;
2010 }
2011 }
2012
2013 // If element is not in Mask, no need to handle the operand 1 (element to
2014 // be inserted). Just evaluate values in operand 0 according to Mask.
2015 if (!Found)
2016 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2017
2018 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2019 Builder);
2020 Builder.SetInsertPoint(I);
2021 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2022 }
2023 }
2024 llvm_unreachable("failed to reorder elements of vector instruction!");
2025}
2026
2027// Returns true if the shuffle is extracting a contiguous range of values from
2028// LHS, for example:
2029// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2030// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2031// Shuffles to: |EE|FF|GG|HH|
2032// +--+--+--+--+
2033static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2034 ArrayRef<int> Mask) {
2035 unsigned LHSElems =
2036 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2037 unsigned MaskElems = Mask.size();
2038 unsigned BegIdx = Mask.front();
2039 unsigned EndIdx = Mask.back();
2040 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2041 return false;
2042 for (unsigned I = 0; I != MaskElems; ++I)
2043 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2044 return false;
2045 return true;
2046}
2047
2048/// These are the ingredients in an alternate form binary operator as described
2049/// below.
2050struct BinopElts {
2051 BinaryOperator::BinaryOps Opcode;
2052 Value *Op0;
2053 Value *Op1;
2054 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2055 Value *V0 = nullptr, Value *V1 = nullptr) :
2056 Opcode(Opc), Op0(V0), Op1(V1) {}
2057 operator bool() const { return Opcode != 0; }
2058};
2059
2060/// Binops may be transformed into binops with different opcodes and operands.
2061/// Reverse the usual canonicalization to enable folds with the non-canonical
2062/// form of the binop. If a transform is possible, return the elements of the
2063/// new binop. If not, return invalid elements.
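/// For example (illustrative, editorial note): `shl X, <3,3,3,3>` may be
/// viewed as `mul X, <8,8,8,8>`, letting it pair with a genuine mul in the
/// other lane set of a select shuffle.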
2064static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2065 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2066 Type *Ty = BO->getType();
2067 switch (BO->getOpcode()) {
2068 case Instruction::Shl: {
2069 // shl X, C --> mul X, (1 << C)
2070 Constant *C;
2071 if (match(BO1, m_ImmConstant(C))) {
2072 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2073 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2074 assert(ShlOne && "Constant folding of immediate constants failed");
2075 return {Instruction::Mul, BO0, ShlOne};
2076 }
2077 break;
2078 }
2079 case Instruction::Or: {
2080 // or disjoint X, C --> add X, C
2081 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2082 return {Instruction::Add, BO0, BO1};
2083 break;
2084 }
2085 case Instruction::Sub:
2086 // sub 0, X --> mul X, -1
2087 if (match(BO0, m_ZeroInt()))
2088 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2089 break;
2090 default:
2091 break;
2092 }
2093 return {};
2094}
2095
2096/// A select shuffle of a select shuffle with a shared operand can be reduced
2097/// to a single select shuffle. This is an obvious improvement in IR, and the
2098/// backend is expected to lower select shuffles efficiently.
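/// A hedged example (editorial, not from the source; masks are hypothetical):
///   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
///   %s2 = shufflevector <4 x i32> %x, <4 x i32> %s1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
/// reduces to the single select shuffle:
///   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>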
2099static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2100 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2101
2101
2102 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2103 SmallVector<int, 16> Mask;
2104 Shuf.getShuffleMask(Mask);
2105 unsigned NumElts = Mask.size();
2106
2107 // Canonicalize a select shuffle with common operand as Op1.
2108 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2109 if (ShufOp && ShufOp->isSelect() &&
2110 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2111 std::swap(Op0, Op1);
2112 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2113 }
2114
2115 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2116 if (!ShufOp || !ShufOp->isSelect() ||
2117 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2118 return nullptr;
2119
2120 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2121 SmallVector<int, 16> Mask1;
2122 ShufOp->getShuffleMask(Mask1);
2123 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2124
2125 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2126 if (Y == Op0) {
2127 std::swap(X, Y);
2128 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2129 }
2130
2131 // If the mask chooses from X (operand 0), it stays the same.
2132 // If the mask chooses from the earlier shuffle, the other mask value is
2133 // transferred to the combined select shuffle:
2134 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2135 SmallVector<int, 16> NewMask(NumElts);
2136 for (unsigned i = 0; i != NumElts; ++i)
2137 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2138
2139 // A select mask with undef elements might look like an identity mask.
2140 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2141 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2142 "Unexpected shuffle mask");
2143 return new ShuffleVectorInst(X, Y, NewMask);
2144}
2145
2145
2146static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2147 const SimplifyQuery &SQ) {
2148 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2149
2150 // Are we shuffling together some value and that same value after it has been
2151 // modified by a binop with a constant?
2152 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2153 Constant *C;
2154 bool Op0IsBinop;
2155 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2156 Op0IsBinop = true;
2157 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2158 Op0IsBinop = false;
2159 else
2160 return nullptr;
2161
2162 // The identity constant for a binop leaves a variable operand unchanged. For
2163 // a vector, this is a splat of something like 0, -1, or 1.
2164 // If there's no identity constant for this binop, we're done.
2165 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2166 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2167 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2168 if (!IdC)
2169 return nullptr;
2170
2171 Value *X = Op0IsBinop ? Op1 : Op0;
2172
2173 // Avoid the fold if the non-binop operand (X) might have NaN elements.
2174 // If X can contain NaN, the floating-point operation in the transformed
2175 // code may not preserve the exact NaN bit-pattern --
2176 // e.g. `fadd sNaN, 0.0 -> qNaN`.
2177 // That would make the transform incorrect, because the original program
2178 // would have passed those lanes of X through unmodified, preserving the
2179 // exact NaN bit-pattern.
2180 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2181 !isKnownNeverNaN(X, 0, SQ))
2182 return nullptr;
2183
2184 // Shuffle identity constants into the lanes that return the original value.
2185 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2186 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2187 // The existing binop constant vector remains in the same operand position.
2188 ArrayRef<int> Mask = Shuf.getShuffleMask();
2189 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2190 ConstantExpr::getShuffleVector(IdC, C, Mask);
2191
2192 bool MightCreatePoisonOrUB =
2193 is_contained(Mask, PoisonMaskElem) &&
2194 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2195 if (MightCreatePoisonOrUB)
2196 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2197
2198 // shuf (bop X, C), X, M --> bop X, C'
2199 // shuf X, (bop X, C), M --> bop X, C'
2200 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2201 NewBO->copyIRFlags(BO);
2202
2203 // An undef shuffle mask element may propagate as an undef constant element in
2204 // the new binop. That would produce poison where the original code might not.
2205 // If we already made a safe constant, then there's no danger.
2206 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2207 NewBO->dropPoisonGeneratingFlags();
2208 return NewBO;
2209}
2210
2211/// If we have an insert of a scalar to a non-zero element of an undefined
2212/// vector and then shuffle that value, that's the same as inserting to the zero
2213/// element and shuffling. Splatting from the zero element is recognized as the
2214/// canonical form of splat.
2215static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2216 InstCombiner::BuilderTy &Builder) {
2217 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2218 ArrayRef<int> Mask = Shuf.getShuffleMask();
2219 Value *X;
2220 uint64_t IndexC;
2221
2222 // Match a shuffle that is a splat to a non-zero element.
2223 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2224 m_ConstantInt(IndexC)))) ||
2225 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2226 return nullptr;
2227
2228 // Insert into element 0 of a poison vector.
2229 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2230 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2231
2232 // Splat from element 0. Any mask element that is poison remains poison.
2233 // For example:
2234 // shuf (inselt poison, X, 2), _, <2,2,undef>
2235 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2236 unsigned NumMaskElts =
2237 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2238 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2239 for (unsigned i = 0; i != NumMaskElts; ++i)
2240 if (Mask[i] == PoisonMaskElem)
2241 NewMask[i] = Mask[i];
2242
2243 return new ShuffleVectorInst(NewIns, NewMask);
2244}
2245
2246/// Try to fold shuffles that are the equivalent of a vector select.
2247Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2248 if (!Shuf.isSelect())
2249 return nullptr;
2250
2251 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2252 // Commuting undef to operand 0 conflicts with another canonicalization.
2253 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2254 if (!match(Shuf.getOperand(1), m_Undef()) &&
2255 Shuf.getMaskValue(0) >= (int)NumElts) {
2256 // TODO: Can we assert that both operands of a shuffle-select are not undef
2257 // (otherwise, it would have been folded by instsimplify)?
2258 Shuf.commute();
2259 return &Shuf;
2260 }
2261
2262 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2263 return I;
2264
2265 if (Instruction *I = foldSelectShuffleWith1Binop(
2266 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2267 return I;
2268
2269 BinaryOperator *B0, *B1;
2270 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2271 !match(Shuf.getOperand(1), m_BinOp(B1)))
2272 return nullptr;
2273
2274 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2275 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2276 // with a multiply, we will exit because C0/C1 will not be set.
2277 Value *X, *Y;
2278 Constant *C0 = nullptr, *C1 = nullptr;
2279 bool ConstantsAreOp1;
2280 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2281 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2282 ConstantsAreOp1 = false;
2283 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2284 m_Neg(m_Value(X)))) &&
2285 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2286 m_Neg(m_Value(Y)))))
2287 ConstantsAreOp1 = true;
2288 else
2289 return nullptr;
2290
2291 // We need matching binops to fold the lanes together.
2292 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2293 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2294 bool DropNSW = false;
2295 if (ConstantsAreOp1 && Opc0 != Opc1) {
2296 // TODO: We drop "nsw" if shift is converted into multiply because it may
2297 // not be correct when the shift amount is BitWidth - 1. We could examine
2298 // each vector element to determine if it is safe to keep that flag.
2299 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2300 DropNSW = true;
2301 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2302 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2303 Opc0 = AltB0.Opcode;
2304 C0 = cast<Constant>(AltB0.Op1);
2305 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2306 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2307 Opc1 = AltB1.Opcode;
2308 C1 = cast<Constant>(AltB1.Op1);
2309 }
2310 }
2311
2312 if (Opc0 != Opc1 || !C0 || !C1)
2313 return nullptr;
2314
2315 // The opcodes must be the same. Use a new name to make that clear.
2316 BinaryOperator::BinaryOps BOpc = Opc0;
2317
2318 // Select the constant elements needed for the single binop.
2319 ArrayRef<int> Mask = Shuf.getShuffleMask();
2320 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2321
2322 // We are moving a binop after a shuffle. When a shuffle has an undefined
2323 // mask element, the result is undefined, but it is not poison or undefined
2324 // behavior. That is not necessarily true for div/rem/shift.
2325 bool MightCreatePoisonOrUB =
2326 is_contained(Mask, PoisonMaskElem) &&
2327 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2328 if (MightCreatePoisonOrUB)
2329 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2330 ConstantsAreOp1);
2331
2332 Value *V;
2333 if (X == Y) {
2334 // Remove a binop and the shuffle by rearranging the constant:
2335 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2336 // shuffle (op C0, V), (op C1, V), M --> op C', V
2337 V = X;
2338 } else {
2339 // If there are 2 different variable operands, we must create a new shuffle
2340 // (select) first, so check uses to ensure that we don't end up with more
2341 // instructions than we started with.
2342 if (!B0->hasOneUse() && !B1->hasOneUse())
2343 return nullptr;
2344
2345 // If we use the original shuffle mask and op1 is *variable*, we would be
2346 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2347 // poison. We do not have to guard against UB when *constants* are op1
2348 // because safe constants guarantee that we do not overflow sdiv/srem (and
2349 // there's no danger for other opcodes).
2350 // TODO: To allow this case, create a new shuffle mask with no undefs.
2351 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2352 return nullptr;
2353
2354 // Note: In general, we do not create new shuffles in InstCombine because we
2355 // do not know if a target can lower an arbitrary shuffle optimally. In this
2356 // case, the shuffle uses the existing mask, so there is no additional risk.
2357
2358 // Select the variable vectors first, then perform the binop:
2359 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2360 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2361 V = Builder.CreateShuffleVector(X, Y, Mask);
2362 }
2363
2364 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2365 Builder.CreateBinOp(BOpc, NewC, V);
2366
2367 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2368 // 1. If we changed an opcode, poison conditions might have changed.
2369 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2370 // where the original code did not. But if we already made a safe constant,
2371 // then there's no danger.
2372 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2373 NewI->copyIRFlags(B0);
2374 NewI->andIRFlags(B1);
2375 if (DropNSW)
2376 NewI->setHasNoSignedWrap(false);
2377 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2378 NewI->dropPoisonGeneratingFlags();
2379 }
2380 return replaceInstUsesWith(Shuf, NewBO);
2381}
2382
2383/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2384/// Example (little endian):
2385/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2386static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2387 bool IsBigEndian) {
2388 // This must be a bitcasted shuffle of 1 vector integer operand.
2389 Type *DestType = Shuf.getType();
2390 Value *X;
2391 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2392 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2393 return nullptr;
2394
2395 // The source type must have the same number of elements as the shuffle,
2396 // and the source element type must be larger than the shuffle element type.
2397 Type *SrcType = X->getType();
2398 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2399 cast<FixedVectorType>(SrcType)->getNumElements() !=
2400 cast<FixedVectorType>(DestType)->getNumElements() ||
2401 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2402 return nullptr;
2403
2404 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2405 "Expected a shuffle that decreases length");
2406
2407 // Last, check that the mask chooses the correct low bits for each narrow
2408 // element in the result.
2409 uint64_t TruncRatio =
2410 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2411 ArrayRef<int> Mask = Shuf.getShuffleMask();
2412 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2413 if (Mask[i] == PoisonMaskElem)
2414 continue;
2415 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2416 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2417 if (Mask[i] != (int)LSBIndex)
2418 return nullptr;
2419 }
2420
2421 return new TruncInst(X, DestType);
2422}
2423
2424/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2425/// narrowing (concatenating with poison and extracting back to the original
2426/// length). This allows replacing the wide select with a narrow select.
2427static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2428 InstCombiner::BuilderTy &Builder) {
2429 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2430 // of the 1st vector operand of a shuffle.
2431 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2432 return nullptr;
2433
2434 // The vector being shuffled must be a vector select that we can eliminate.
2435 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2436 Value *Cond, *X, *Y;
2437 if (!match(Shuf.getOperand(0),
2438 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2439 return nullptr;
2440
2441 // We need a narrow condition value. It must be extended with poison elements
2442 // and have the same number of elements as this shuffle.
2443 unsigned NarrowNumElts =
2444 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2445 Value *NarrowCond;
2446 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2447 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2448 NarrowNumElts ||
2449 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2450 return nullptr;
2451
2452 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2453 // -->
2454 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2455 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2456 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2457 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2458}
2459
2460/// Canonicalize FP negate/abs after shuffle.
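/// For example (illustrative, editorial note, not from the source):
///   %n0 = fneg <4 x float> %x
///   %n1 = fneg <4 x float> %y
///   %s = shufflevector <4 x float> %n0, <4 x float> %n1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// becomes:
///   %t = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
///   %s = fneg <4 x float> %t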
2461static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2462 InstCombiner::BuilderTy &Builder) {
2463 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2464 Value *X;
2465 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2466 return nullptr;
2467
2468 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2469
2470 // Match 1-input (unary) shuffle.
2471 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2472 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
2473 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2474 if (IsFNeg)
2475 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2476
2477 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2478 Intrinsic::fabs, Shuf.getType());
2479 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2480 NewF->setFastMathFlags(S0->getFastMathFlags());
2481 return NewF;
2482 }
2483
2484 // Match 2-input (binary) shuffle.
2485 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2486 Value *Y;
2487 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2488 S0->getOpcode() != S1->getOpcode() ||
2489 (!S0->hasOneUse() && !S1->hasOneUse()))
2490 return nullptr;
2491
2492 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2493 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2494 Instruction *NewF;
2495 if (IsFNeg) {
2496 NewF = UnaryOperator::CreateFNeg(NewShuf);
2497 } else {
2498 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2499 Intrinsic::fabs, Shuf.getType());
2500 NewF = CallInst::Create(FAbs, {NewShuf});
2501 }
2502 NewF->copyIRFlags(S0);
2503 NewF->andIRFlags(S1);
2504 return NewF;
2505}
2506
2507/// Canonicalize casts after shuffle.
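/// For example (illustrative, editorial note, not from the source):
///   %c0 = sitofp <4 x i32> %x to <4 x float>
///   %c1 = sitofp <4 x i32> %y to <4 x float>
///   %s = shufflevector <4 x float> %c0, <4 x float> %c1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// becomes:
///   %t = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
///   %s = sitofp <4 x i32> %t to <4 x float>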
2508static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2509 InstCombiner::BuilderTy &Builder) {
2510 // Do we have 2 matching cast operands?
2511 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2512 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2513 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2514 Cast0->getSrcTy() != Cast1->getSrcTy())
2515 return nullptr;
2516
2517 // TODO: Allow other opcodes? That would require easing the type restrictions
2518 // below here.
2519 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2520 switch (CastOpcode) {
2521 case Instruction::FPToSI:
2522 case Instruction::FPToUI:
2523 case Instruction::SIToFP:
2524 case Instruction::UIToFP:
2525 break;
2526 default:
2527 return nullptr;
2528 }
2529
2530 VectorType *ShufTy = Shuf.getType();
2531 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2532 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2533
2534 // TODO: Allow length-increasing shuffles?
2535 if (ShufTy->getElementCount().getKnownMinValue() >
2536 ShufOpTy->getElementCount().getKnownMinValue())
2537 return nullptr;
2538
2539 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2540 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2541 "Expected fixed vector operands for casts and binary shuffle");
2542 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2543 return nullptr;
2544
2545 // At least one of the operands must have only one use (the shuffle).
2546 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2547 return nullptr;
2548
2549 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2550 Value *X = Cast0->getOperand(0);
2551 Value *Y = Cast1->getOperand(0);
2552 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2553 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2554}
2555
2556/// Try to fold an extract subvector operation.
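/// For example (illustrative, editorial note): extracting the low half of a
/// 2-input shuffle can fold into the shuffle itself:
///   shuf (shuf X, Y, <0,5,2,7>), poison, <0,1> --> shuf X, Y, <0,5>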
2557static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2558 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2559 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2560 return nullptr;
2561
2562 // Check if we are extracting all bits of an inserted scalar:
2563 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2564 Value *X;
2565 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2566 X->getType()->getPrimitiveSizeInBits() ==
2567 Shuf.getType()->getPrimitiveSizeInBits())
2568 return new BitCastInst(X, Shuf.getType());
2569
2570 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2571 Value *Y;
2572 ArrayRef<int> Mask;
2573 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2574 return nullptr;
2575
2576 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2577 // then combining may result in worse codegen.
2578 if (!Op0->hasOneUse())
2579 return nullptr;
2580
2581 // We are extracting a subvector from a shuffle. Remove excess elements from
2582 // the 1st shuffle mask to eliminate the extract.
2583 //
2584 // This transform is conservatively limited to identity extracts because we do
2585 // not allow arbitrary shuffle mask creation as a target-independent transform
2586 // (because we can't guarantee that will lower efficiently).
2587 //
2588 // If the extracting shuffle has a poison mask element, it transfers to the
2589 // new shuffle mask. Otherwise, copy the original mask element. Example:
2590 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2591 // shuf X, Y, <C0, poison, C2, poison>
2592 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2593 SmallVector<int, 16> NewMask(NumElts);
2594 assert(NumElts < Mask.size() &&
2595 "Identity with extract must have fewer elements than its inputs");
2596
2597 for (unsigned i = 0; i != NumElts; ++i) {
2598 int ExtractMaskElt = Shuf.getMaskValue(i);
2599 int MaskElt = Mask[i];
2600 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2601 }
2602 return new ShuffleVectorInst(X, Y, NewMask);
2603}
2604
2605/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2606/// operand with the operand of an insertelement.
2607static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2608 InstCombinerImpl &IC) {
2609 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2610 SmallVector<int, 16> Mask;
2611 Shuf.getShuffleMask(Mask);
2612
2613 int NumElts = Mask.size();
2614 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2615
2616 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2617 // not be able to handle it there if the insertelement has >1 use.
2618 // If the shuffle has an insertelement operand but does not choose the
2619 // inserted scalar element from that value, then we can replace that shuffle
2620 // operand with the source vector of the insertelement.
2621 Value *X;
2622 uint64_t IdxC;
2623 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2624 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2625 if (!is_contained(Mask, (int)IdxC))
2626 return IC.replaceOperand(Shuf, 0, X);
2627 }
2628 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2629 // Offset the index constant by the vector width because we are checking for
2630 // accesses to the 2nd vector input of the shuffle.
2631 IdxC += InpNumElts;
2632 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2633 if (!is_contained(Mask, (int)IdxC))
2634 return IC.replaceOperand(Shuf, 1, X);
2635 }
2636 // For the rest of the transform, the shuffle must not change vector sizes.
2637 // TODO: This restriction could be removed if the insert has only one use
2638 // (because the transform would require a new length-changing shuffle).
2639 if (NumElts != InpNumElts)
2640 return nullptr;
2641
2642 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2643 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2644 // We need an insertelement with a constant index.
2645 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2646 m_ConstantInt(IndexC))))
2647 return false;
2648
2649 // Test the shuffle mask to see if it splices the inserted scalar into the
2650 // operand 1 vector of the shuffle.
2651 int NewInsIndex = -1;
2652 for (int i = 0; i != NumElts; ++i) {
2653 // Ignore undef mask elements.
2654 if (Mask[i] == -1)
2655 continue;
2656
2657 // The shuffle takes elements of operand 1 without lane changes.
2658 if (Mask[i] == NumElts + i)
2659 continue;
2660
2661 // The shuffle must choose the inserted scalar exactly once.
2662 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2663 return false;
2664
2665 // The shuffle is placing the inserted scalar into element i.
2666 NewInsIndex = i;
2667 }
2668
2669 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2670
2671 // Index is updated to the potentially translated insertion lane.
2672 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2673 return true;
2674 };
2675
2676 // If the shuffle is unnecessary, insert the scalar operand directly into
2677 // operand 1 of the shuffle. Example:
2678 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2679 Value *Scalar;
2680 ConstantInt *IndexC;
2681 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2682 return InsertElementInst::Create(V1, Scalar, IndexC);
2683
2684 // Try again after commuting shuffle. Example:
2685 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2686 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2687 std::swap(V0, V1);
2688 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2689 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2690 return InsertElementInst::Create(V1, Scalar, IndexC);
2691
2692 return nullptr;
2693}
2694
2695static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2696 // Match the operands as identity with padding (also known as concatenation
2697 // with undef) shuffles of the same source type. The backend is expected to
2698 // recreate these concatenations from a shuffle of narrow operands.
2699 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2700 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2701 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2702 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2703 return nullptr;
2704
2705 // We limit this transform to power-of-2 types because we expect that the
2706 // backend can convert the simplified IR patterns to identical nodes as the
2707 // original IR.
2708 // TODO: If we can verify the same behavior for arbitrary types, the
2709 // power-of-2 checks can be removed.
2710 Value *X = Shuffle0->getOperand(0);
2711 Value *Y = Shuffle1->getOperand(0);
2712 if (X->getType() != Y->getType() ||
2713 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2714 !isPowerOf2_32(
2715 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2716 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2717 match(X, m_Undef()) || match(Y, m_Undef()))
2718 return nullptr;
2719 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2720 match(Shuffle1->getOperand(1), m_Undef()) &&
2721 "Unexpected operand for identity shuffle");
2722
2723 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2724 // operands directly by adjusting the shuffle mask to account for the narrower
2725 // types:
2726 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
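// e.g. (illustrative editorial example, not from the source) with <2 x i32>
// X and Y each widened to <4 x i32>:
//   shuf (shuf X, poison, <0,1,-1,-1>), (shuf Y, poison, <0,1,-1,-1>), <0,1,4,5>
//   --> shuf X, Y, <0,1,2,3>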
2727 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2728 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2729 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2730
2731 ArrayRef<int> Mask = Shuf.getShuffleMask();
2732 SmallVector<int, 16> NewMask(Mask.size(), -1);
2733 for (int i = 0, e = Mask.size(); i != e; ++i) {
2734 if (Mask[i] == -1)
2735 continue;
2736
2737 // If this shuffle is choosing an undef element from 1 of the sources, that
2738 // element is undef.
2739 if (Mask[i] < WideElts) {
2740 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2741 continue;
2742 } else {
2743 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2744 continue;
2745 }
2746
2747 // If this shuffle is choosing from the 1st narrow op, the mask element is
2748 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2749 // element is offset down to adjust for the narrow vector widths.
2750 if (Mask[i] < WideElts) {
2751 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2752 NewMask[i] = Mask[i];
2753 } else {
2754 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2755 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2756 }
2757 }
2758 return new ShuffleVectorInst(X, Y, NewMask);
2759}
2760
2761// Splatting the first element of the result of a BinOp, where any of the
2762// BinOp's operands are the result of a first-element splat, can be
2763// simplified to splatting the first element of the BinOp's result.
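// For example (illustrative, editorial note, not from the source):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b = add <4 x i32> %sx, %y
//   %r = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// becomes:
//   %b2 = add <4 x i32> %x, %y
//   %r = shufflevector <4 x i32> %b2, <4 x i32> poison, <4 x i32> zeroinitializer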
2764Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2765 if (!match(SVI.getOperand(1), m_Poison()) ||
2766 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2767 !SVI.getOperand(0)->hasOneUse())
2768 return nullptr;
2769
2770 Value *Op0 = SVI.getOperand(0);
2771 Value *X, *Y;
2772 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2773 m_Value(Y))) &&
2774 !match(Op0, m_BinOp(m_Value(X),
2775 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2776 return nullptr;
2777 if (X->getType() != Y->getType())
2778 return nullptr;
2779
2780 auto *BinOp = cast<BinaryOperator>(Op0);
2781 if (!isSafeToSpeculativelyExecute(BinOp))
2782 return nullptr;
2783
2784 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2785 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2786 NewBOI->copyIRFlags(BinOp);
2787
2788 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2789}
2790
2791Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2792 Value *LHS = SVI.getOperand(0);
2793 Value *RHS = SVI.getOperand(1);
2794 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2795 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2796 SVI.getType(), ShufQuery))
2797 return replaceInstUsesWith(SVI, V);
2798
2799 if (Instruction *I = simplifyBinOpSplats(SVI))
2800 return I;
2801
2802 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2803 // order to support scalable vectors.
2804 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2805 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2806
2807 if (isa<ScalableVectorType>(LHS->getType()))
2808 return nullptr;
2809
2810 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2811 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2812
2813 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2814 //
2815 // if X and Y are of the same (vector) type, and the element size is not
2816 // changed by the bitcasts, we can distribute the bitcasts through the
2817 // shuffle, hopefully reducing the number of instructions. We make sure that
2818 // at least one bitcast only has one use, so we don't *increase* the number of
2819 // instructions here.
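// For example (illustrative, editorial note, not from the source):
//   shuffle (bitcast <4 x i32> %x to <4 x float>),
//           (bitcast <4 x i32> %y to <4 x float>), <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes:
//   bitcast (shuffle <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>)
//     to <4 x float>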
2820 Value *X, *Y;
2821 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2822 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2823 X->getType()->getScalarSizeInBits() ==
2824 SVI.getType()->getScalarSizeInBits() &&
2825 (LHS->hasOneUse() || RHS->hasOneUse())) {
2826 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2827 SVI.getName() + ".uncasted");
2828 return new BitCastInst(V, SVI.getType());
2829 }
2830
2831 ArrayRef<int> Mask = SVI.getShuffleMask();
2832
2833 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2834 // simulated shuffle can simplify, then this shuffle is unnecessary:
2835 // shuf (bitcast X), undef, Mask --> bitcast X'
2836 // TODO: This could be extended to allow length-changing shuffles.
2837 // The transform might also be obsoleted if we allowed canonicalization
2838 // of bitcasted shuffles.
2839 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2840 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2841 // Try to create a scaled mask constant.
2842 auto *XType = cast<FixedVectorType>(X->getType());
2843 unsigned XNumElts = XType->getNumElements();
2844 SmallVector<int, 16> ScaledMask;
2845 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2846 // If the shuffled source vector simplifies, cast that value to this
2847 // shuffle's type.
2848 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2849 ScaledMask, XType, ShufQuery))
2850 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2851 }
2852 }
2853
2854 // shuffle x, x, mask --> shuffle x, undef, mask'
2855 if (LHS == RHS) {
2856 assert(!match(RHS, m_Undef()) &&
2857 "Shuffle with 2 undef ops not simplified?");
2858 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2859 }
2860
2861 // shuffle undef, x, mask --> shuffle x, undef, mask'
2862 if (match(LHS, m_Undef())) {
2863 SVI.commute();
2864 return &SVI;
2865 }
2866
2867 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2868 return I;
2869
2870 if (Instruction *I = foldSelectShuffle(SVI))
2871 return I;
2872
2873 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2874 return I;
2875
2876 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2877 return I;
2878
2879 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2880 return I;
2881
2882 if (Instruction *I = foldCastShuffle(SVI, Builder))
2883 return I;
2884
2885 APInt PoisonElts(VWidth, 0);
2886 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2887 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2888 if (V != &SVI)
2889 return replaceInstUsesWith(SVI, V);
2890 return &SVI;
2891 }
2892
2893 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2894 return I;
2895
2896 // These transforms have the potential to lose undef knowledge, so they are
2897 // intentionally placed after SimplifyDemandedVectorElts().
2898 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2899 return I;
2900 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2901 return I;
2902
2903 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
2904 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2905 return replaceInstUsesWith(SVI, V);
2906 }
2907
2908 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2909 // a non-vector type. We can instead bitcast the original vector followed by
2910 // an extract of the desired element:
2911 //
2912 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2913 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2914 // %1 = bitcast <4 x i8> %sroa to i32
2915 // Becomes:
2916 // %bc = bitcast <16 x i8> %in to <4 x i32>
2917 // %ext = extractelement <4 x i32> %bc, i32 0
2918 //
2919 // If the shuffle is extracting a contiguous range of values from the input
2920 // vector, then each use that is a bitcast of the extracted size can be
2921 // replaced. This will work if the vector types are compatible, and the begin
2922 // index is aligned to a value in the casted vector type. If the begin index
2923 // isn't aligned then we can shuffle the original vector (keeping the same
2924 // vector type) before extracting.
2925 //
2926 // This code will bail out if the target type is fundamentally incompatible
2927 // with vectors of the source type.
2928 //
2929 // Example of <16 x i8>, target type i32:
2930 // Index range [4,8): v-----------v Will work.
2931 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2932 // <16 x i8>: | | | | | | | | | | | | | | | | |
2933 // <4 x i32>: | | | | |
2934 // +-----------+-----------+-----------+-----------+
2935 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2936 // Target type i40: ^--------------^ Won't work, bail.
2937 bool MadeChange = false;
2938 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2939 Value *V = LHS;
2940 unsigned MaskElems = Mask.size();
2941 auto *SrcTy = cast<FixedVectorType>(V->getType());
2942 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2943 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2944 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2945 unsigned SrcNumElems = SrcTy->getNumElements();
2946 SmallVector<BitCastInst *, 8> BCs;
2947 DenseMap<Type *, Value *> NewBCs;
2948 for (User *U : SVI.users())
2949 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2950 if (!BC->use_empty())
2951 // Only visit bitcasts that weren't previously handled.
2952 BCs.push_back(BC);
2953 for (BitCastInst *BC : BCs) {
2954 unsigned BegIdx = Mask.front();
2955 Type *TgtTy = BC->getDestTy();
2956 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2957 if (!TgtElemBitWidth)
2958 continue;
2959 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2960 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2961 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
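// E.g., with i8 source elements and an i32 target element, BegIdx == 6
// gives 6 * 8 == 48 bits, which is not a multiple of 32: that is the
// misaligned [6,10) case pictured above, and it needs the pre-shuffle below.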
2962 if (!VecBitWidthsEqual)
2963 continue;
2964 if (!VectorType::isValidElementType(TgtTy))
2965 continue;
2966 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2967 if (!BegIsAligned) {
2968 // Shuffle the input so [0,MaskElems) contains the output, and
2969 // [MaskElems,SrcNumElems) is poison.
2970 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2971 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2972 ShuffleMask[I] = Idx;
2973 V = Builder.CreateShuffleVector(V, ShuffleMask,
2974 SVI.getName() + ".extract");
2975 BegIdx = 0;
2976 }
2977 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2978 assert(SrcElemsPerTgtElem);
2979 BegIdx /= SrcElemsPerTgtElem;
2980 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
2981 auto *NewBC =
2982 BCAlreadyExists
2983 ? NewBCs[CastSrcTy]
2984 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2985 if (!BCAlreadyExists)
2986 NewBCs[CastSrcTy] = NewBC;
2987 auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
2988 SVI.getName() + ".extract");
2989 // The shufflevector isn't being replaced: the bitcast that used it
2990 // is. InstCombine will visit the newly-created instructions.
2991 replaceInstUsesWith(*BC, Ext);
2992 MadeChange = true;
2993 }
2994 }
2995
2996 // If the LHS is a shufflevector itself, see if we can combine it with this
2997 // one without producing an unusual shuffle.
2998 // Cases that might be simplified:
2999 // 1.
3000 // x1=shuffle(v1,v2,mask1)
3001 // x=shuffle(x1,undef,mask)
3002 // ==>
3003 // x=shuffle(v1,undef,newMask)
3004 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3005 // 2.
3006 // x1=shuffle(v1,undef,mask1)
3007 // x=shuffle(x1,x2,mask)
3008 // where v1.size() == mask1.size()
3009 // ==>
3010 // x=shuffle(v1,x2,newMask)
3011 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3012 // 3.
3013 // x2=shuffle(v2,undef,mask2)
3014 // x=shuffle(x1,x2,mask)
3015 // where v2.size() == mask2.size()
3016 // ==>
3017 // x=shuffle(x1,v2,newMask)
3018 // newMask[i] = (mask[i] < x1.size())
3019 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3020 // 4.
3021 // x1=shuffle(v1,undef,mask1)
3022 // x2=shuffle(v2,undef,mask2)
3023 // x=shuffle(x1,x2,mask)
3024 // where v1.size() == v2.size()
3025 // ==>
3026 // x=shuffle(v1,v2,newMask)
3027 // newMask[i] = (mask[i] < x1.size())
3028 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3029 //
3030 // Here we are deliberately conservative:
3031 // we avoid producing a shuffle mask that did not appear in the input
3032 // program, because the code generator may not be smart enough to turn a
3033 // merged shuffle back into two specific shuffles and may produce worse
3034 // code. As such, we only merge two shuffles if the result is either a
3035 // splat or one of the input shuffle masks. In this case, merging the
3036 // shuffles just removes one instruction, which we know is safe. This is
3037 // good for things like turning (splat(splat)) -> splat, or
3038 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
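// A concrete case-1 instance (made-up masks):
//   x1 = shuffle(v1, v2, <0,4,1,5>)
//   x  = shuffle(x1, undef, <2,2,2,2>)
// gives newMask[i] = mask1[2] = 1 for every lane, i.e.
//   x  = shuffle(v1, v2, <1,1,1,1>)
// which is accepted because the result is a splat.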
3039 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3040 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3041 if (LHSShuffle)
3042 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3043 !match(RHS, m_Poison()))
3044 LHSShuffle = nullptr;
3045 if (RHSShuffle)
3046 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3047 RHSShuffle = nullptr;
3048 if (!LHSShuffle && !RHSShuffle)
3049 return MadeChange ? &SVI : nullptr;
3050
3051 Value* LHSOp0 = nullptr;
3052 Value* LHSOp1 = nullptr;
3053 Value* RHSOp0 = nullptr;
3054 unsigned LHSOp0Width = 0;
3055 unsigned RHSOp0Width = 0;
3056 if (LHSShuffle) {
3057 LHSOp0 = LHSShuffle->getOperand(0);
3058 LHSOp1 = LHSShuffle->getOperand(1);
3059 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3060 }
3061 if (RHSShuffle) {
3062 RHSOp0 = RHSShuffle->getOperand(0);
3063 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3064 }
3065 Value* newLHS = LHS;
3066 Value* newRHS = RHS;
3067 if (LHSShuffle) {
3068 // case 1
3069 if (match(RHS, m_Poison())) {
3070 newLHS = LHSOp0;
3071 newRHS = LHSOp1;
3072 }
3073 // case 2 or 4
3074 else if (LHSOp0Width == LHSWidth) {
3075 newLHS = LHSOp0;
3076 }
3077 }
3078 // case 3 or 4
3079 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3080 newRHS = RHSOp0;
3081 }
3082 // case 4
3083 if (LHSOp0 == RHSOp0) {
3084 newLHS = LHSOp0;
3085 newRHS = nullptr;
3086 }
3087
3088 if (newLHS == LHS && newRHS == RHS)
3089 return MadeChange ? &SVI : nullptr;
3090
3091 ArrayRef<int> LHSMask;
3092 ArrayRef<int> RHSMask;
3093 if (newLHS != LHS)
3094 LHSMask = LHSShuffle->getShuffleMask();
3095 if (RHSShuffle && newRHS != RHS)
3096 RHSMask = RHSShuffle->getShuffleMask();
3097
3098 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3099 SmallVector<int, 16> newMask;
3100 bool isSplat = true;
3101 int SplatElt = -1;
3102 // Create a new mask for the new ShuffleVectorInst so that the new
3103 // ShuffleVectorInst is equivalent to the original one.
3104 for (unsigned i = 0; i < VWidth; ++i) {
3105 int eltMask;
3106 if (Mask[i] < 0) {
3107 // This element is a poison value.
3108 eltMask = -1;
3109 } else if (Mask[i] < (int)LHSWidth) {
3110 // This element is from left hand side vector operand.
3111 //
3112 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3113 // new mask value for the element.
3114 if (newLHS != LHS) {
3115 eltMask = LHSMask[Mask[i]];
3116 // If the value selected is a poison value, explicitly specify it
3117 // with a -1 mask value.
3118 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3119 eltMask = -1;
3120 } else
3121 eltMask = Mask[i];
3122 } else {
3123 // This element is from right hand side vector operand
3124 //
3125 // If the value selected is a poison value, explicitly specify it
3126 // with a -1 mask value. (case 1)
3127 if (match(RHS, m_Poison()))
3128 eltMask = -1;
3129 // If RHS is going to be replaced (case 3 or 4), calculate the
3130 // new mask value for the element.
3131 else if (newRHS != RHS) {
3132 eltMask = RHSMask[Mask[i]-LHSWidth];
3133 // If the value selected is a poison value, explicitly specify it
3134 // with a -1 mask value.
3135 if (eltMask >= (int)RHSOp0Width) {
3136 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3137 "should have been check above");
3138 eltMask = -1;
3139 }
3140 } else
3141 eltMask = Mask[i]-LHSWidth;
3142
3143 // If LHS's width is changed, shift the mask value accordingly.
3144 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3145 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3146 // If newRHS == newLHS, we want to remap any references from newRHS to
3147 // newLHS so that we can properly identify splats that may occur due to
3148 // obfuscation across the two vectors.
3149 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3150 eltMask += newLHSWidth;
3151 }
3152
3153 // Check if this could still be a splat.
3154 if (eltMask >= 0) {
3155 if (SplatElt >= 0 && SplatElt != eltMask)
3156 isSplat = false;
3157 SplatElt = eltMask;
3158 }
3159
3160 newMask.push_back(eltMask);
3161 }
3162
3163 // If the result mask is equal to one of the original shuffle masks,
3164 // or is a splat, do the replacement.
3165 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3166 if (!newRHS)
3167 newRHS = PoisonValue::get(newLHS->getType());
3168 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3169 }
3170
3171 return MadeChange ? &SVI : nullptr;
3172}