//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
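///
/// Illustrative example (hypothetical IR, not from the source): for
///   %e = extractelement <4 x i32> %b, i64 1
/// where %b = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4> has one use,
/// this returns true: the constant operand extracts for free, so the add can
/// later be rewritten as "add i32 (extractelement %x, 1), 2".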
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector,
    // because for a scalable vector the vector size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpPredicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
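//
// Illustrative sketch (hypothetical IR; the multiplier is a constant so the
// feeding binop is cheap to scalarize):
//   %vec = phi <4 x float> [ %init, %entry ], [ %next, %loop ]
//   %next = fmul <4 x float> %vec, <constant splat>
//   %e = extractelement <4 x float> %vec, i64 0
// can become, for the extract's users:
//   %s = phi float [ %init.elt, %entry ], [ %snext, %loop ]
//   %snext = fmul float %s, <scalar constant>
// where %init.elt is extracted from %init in the predecessor block.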
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}
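// Try to fold an extractelement whose vector operand is a bitcast: by
// shifting/truncating a scalar-integer source, by extracting directly from a
// same-element-count source vector, or by peeking through an insertelement of
// a wider scalar. For instance (hypothetical IR, little-endian):
//   %bc = bitcast <2 x i64> %v to <4 x i32>
//   %e  = extractelement <4 x i32> %bc, i64 3
// with %v = insertelement <2 x i64> %w, i64 %s, i64 1 can become
//   %hi = lshr i64 %s, 32
//   %e  = trunc i64 %hi to i32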
Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts once
      // scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
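///
/// For example (hypothetical IR): if UserInstr is
///   "extractelement <4 x i32> %v, i64 2"
/// only bit 2 of the result is set; for a shufflevector user, a bit is set
/// for every lane of %v that its mask actually reads. Any other user
/// conservatively demands all elements.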
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
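///
/// For example (hypothetical IR): if %v has two extractelement users with
/// constant indices 0 and 3, the result is 0b1001; any non-instruction user
/// makes the result all-ones.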
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
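///
/// For example, an "i32 7" index is rewritten to "i64 7"; an index that is
/// already i64 (or whose value doesn't fit in 64 bits) yields nullptr,
/// meaning "leave it alone".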
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If, for instance, the extractelement would do an out-of-bounds access
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the vector size is only known at run
      // time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For fixed-length vector, it's invalid to extract out-of-range element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpPredicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify has already handled the case where the indices are
      // constants and equal by value. So if both are constants here, they
      // must differ, and we can extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vector.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable type, the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
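///
/// For example (hypothetical IR): with LHS = <4 x i32> %a and RHS = %b,
///   %v = insertelement <4 x i32> %a, i32 %s, i64 2
/// where %s = extractelement <4 x i32> %b, i64 0 collects the mask
/// <0, 1, 4, 3> (RHS lanes are numbered starting at NumElts).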
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
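///
/// Sketch of the idea (hypothetical IR): extracting from <2 x i32> %small and
/// inserting into a <4 x i32> first widens %small with poison lanes:
///   %wide = shufflevector <2 x i32> %small, <2 x i32> poison, <0,1,poison,poison>
/// so the insert/extract pair can later become a single shuffle of %wide.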
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or nullptr),
/// and set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
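///
/// For example (hypothetical IR): for a chain that inserts (extractelement
/// %a, 0) into lane 0 and (extractelement %b, 1) into lane 1 of a poison
/// <2 x i32>, with PermittedRHS == %b, this can return {%a, %b} with
/// Mask = <0, 3>.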
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted, to see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indices.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
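///
/// For example (hypothetical IR):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// can be replaced with just %agg.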
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed the full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the type of the source aggregate from which the
    /// extraction was made didn't match the aggregate type into which the
    /// insertion was made, or the extraction/insertion indices mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
      if (Elt && Elt->getParent() == *UseBB)
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI, see if
  // we can find an appropriate source aggregate for each of the elements, and
  // check that it is the same aggregate for every element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined;
  // they should all be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate from which all the
  // elements were originally extracted?
  // Note that we want the map to have stable iteration order!
  SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Hopefully, when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI were originally extracted from the same
    // aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }

  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity checks if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // Element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Performing this transformation across a loop boundary may create an
    // endless loop, so we should avoid that situation. But LoopInfo is not
    // generally available; we must be conservative here.
    // If OrigIVI is in UseBB and UseBB is the only successor of PredBB, then
    // PredBB can't be inside an inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing a constant aggregate, because keeping the constant
    // values separate may expose more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without an appropriate source aggregate, create one in
  // the predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

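// The helper below returns true if the shuffle behaves like a vector select:
// each mask element is poison or picks lane i from one of the two
// (equal-length) inputs. For example, a <4 x i32> shuffle mask <0, 5, 2, 7>
// qualifies, while <1, 5, 2, 7> does not (lane 0 reads a different lane).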
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an infinite loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

1440/// If we have an insertelement instruction feeding into another insertelement
1441/// and the 2nd is inserting a constant into the vector, canonicalize that
1442/// constant insertion before the insertion of a variable:
1443///
1444/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1445/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1446///
1447/// This has the potential of eliminating the 2nd insertelement instruction
1448/// via constant folding of the scalar constant into a vector constant.
1449static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1450 InstCombiner::BuilderTy &Builder) {
1451 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1452 if (!InsElt1 || !InsElt1->hasOneUse())
1453 return nullptr;
1454
1455 Value *X, *Y;
1456 Constant *ScalarC;
1457 ConstantInt *IdxC1, *IdxC2;
1458 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1459 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1460 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1461 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1462 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1463 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1464 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1465 }
1466
1467 return nullptr;
1468}
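// Example of the hoist above (editor's sketch; %x and %y are placeholders):
//   %i1 = insertelement <4 x i32> %x, i32 %y, i64 1
//   %i2 = insertelement <4 x i32> %i1, i32 7, i64 2
// becomes
//   %c  = insertelement <4 x i32> %x, i32 7, i64 2
//   %i2 = insertelement <4 x i32> %c, i32 %y, i64 1
// so that, if %x is a constant vector, the inner insert constant-folds away.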
1469
1470/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1471/// --> shufflevector X, CVec', Mask'
1472static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1473 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1474 // Bail out if the parent has more than one use. In that case, we'd be
1475 // replacing the insertelt with a shuffle, and that's not a clear win.
1476 if (!Inst || !Inst->hasOneUse())
1477 return nullptr;
1478 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1479 // The shuffle must have a constant vector operand. The insertelt must have
1480 // a constant scalar being inserted at a constant position in the vector.
1481 Constant *ShufConstVec, *InsEltScalar;
1482 uint64_t InsEltIndex;
1483 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1484 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1485 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1486 return nullptr;
1487
1488 // Adding an element to an arbitrary shuffle could be expensive, but a
1489 // shuffle that selects elements from vectors without crossing lanes is
1490 // assumed cheap.
1491 // If we're just adding a constant into that shuffle, it will still be
1492 // cheap.
1493 if (!isShuffleEquivalentToSelect(*Shuf))
1494 return nullptr;
1495
1496 // From the above 'select' check, we know that the mask has the same number
1497 // of elements as the vector input operands. We also know that each constant
1498 // input element is used in its lane and cannot be used more than once by
1499 // the shuffle. Therefore, replace the constant in the shuffle's constant
1500 // vector with the insertelt constant. Replace the constant in the shuffle's
1501 // mask vector with the insertelt index plus the length of the vector
1502 // (because the constant vector operand of a shuffle is always the 2nd
1503 // operand).
1504 ArrayRef<int> Mask = Shuf->getShuffleMask();
1505 unsigned NumElts = Mask.size();
1506 SmallVector<Constant *, 16> NewShufElts(NumElts);
1507 SmallVector<int, 16> NewMaskElts(NumElts);
1508 for (unsigned I = 0; I != NumElts; ++I) {
1509 if (I == InsEltIndex) {
1510 NewShufElts[I] = InsEltScalar;
1511 NewMaskElts[I] = InsEltIndex + NumElts;
1512 } else {
1513 // Copy over the existing values.
1514 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1515 NewMaskElts[I] = Mask[I];
1516 }
1517
1518 // Bail if we failed to find an element.
1519 if (!NewShufElts[I])
1520 return nullptr;
1521 }
1522
1523 // Create new operands for a shuffle that includes the constant of the
1524 // original insertelt. The old shuffle will be dead now.
1525 return new ShuffleVectorInst(Shuf->getOperand(0),
1526 ConstantVector::get(NewShufElts), NewMaskElts);
1527 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1528 // Transform sequences of insertelement ops with constant data/indices into
1529 // a single shuffle op.
1530 // Cannot handle scalable types: the number of elements needed to create the
1531 // shuffle mask is not a compile-time constant.
1532 if (isa<ScalableVectorType>(InsElt.getType()))
1533 return nullptr;
1534 unsigned NumElts =
1535 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1536
1537 uint64_t InsertIdx[2];
1538 Constant *Val[2];
1539 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1540 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1541 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1542 !match(IEI->getOperand(1), m_Constant(Val[1])))
1543 return nullptr;
1544 SmallVector<Constant *, 16> Values(NumElts);
1545 SmallVector<int, 16> Mask(NumElts);
1546 auto ValI = std::begin(Val);
1547 // Generate new constant vector and mask.
1548 // We have 2 values/masks from the insertelements instructions. Insert them
1549 // into new value/mask vectors.
1550 for (uint64_t I : InsertIdx) {
1551 if (!Values[I]) {
1552 Values[I] = *ValI;
1553 Mask[I] = NumElts + I;
1554 }
1555 ++ValI;
1556 }
1557 // Remaining values are filled with 'poison' values.
1558 for (unsigned I = 0; I < NumElts; ++I) {
1559 if (!Values[I]) {
1560 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1561 Mask[I] = I;
1562 }
1563 }
1564 // Create new operands for a shuffle that includes the constant of the
1565 // original insertelt.
1566 return new ShuffleVectorInst(IEI->getOperand(0),
1567 ConstantVector::get(Values), Mask);
1568 }
1569 return nullptr;
1570}
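// Worked example of the select-shuffle case above (editor's sketch):
//   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %i = insertelement <4 x i32> %s, i32 9, i64 2
// becomes
//   shufflevector <4 x i32> %x, <4 x i32> <i32 1, i32 2, i32 9, i32 4>,
//                 <4 x i32> <i32 0, i32 5, i32 6, i32 7>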
1571
1572/// If both the base vector and the inserted element are extended from the same
1573/// type, do the insert element in the narrow source type followed by extend.
1574/// TODO: This can be extended to include other cast opcodes, but particularly
1575/// if we create a wider insertelement, make sure codegen is not harmed.
1576static Instruction *narrowInsElt(InsertElementInst &InsElt,
1577 InstCombiner::BuilderTy &Builder) {
1578 // We are creating a vector extend. If the original vector extend has another
1579 // use, that would mean we end up with 2 vector extends, so avoid that.
1580 // TODO: We could ease the use-clause to "if at least one op has one use"
1581 // (assuming that the source types match - see next TODO comment).
1582 Value *Vec = InsElt.getOperand(0);
1583 if (!Vec->hasOneUse())
1584 return nullptr;
1585
1586 Value *Scalar = InsElt.getOperand(1);
1587 Value *X, *Y;
1588 CastInst::CastOps CastOpcode;
1589 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1590 CastOpcode = Instruction::FPExt;
1591 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1592 CastOpcode = Instruction::SExt;
1593 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1594 CastOpcode = Instruction::ZExt;
1595 else
1596 return nullptr;
1597
1598 // TODO: We can allow mismatched types by creating an intermediate cast.
1599 if (X->getType()->getScalarType() != Y->getType())
1600 return nullptr;
1601
1602 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1603 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1604 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1605}
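// Example of the narrowing above (editor's sketch):
//   %v = fpext <2 x float> %x to <2 x double>
//   %s = fpext float %y to double
//   %r = insertelement <2 x double> %v, double %s, i64 0
// becomes
//   %i = insertelement <2 x float> %x, float %y, i64 0
//   %r = fpext <2 x float> %i to <2 x double>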
1606
1607/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1608/// try to convert to a single insert with appropriate bitcasts.
1609static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1610 bool IsBigEndian,
1611 InstCombiner::BuilderTy &Builder) {
1612 Value *VecOp = InsElt.getOperand(0);
1613 Value *ScalarOp = InsElt.getOperand(1);
1614 Value *IndexOp = InsElt.getOperand(2);
1615
1616 // The pattern depends on endianness: the lower index is inserted first.
1617 // Big endian:
1618 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1619 // Little endian:
1620 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1621 // Note: It is not safe to do this transform with an arbitrary base vector
1622 // because the bitcast of that vector to fewer/larger elements could
1623 // allow poison to spill into an element that was not poison before.
1624 // TODO: Detect smaller fractions of the scalar.
1625 // TODO: One-use checks are conservative.
1626 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1627 Value *Scalar0, *BaseVec;
1628 uint64_t Index0, Index1;
1629 if (!VTy || (VTy->getNumElements() & 1) ||
1630 !match(IndexOp, m_ConstantInt(Index1)) ||
1631 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1632 m_ConstantInt(Index0))) ||
1633 !match(BaseVec, m_Undef()))
1634 return nullptr;
1635
1636 // The first insert must be to the index one less than this one, and that
1637 // index must be even.
1638 if (Index0 + 1 != Index1 || Index0 & 1)
1639 return nullptr;
1640
1641 // For big endian, the high half of the value should be inserted first.
1642 // For little endian, the low half of the value should be inserted first.
1643 Value *X;
1644 uint64_t ShAmt;
1645 if (IsBigEndian) {
1646 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1647 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1648 return nullptr;
1649 } else {
1650 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1651 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1652 return nullptr;
1653 }
1654
1655 Type *SrcTy = X->getType();
1656 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1657 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1658 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1659 return nullptr;
1660
1661 // Bitcast the base vector to a vector type with the source element type.
1662 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1663 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1664
1665 // Scale the insert index for a vector with half as many elements.
1666 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1667 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1668 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1669 return new BitCastInst(NewInsert, VTy);
1670}
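// Example of the fold above (editor's sketch, little endian, an i32 split
// into two i16 halves):
//   %sh = lshr i32 %x, 16
//   %lo = trunc i32 %x to i16
//   %hi = trunc i32 %sh to i16
//   %i0 = insertelement <4 x i16> undef, i16 %lo, i64 0
//   %i1 = insertelement <4 x i16> %i0, i16 %hi, i64 1
// becomes
//   %bc = bitcast <4 x i16> undef to <2 x i32>
//   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
//   %i1 = bitcast <2 x i32> %in to <4 x i16>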
1671
1672Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1673 Value *VecOp = IE.getOperand(0);
1674 Value *ScalarOp = IE.getOperand(1);
1675 Value *IdxOp = IE.getOperand(2);
1676
1677 if (auto *V = simplifyInsertElementInst(
1678 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1679 return replaceInstUsesWith(IE, V);
1680
1681 // Canonicalize type of constant indices to i64 to simplify CSE
1682 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1683 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1684 return replaceOperand(IE, 2, NewIdx);
1685
1686 Value *BaseVec, *OtherScalar;
1687 uint64_t OtherIndexVal;
1688 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1689 m_Value(OtherScalar),
1690 m_ConstantInt(OtherIndexVal)))) &&
1691 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1692 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1693 return InsertElementInst::Create(NewIns, OtherScalar,
1694 Builder.getInt64(OtherIndexVal));
1695 }
1696 }
1697
1698 // If the scalar is bitcast and inserted into undef, do the insert in the
1699 // source type followed by bitcast.
1700 // TODO: Generalize for insert into any constant, not just undef?
1701 Value *ScalarSrc;
1702 if (match(VecOp, m_Undef()) &&
1703 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1704 (ScalarSrc->getType()->isIntegerTy() ||
1705 ScalarSrc->getType()->isFloatingPointTy())) {
1706 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1707 // bitcast (inselt undef, ScalarSrc, IdxOp)
1708 Type *ScalarTy = ScalarSrc->getType();
1709 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1710 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1711 : UndefValue::get(VecTy);
1712 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1713 return new BitCastInst(NewInsElt, IE.getType());
1714 }
1715
1716 // If the vector and scalar are both bitcast from the same element type, do
1717 // the insert in that source type followed by bitcast.
1718 Value *VecSrc;
1719 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1720 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1721 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1722 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1723 cast<VectorType>(VecSrc->getType())->getElementType() ==
1724 ScalarSrc->getType()) {
1725 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1726 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1727 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1728 return new BitCastInst(NewInsElt, IE.getType());
1729 }
1730
1731 // If the inserted element was extracted from some other fixed-length vector
1732 // and both indexes are valid constants, try to turn this into a shuffle.
1733 // Cannot handle scalable vector types: the number of elements needed to
1734 // create the shuffle mask is not a compile-time constant.
1735 uint64_t InsertedIdx, ExtractedIdx;
1736 Value *ExtVecOp;
1737 if (isa<FixedVectorType>(IE.getType()) &&
1738 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1739 match(ScalarOp,
1740 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1741 isa<FixedVectorType>(ExtVecOp->getType()) &&
1742 ExtractedIdx <
1743 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1744 // TODO: Looking at the user(s) to determine if this insert is a
1745 // fold-to-shuffle opportunity does not match the usual instcombine
1746 // constraints. We should decide if the transform is worthy based only
1747 // on this instruction and its operands, but that may not work currently.
1748 //
1749 // Here, we are trying to avoid creating shuffles before reaching
1750 // the end of a chain of extract-insert pairs. This is complicated because
1751 // we do not generally form arbitrary shuffle masks in instcombine
1752 // (because those may codegen poorly), but collectShuffleElements() does
1753 // exactly that.
1754 //
1755 // The rules for determining what is an acceptable target-independent
1756 // shuffle mask are fuzzy because they evolve based on the backend's
1757 // capabilities and real-world impact.
1758 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1759 if (!Insert.hasOneUse())
1760 return true;
1761 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1762 if (!InsertUser)
1763 return true;
1764 return false;
1765 };
1766
1767 // Try to form a shuffle from a chain of extract-insert ops.
1768 if (isShuffleRootCandidate(IE)) {
1769 bool Rerun = true;
1770 while (Rerun) {
1771 Rerun = false;
1772
1773 SmallVector<int, 16> Mask;
1774 ShuffleOps LR =
1775 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1776
1777 // The proposed shuffle may be trivial, in which case we shouldn't
1778 // perform the combine.
1779 if (LR.first != &IE && LR.second != &IE) {
1780 // We now have a shuffle of LHS, RHS, Mask.
1781 if (LR.second == nullptr)
1782 LR.second = PoisonValue::get(LR.first->getType());
1783 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1784 }
1785 }
1786 }
1787 }
1788
1789 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1790 unsigned VWidth = VecTy->getNumElements();
1791 APInt PoisonElts(VWidth, 0);
1792 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1793 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1794 PoisonElts)) {
1795 if (V != &IE)
1796 return replaceInstUsesWith(IE, V);
1797 return &IE;
1798 }
1799 }
1800
1801 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1802 return Shuf;
1803
1804 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1805 return NewInsElt;
1806
1807 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1808 return Broadcast;
1809
1810 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1811 return Splat;
1812
1813 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1814 return IdentityShuf;
1815
1816 if (Instruction *Ext = narrowInsElt(IE, Builder))
1817 return Ext;
1818
1819 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1820 return Ext;
1821
1822 return nullptr;
1823}
1824
1825/// Return true if we can evaluate the specified expression tree if the vector
1826/// elements were shuffled in a different order.
1827static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1828 unsigned Depth = 5) {
1829 // We can always reorder the elements of a constant.
1830 if (isa<Constant>(V))
1831 return true;
1832
1833 // We won't reorder vector arguments. No IPO here.
1834 Instruction *I = dyn_cast<Instruction>(V);
1835 if (!I) return false;
1836
1837 // Two users may expect different orders of the elements. Don't try it.
1838 if (!I->hasOneUse())
1839 return false;
1840
1841 if (Depth == 0) return false;
1842
1843 switch (I->getOpcode()) {
1844 case Instruction::UDiv:
1845 case Instruction::SDiv:
1846 case Instruction::URem:
1847 case Instruction::SRem:
1848 // Propagating an undefined shuffle mask element to integer div/rem is not
1849 // allowed because those opcodes can create immediate undefined behavior
1850 // from an undefined element in an operand.
1851 if (llvm::is_contained(Mask, -1))
1852 return false;
1853 [[fallthrough]];
1854 case Instruction::Add:
1855 case Instruction::FAdd:
1856 case Instruction::Sub:
1857 case Instruction::FSub:
1858 case Instruction::Mul:
1859 case Instruction::FMul:
1860 case Instruction::FDiv:
1861 case Instruction::FRem:
1862 case Instruction::Shl:
1863 case Instruction::LShr:
1864 case Instruction::AShr:
1865 case Instruction::And:
1866 case Instruction::Or:
1867 case Instruction::Xor:
1868 case Instruction::ICmp:
1869 case Instruction::FCmp:
1870 case Instruction::Trunc:
1871 case Instruction::ZExt:
1872 case Instruction::SExt:
1873 case Instruction::FPToUI:
1874 case Instruction::FPToSI:
1875 case Instruction::UIToFP:
1876 case Instruction::SIToFP:
1877 case Instruction::FPTrunc:
1878 case Instruction::FPExt:
1879 case Instruction::GetElementPtr: {
1880 // Bail out if we would create longer vector ops. We could allow creating
1881 // longer vector ops, but that may result in more expensive codegen.
1882 Type *ITy = I->getType();
1883 if (ITy->isVectorTy() &&
1884 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1885 return false;
1886 for (Value *Operand : I->operands()) {
1887 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1888 return false;
1889 }
1890 return true;
1891 }
1892 case Instruction::InsertElement: {
1893 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1894 if (!CI) return false;
1895 int ElementNumber = CI->getLimitedValue();
1896
1897 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1898 // can't put an element into multiple indices.
1899 bool SeenOnce = false;
1900 for (int I : Mask) {
1901 if (I == ElementNumber) {
1902 if (SeenOnce)
1903 return false;
1904 SeenOnce = true;
1905 }
1906 }
1907 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1908 }
1909 }
1910 return false;
1911}
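// Editor's note on the div/rem bail-out above: with a mask such as <1, -1>,
// the reordered divisor operand would gain an undef lane, and integer
// division or remainder by an undef element is immediate undefined behavior,
// so such masks are rejected for those opcodes.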
1912
1913/// Rebuild a new instruction just like 'I' but with the new operands given.
1914/// In the event of a type mismatch, the new operands' types take precedence.
1915static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1916 IRBuilderBase &Builder) {
1917 Builder.SetInsertPoint(I);
1918 switch (I->getOpcode()) {
1919 case Instruction::Add:
1920 case Instruction::FAdd:
1921 case Instruction::Sub:
1922 case Instruction::FSub:
1923 case Instruction::Mul:
1924 case Instruction::FMul:
1925 case Instruction::UDiv:
1926 case Instruction::SDiv:
1927 case Instruction::FDiv:
1928 case Instruction::URem:
1929 case Instruction::SRem:
1930 case Instruction::FRem:
1931 case Instruction::Shl:
1932 case Instruction::LShr:
1933 case Instruction::AShr:
1934 case Instruction::And:
1935 case Instruction::Or:
1936 case Instruction::Xor: {
1937 BinaryOperator *BO = cast<BinaryOperator>(I);
1938 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1939 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1940 NewOps[0], NewOps[1]);
1941 if (auto *NewI = dyn_cast<Instruction>(New)) {
1942 if (isa<OverflowingBinaryOperator>(BO)) {
1943 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1944 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1945 }
1946 if (isa<PossiblyExactOperator>(BO)) {
1947 NewI->setIsExact(BO->isExact());
1948 }
1949 if (isa<FPMathOperator>(BO))
1950 NewI->copyFastMathFlags(I);
1951 }
1952 return New;
1953 }
1954 case Instruction::ICmp:
1955 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1956 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1957 NewOps[1]);
1958 case Instruction::FCmp:
1959 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1960 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1961 NewOps[1]);
1962 case Instruction::Trunc:
1963 case Instruction::ZExt:
1964 case Instruction::SExt:
1965 case Instruction::FPToUI:
1966 case Instruction::FPToSI:
1967 case Instruction::UIToFP:
1968 case Instruction::SIToFP:
1969 case Instruction::FPTrunc:
1970 case Instruction::FPExt: {
1971 // It's possible that the mask has a different number of elements from
1972 // the original cast. We recompute the destination type to match the mask.
1973 Type *DestTy = VectorType::get(
1974 I->getType()->getScalarType(),
1975 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1976 assert(NewOps.size() == 1 && "cast with #ops != 1");
1977 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1978 DestTy);
1979 }
1980 case Instruction::GetElementPtr: {
1981 Value *Ptr = NewOps[0];
1982 ArrayRef<Value*> Idx = NewOps.slice(1);
1983 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1984 Ptr, Idx, "",
1985 cast<GEPOperator>(I)->isInBounds());
1986 }
1987 }
1988 llvm_unreachable("failed to rebuild vector instructions");
1989}
1990
1991static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1992 IRBuilderBase &Builder) {
1993 // Mask.size() does not need to be equal to the number of vector elements.
1994
1995 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1996 Type *EltTy = V->getType()->getScalarType();
1997
1998 if (isa<PoisonValue>(V))
1999 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2000
2001 if (match(V, m_Undef()))
2002 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2003
2004 if (isa<ConstantAggregateZero>(V))
2005 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2006
2007 if (Constant *C = dyn_cast<Constant>(V))
2008 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2009 Mask);
2010
2011 Instruction *I = cast<Instruction>(V);
2012 switch (I->getOpcode()) {
2013 case Instruction::Add:
2014 case Instruction::FAdd:
2015 case Instruction::Sub:
2016 case Instruction::FSub:
2017 case Instruction::Mul:
2018 case Instruction::FMul:
2019 case Instruction::UDiv:
2020 case Instruction::SDiv:
2021 case Instruction::FDiv:
2022 case Instruction::URem:
2023 case Instruction::SRem:
2024 case Instruction::FRem:
2025 case Instruction::Shl:
2026 case Instruction::LShr:
2027 case Instruction::AShr:
2028 case Instruction::And:
2029 case Instruction::Or:
2030 case Instruction::Xor:
2031 case Instruction::ICmp:
2032 case Instruction::FCmp:
2033 case Instruction::Trunc:
2034 case Instruction::ZExt:
2035 case Instruction::SExt:
2036 case Instruction::FPToUI:
2037 case Instruction::FPToSI:
2038 case Instruction::UIToFP:
2039 case Instruction::SIToFP:
2040 case Instruction::FPTrunc:
2041 case Instruction::FPExt:
2042 case Instruction::Select:
2043 case Instruction::GetElementPtr: {
2044 SmallVector<Value*, 8> NewOps;
2045 bool NeedsRebuild =
2046 (Mask.size() !=
2047 cast<FixedVectorType>(I->getType())->getNumElements());
2048 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2049 Value *V;
2050 // Recursively call evaluateInDifferentElementOrder on vector arguments
2051 // as well. E.g. GetElementPtr may have scalar operands even if the
2052 // return value is a vector, so we need to examine the operand type.
2053 if (I->getOperand(i)->getType()->isVectorTy())
2054 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2055 else
2056 V = I->getOperand(i);
2057 NewOps.push_back(V);
2058 NeedsRebuild |= (V != I->getOperand(i));
2059 }
2060 if (NeedsRebuild)
2061 return buildNew(I, NewOps, Builder);
2062 return I;
2063 }
2064 case Instruction::InsertElement: {
2065 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2066
2067 // The insertelement was inserting at Element. Figure out which element
2068 // that becomes after shuffling. The answer is guaranteed to be unique
2069 // by canEvaluateShuffled().
2070 bool Found = false;
2071 int Index = 0;
2072 for (int e = Mask.size(); Index != e; ++Index) {
2073 if (Mask[Index] == Element) {
2074 Found = true;
2075 break;
2076 }
2077 }
2078
2079 // If element is not in Mask, no need to handle the operand 1 (element to
2080 // be inserted). Just evaluate values in operand 0 according to Mask.
2081 if (!Found)
2082 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2083
2084 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2085 Builder);
2086 Builder.SetInsertPoint(I);
2087 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2088 }
2089 }
2090 llvm_unreachable("failed to reorder elements of vector instruction!");
2091}
2092
2093// Returns true if the shuffle is extracting a contiguous range of values from
2094// LHS, for example:
2095// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2096// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2097// Shuffles to: |EE|FF|GG|HH|
2098// +--+--+--+--+
2100 ArrayRef<int> Mask) {
2101 unsigned LHSElems =
2102 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2103 unsigned MaskElems = Mask.size();
2104 unsigned BegIdx = Mask.front();
2105 unsigned EndIdx = Mask.back();
2106 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2107 return false;
2108 for (unsigned I = 0; I != MaskElems; ++I)
2109 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2110 return false;
2111 return true;
2112}
2113
2114/// These are the ingredients in an alternate form binary operator as described
2115/// below.
2116struct BinopElts {
2117 BinaryOperator::BinaryOps Opcode;
2118 Value *Op0;
2119 Value *Op1;
2120 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2121 Value *V0 = nullptr, Value *V1 = nullptr) :
2122 Opcode(Opc), Op0(V0), Op1(V1) {}
2123 operator bool() const { return Opcode != 0; }
2124};
2125
2126/// Binops may be transformed into binops with different opcodes and operands.
2127/// Reverse the usual canonicalization to enable folds with the non-canonical
2128/// form of the binop. If a transform is possible, return the elements of the
2129/// new binop. If not, return invalid elements.
2130static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2131 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2132 Type *Ty = BO->getType();
2133 switch (BO->getOpcode()) {
2134 case Instruction::Shl: {
2135 // shl X, C --> mul X, (1 << C)
2136 Constant *C;
2137 if (match(BO1, m_ImmConstant(C))) {
2138 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2139 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2140 assert(ShlOne && "Constant folding of immediate constants failed");
2141 return {Instruction::Mul, BO0, ShlOne};
2142 }
2143 break;
2144 }
2145 case Instruction::Or: {
2146 // or disjoint X, C --> add X, C
2147 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2148 return {Instruction::Add, BO0, BO1};
2149 break;
2150 }
2151 case Instruction::Sub:
2152 // sub 0, X --> mul X, -1
2153 if (match(BO0, m_ZeroInt()))
2154 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2155 break;
2156 default:
2157 break;
2158 }
2159 return {};
2160}
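// Equivalences used above (editor's sketch):
//   shl %x, <i32 3, i32 3>   ==  mul %x, <i32 8, i32 8>
//   or disjoint %x, C        ==  add %x, C
//   sub zeroinitializer, %x  ==  mul %x, <i32 -1, i32 -1>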
2161
2162/// A select shuffle of a select shuffle with a shared operand can be reduced
2163/// to a single select shuffle. This is an obvious improvement in IR, and the
2164/// backend is expected to lower select shuffles efficiently.
2165static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2166 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2167
2168 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2169 SmallVector<int, 16> Mask;
2170 Shuf.getShuffleMask(Mask);
2171 unsigned NumElts = Mask.size();
2172
2173 // Canonicalize a select shuffle with common operand as Op1.
2174 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2175 if (ShufOp && ShufOp->isSelect() &&
2176 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2177 std::swap(Op0, Op1);
2178 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2179 }
2180
2181 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2182 if (!ShufOp || !ShufOp->isSelect() ||
2183 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2184 return nullptr;
2185
2186 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2187 SmallVector<int, 16> Mask1;
2188 ShufOp->getShuffleMask(Mask1);
2189 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2190
2191 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2192 if (Y == Op0) {
2193 std::swap(X, Y);
2194 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2195 }
2196
2197 // If the mask chooses from X (operand 0), it stays the same.
2198 // If the mask chooses from the earlier shuffle, the other mask value is
2199 // transferred to the combined select shuffle:
2200 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2201 SmallVector<int, 16> NewMask(NumElts);
2202 for (unsigned i = 0; i != NumElts; ++i)
2203 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2204
2205 // A select mask with undef elements might look like an identity mask.
2206 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2207 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2208 "Unexpected shuffle mask");
2209 return new ShuffleVectorInst(X, Y, NewMask);
2210}
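// Worked mask example for the fold above (editor's sketch, <4 x i32>):
//   inner = shuf X, Y, <0,5,2,7>;  outer = shuf X, inner, <0,1,6,7>
//   --> shuf X, Y, <0,1,2,7>
// Lanes taken from the inner shuffle (6 and 7) inherit its mask values.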
2211
2212static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2213 const SimplifyQuery &SQ) {
2214 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2215
2216 // Are we shuffling together some value and that same value after it has been
2217 // modified by a binop with a constant?
2218 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2219 Constant *C;
2220 bool Op0IsBinop;
2221 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2222 Op0IsBinop = true;
2223 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2224 Op0IsBinop = false;
2225 else
2226 return nullptr;
2227
2228 // The identity constant for a binop leaves a variable operand unchanged. For
2229 // a vector, this is a splat of something like 0, -1, or 1.
2230 // If there's no identity constant for this binop, we're done.
2231 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2232 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2233 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2234 if (!IdC)
2235 return nullptr;
2236
2237 Value *X = Op0IsBinop ? Op1 : Op0;
2238
2239 // Prevent folding in the case the non-binop operand might have NaN values.
2240 // If X can have NaN elements then we have that the floating point math
2241 // operation in the transformed code may not preserve the exact NaN
2242 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2243 // This makes the transformation incorrect since the original program would
2244 // have preserved the exact NaN bit-pattern.
2245 // Avoid the folding if X can have NaN elements.
2246 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2247 !isKnownNeverNaN(X, 0, SQ))
2248 return nullptr;
2249
2250 // Shuffle identity constants into the lanes that return the original value.
2251 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2252 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2253 // The existing binop constant vector remains in the same operand position.
2254 ArrayRef<int> Mask = Shuf.getShuffleMask();
2255 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2256 ConstantExpr::getShuffleVector(IdC, C, Mask);
2257
2258 bool MightCreatePoisonOrUB =
2259 is_contained(Mask, PoisonMaskElem) &&
2260 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2261 if (MightCreatePoisonOrUB)
2262 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2263
2264 // shuf (bop X, C), X, M --> bop X, C'
2265 // shuf X, (bop X, C), M --> bop X, C'
2266 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2267 NewBO->copyIRFlags(BO);
2268
2269 // An undef shuffle mask element may propagate as an undef constant element in
2270 // the new binop. That would produce poison where the original code might not.
2271 // If we already made a safe constant, then there's no danger.
2272 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2273 NewBO->dropPoisonGeneratingFlags();
2274 return NewBO;
2275}
2276
2277/// If we have an insert of a scalar to a non-zero element of an undefined
2278/// vector and then shuffle that value, that's the same as inserting to the zero
2279/// element and shuffling. Splatting from the zero element is recognized as the
2280/// canonical form of splat.
2281static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2282 InstCombiner::BuilderTy &Builder) {
2283 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2284 ArrayRef<int> Mask = Shuf.getShuffleMask();
2285 Value *X;
2286 uint64_t IndexC;
2287
2288 // Match a shuffle that is a splat to a non-zero element.
2289 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2290 m_ConstantInt(IndexC)))) ||
2291 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2292 return nullptr;
2293
2294 // Insert into element 0 of a poison vector.
2295 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2296 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2297
2298 // Splat from element 0. Any mask element that is poison remains poison.
2299 // For example:
2300 // shuf (inselt poison, X, 2), _, <2,2,undef>
2301 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2302 unsigned NumMaskElts =
2303 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2304 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2305 for (unsigned i = 0; i != NumMaskElts; ++i)
2306 if (Mask[i] == PoisonMaskElem)
2307 NewMask[i] = Mask[i];
2308
2309 return new ShuffleVectorInst(NewIns, NewMask);
2310}
2311
2312/// Try to fold shuffles that are the equivalent of a vector select.
2313Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2314 if (!Shuf.isSelect())
2315 return nullptr;
2316
2317 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2318 // Commuting undef to operand 0 conflicts with another canonicalization.
2319 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2320 if (!match(Shuf.getOperand(1), m_Undef()) &&
2321 Shuf.getMaskValue(0) >= (int)NumElts) {
2322 // TODO: Can we assert that both operands of a shuffle-select are not undef
2323 // (otherwise, it would have been folded by instsimplify)?
2324 Shuf.commute();
2325 return &Shuf;
2326 }
2327
2328 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2329 return I;
2330
2331 if (Instruction *I = foldSelectShuffleWith1Binop(
2332 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2333 return I;
2334
2335 BinaryOperator *B0, *B1;
2336 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2337 !match(Shuf.getOperand(1), m_BinOp(B1)))
2338 return nullptr;
2339
2340 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2341 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2342 // with a multiply, we will exit because C0/C1 will not be set.
2343 Value *X, *Y;
2344 Constant *C0 = nullptr, *C1 = nullptr;
2345 bool ConstantsAreOp1;
2346 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2347 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2348 ConstantsAreOp1 = false;
2349 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2350 m_Neg(m_Value(X)))) &&
2351 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2352 m_Neg(m_Value(Y)))))
2353 ConstantsAreOp1 = true;
2354 else
2355 return nullptr;
2356
2357 // We need matching binops to fold the lanes together.
2358 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2359 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2360 bool DropNSW = false;
2361 if (ConstantsAreOp1 && Opc0 != Opc1) {
2362 // TODO: We drop "nsw" if shift is converted into multiply because it may
2363 // not be correct when the shift amount is BitWidth - 1. We could examine
2364 // each vector element to determine if it is safe to keep that flag.
2365 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2366 DropNSW = true;
2367 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2368 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2369 Opc0 = AltB0.Opcode;
2370 C0 = cast<Constant>(AltB0.Op1);
2371 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2372 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2373 Opc1 = AltB1.Opcode;
2374 C1 = cast<Constant>(AltB1.Op1);
2375 }
2376 }
2377
2378 if (Opc0 != Opc1 || !C0 || !C1)
2379 return nullptr;
2380
2381 // The opcodes must be the same. Use a new name to make that clear.
2382 BinaryOperator::BinaryOps BOpc = Opc0;
2383
2384 // Select the constant elements needed for the single binop.
2385 ArrayRef<int> Mask = Shuf.getShuffleMask();
2386 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2387
2388 // We are moving a binop after a shuffle. When a shuffle has an undefined
2389 // mask element, the result is undefined, but it is not poison or undefined
2390 // behavior. That is not necessarily true for div/rem/shift.
2391 bool MightCreatePoisonOrUB =
2392 is_contained(Mask, PoisonMaskElem) &&
2393 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2394 if (MightCreatePoisonOrUB)
2395 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2396 ConstantsAreOp1);
2397
2398 Value *V;
2399 if (X == Y) {
2400 // Remove a binop and the shuffle by rearranging the constant:
2401 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2402 // shuffle (op C0, V), (op C1, V), M --> op C', V
2403 V = X;
2404 } else {
2405 // If there are 2 different variable operands, we must create a new shuffle
2406 // (select) first, so check uses to ensure that we don't end up with more
2407 // instructions than we started with.
2408 if (!B0->hasOneUse() && !B1->hasOneUse())
2409 return nullptr;
2410
2411 // If we use the original shuffle mask and op1 is *variable*, we would be
2412 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2413 // poison. We do not have to guard against UB when *constants* are op1
2414 // because safe constants guarantee that we do not overflow sdiv/srem (and
2415 // there's no danger for other opcodes).
2416 // TODO: To allow this case, create a new shuffle mask with no undefs.
2417 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2418 return nullptr;
2419
2420 // Note: In general, we do not create new shuffles in InstCombine because we
2421 // do not know if a target can lower an arbitrary shuffle optimally. In this
2422 // case, the shuffle uses the existing mask, so there is no additional risk.
2423
2424 // Select the variable vectors first, then perform the binop:
2425 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2426 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2427 V = Builder.CreateShuffleVector(X, Y, Mask);
2428 }
2429
2430 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2431 Builder.CreateBinOp(BOpc, NewC, V);
2432
2433 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2434 // 1. If we changed an opcode, poison conditions might have changed.
2435 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2436 // where the original code did not. But if we already made a safe constant,
2437 // then there's no danger.
2438 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2439 NewI->copyIRFlags(B0);
2440 NewI->andIRFlags(B1);
2441 if (DropNSW)
2442 NewI->setHasNoSignedWrap(false);
2443 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2444 NewI->dropPoisonGeneratingFlags();
2445 }
2446 return replaceInstUsesWith(Shuf, NewBO);
2447}
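// Worked example for the X == Y case above (editor's sketch):
//   shuffle (add %v, <i32 1, i32 1, i32 1, i32 1>),
//           (add %v, <i32 2, i32 2, i32 2, i32 2>), <0,1,6,7>
//   --> add %v, <i32 1, i32 1, i32 2, i32 2>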
2448
2449/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2450/// Example (little endian):
2451/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2452static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2453 bool IsBigEndian) {
2454 // This must be a bitcasted shuffle of 1 vector integer operand.
2455 Type *DestType = Shuf.getType();
2456 Value *X;
2457 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2458 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2459 return nullptr;
2460
2461 // The source type must have the same number of elements as the shuffle,
2462 // and the source element type must be larger than the shuffle element type.
2463 Type *SrcType = X->getType();
2464 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2465 cast<FixedVectorType>(SrcType)->getNumElements() !=
2466 cast<FixedVectorType>(DestType)->getNumElements() ||
2467 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2468 return nullptr;
2469
2470 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2471 "Expected a shuffle that decreases length");
2472
2473 // Last, check that the mask chooses the correct low bits for each narrow
2474 // element in the result.
2475 uint64_t TruncRatio =
2476 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2477 ArrayRef<int> Mask = Shuf.getShuffleMask();
2478 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2479 if (Mask[i] == PoisonMaskElem)
2480 continue;
2481 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2482 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2483 if (Mask[i] != (int)LSBIndex)
2484 return nullptr;
2485 }
2486
2487 return new TruncInst(X, DestType);
2488}
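// Editor's note: the big-endian counterpart of the example above selects the
// high-indexed sub-elements, which hold the low bits of each wide element:
//   shuf (bitcast <4 x i16> X to <8 x i8>), <1, 3, 5, 7> --> trunc X to <4 x i8>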
2489
2490/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2491/// narrowing (concatenating with poison and extracting back to the original
2492/// length). This allows replacing the wide select with a narrow select.
2493static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2494 InstCombiner::BuilderTy &Builder) {
2495 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2496 // of the 1st vector operand of a shuffle.
2497 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2498 return nullptr;
2499
2500 // The vector being shuffled must be a vector select that we can eliminate.
2501 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2502 Value *Cond, *X, *Y;
2503 if (!match(Shuf.getOperand(0),
2504 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2505 return nullptr;
2506
2507 // We need a narrow condition value. It must be extended with poison elements
2508 // and have the same number of elements as this shuffle.
2509 unsigned NarrowNumElts =
2510 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2511 Value *NarrowCond;
2512 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2513 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2514 NarrowNumElts ||
2515 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2516 return nullptr;
2517
2518 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2519 // -->
2520 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2521 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2522 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2523 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2524}
2525
2526/// Canonicalize FP negate/abs after shuffle.
2527static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2528 InstCombiner::BuilderTy &Builder) {
2529 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2530 Value *X;
2531 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2532 return nullptr;
2533
2534 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2535
2536 // Match 1-input (unary) shuffle.
2537 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2538 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
2539 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2540 if (IsFNeg)
2541 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2542
2543 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2544 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2545 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2546 NewF->setFastMathFlags(S0->getFastMathFlags());
2547 return NewF;
2548 }
2549
2550 // Match 2-input (binary) shuffle.
2551 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2552 Value *Y;
2553 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2554 S0->getOpcode() != S1->getOpcode() ||
2555 (!S0->hasOneUse() && !S1->hasOneUse()))
2556 return nullptr;
2557
2558 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2559 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2560 Instruction *NewF;
2561 if (IsFNeg) {
2562 NewF = UnaryOperator::CreateFNeg(NewShuf);
2563 } else {
2564 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2565 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2566 NewF = CallInst::Create(FAbs, {NewShuf});
2567 }
2568 NewF->copyIRFlags(S0);
2569 NewF->andIRFlags(S1);
2570 return NewF;
2571}
2572
2573/// Canonicalize casts after shuffle.
2574static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2575 InstCombiner::BuilderTy &Builder) {
2576 // Do we have 2 matching cast operands?
2577 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2578 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2579 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2580 Cast0->getSrcTy() != Cast1->getSrcTy())
2581 return nullptr;
2582
2583 // TODO: Allow other opcodes? That would require easing the type restrictions
2584 // below here.
2585 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2586 switch (CastOpcode) {
2587 case Instruction::FPToSI:
2588 case Instruction::FPToUI:
2589 case Instruction::SIToFP:
2590 case Instruction::UIToFP:
2591 break;
2592 default:
2593 return nullptr;
2594 }
2595
2596 VectorType *ShufTy = Shuf.getType();
2597 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2598 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2599
2600 // TODO: Allow length-increasing shuffles?
2601 if (ShufTy->getElementCount().getKnownMinValue() >
2602 ShufOpTy->getElementCount().getKnownMinValue())
2603 return nullptr;
2604
2605 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2606 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2607 "Expected fixed vector operands for casts and binary shuffle");
2608 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2609 return nullptr;
2610
2611 // At least one of the operands must have only one use (the shuffle).
2612 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2613 return nullptr;
2614
2615 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2616 Value *X = Cast0->getOperand(0);
2617 Value *Y = Cast1->getOperand(0);
2618 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2619 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2620}
2621
2622/// Try to fold an extract subvector operation.
2623static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2624 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2625 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2626 return nullptr;
2627
2628 // Check if we are extracting all bits of an inserted scalar:
2629 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2630 Value *X;
2631 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2632 X->getType()->getPrimitiveSizeInBits() ==
2633 Shuf.getType()->getPrimitiveSizeInBits())
2634 return new BitCastInst(X, Shuf.getType());
2635
2636 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2637 Value *Y;
2638 ArrayRef<int> Mask;
2639 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2640 return nullptr;
2641
2642 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2643 // then combining may result in worse codegen.
2644 if (!Op0->hasOneUse())
2645 return nullptr;
2646
2647 // We are extracting a subvector from a shuffle. Remove excess elements from
2648 // the 1st shuffle mask to eliminate the extract.
2649 //
2650 // This transform is conservatively limited to identity extracts because we do
2651 // not allow arbitrary shuffle mask creation as a target-independent transform
2652 // (because we can't guarantee that will lower efficiently).
2653 //
2654 // If the extracting shuffle has a poison mask element, it transfers to the
2655 // new shuffle mask. Otherwise, copy the original mask element. Example:
2656 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2657 // shuf X, Y, <C0, poison, C2, poison>
2658 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2659 SmallVector<int, 16> NewMask(NumElts);
2660 assert(NumElts < Mask.size() &&
2661 "Identity with extract must have less elements than its inputs");
2662
2663 for (unsigned i = 0; i != NumElts; ++i) {
2664 int ExtractMaskElt = Shuf.getMaskValue(i);
2665 int MaskElt = Mask[i];
2666 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2667 }
2668 return new ShuffleVectorInst(X, Y, NewMask);
2669}
2670
2671/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2672/// operand with the operand of an insertelement.
2673static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2674 InstCombinerImpl &IC) {
2675 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2676 SmallVector<int, 16> Mask;
2677 Shuf.getShuffleMask(Mask);
2678
2679 int NumElts = Mask.size();
2680 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2681
2682 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2683 // not be able to handle it there if the insertelement has >1 use.
2684 // If the shuffle has an insertelement operand but does not choose the
2685 // inserted scalar element from that value, then we can replace that shuffle
2686 // operand with the source vector of the insertelement.
2687 Value *X;
2688 uint64_t IdxC;
2689 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2690 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2691 if (!is_contained(Mask, (int)IdxC))
2692 return IC.replaceOperand(Shuf, 0, X);
2693 }
2694 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2695 // Offset the index constant by the vector width because we are checking for
2696 // accesses to the 2nd vector input of the shuffle.
2697 IdxC += InpNumElts;
2698 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2699 if (!is_contained(Mask, (int)IdxC))
2700 return IC.replaceOperand(Shuf, 1, X);
2701 }
2702 // For the rest of the transform, the shuffle must not change vector sizes.
2703 // TODO: This restriction could be removed if the insert has only one use
2704 // (because the transform would require a new length-changing shuffle).
2705 if (NumElts != InpNumElts)
2706 return nullptr;
2707
2708 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2709 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2710 // We need an insertelement with a constant index.
2711 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2712 m_ConstantInt(IndexC))))
2713 return false;
2714
2715 // Test the shuffle mask to see if it splices the inserted scalar into the
2716 // operand 1 vector of the shuffle.
2717 int NewInsIndex = -1;
2718 for (int i = 0; i != NumElts; ++i) {
2719 // Ignore undef mask elements.
2720 if (Mask[i] == -1)
2721 continue;
2722
2723 // The shuffle takes elements of operand 1 without lane changes.
2724 if (Mask[i] == NumElts + i)
2725 continue;
2726
2727 // The shuffle must choose the inserted scalar exactly once.
2728 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2729 return false;
2730
2731 // The shuffle is placing the inserted scalar into element i.
2732 NewInsIndex = i;
2733 }
2734
2735 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2736
2737 // Index is updated to the potentially translated insertion lane.
2738 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2739 return true;
2740 };
2741
2742 // If the shuffle is unnecessary, insert the scalar operand directly into
2743 // operand 1 of the shuffle. Example:
2744 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2745 Value *Scalar;
2746 ConstantInt *IndexC;
2747 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2748 return InsertElementInst::Create(V1, Scalar, IndexC);
2749
2750 // Try again after commuting shuffle. Example:
2751 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2752 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2753 std::swap(V0, V1);
2754 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2755 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2756 return InsertElementInst::Create(V1, Scalar, IndexC);
2757
2758 return nullptr;
2759}
2760
2761static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2762 // Match the operands as identity with padding (also known as concatenation
2763 // with undef) shuffles of the same source type. The backend is expected to
2764 // recreate these concatenations from a shuffle of narrow operands.
2765 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2766 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2767 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2768 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2769 return nullptr;
2770
2771 // We limit this transform to power-of-2 types because we expect that the
2772 // backend can convert the simplified IR patterns to identical nodes as the
2773 // original IR.
2774 // TODO: If we can verify the same behavior for arbitrary types, the
2775 // power-of-2 checks can be removed.
2776 Value *X = Shuffle0->getOperand(0);
2777 Value *Y = Shuffle1->getOperand(0);
2778 if (X->getType() != Y->getType() ||
2779 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2780 !isPowerOf2_32(
2781 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2782 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2783 match(X, m_Undef()) || match(Y, m_Undef()))
2784 return nullptr;
2785 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2786 match(Shuffle1->getOperand(1), m_Undef()) &&
2787 "Unexpected operand for identity shuffle");
2788
2789 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2790 // operands directly by adjusting the shuffle mask to account for the narrower
2791 // types:
2792 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2793 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2794 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2795 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2796
2797 ArrayRef<int> Mask = Shuf.getShuffleMask();
2798 SmallVector<int, 16> NewMask(Mask.size(), -1);
2799 for (int i = 0, e = Mask.size(); i != e; ++i) {
2800 if (Mask[i] == -1)
2801 continue;
2802
2803 // If this shuffle is choosing an undef element from 1 of the sources, that
2804 // element is undef.
2805 if (Mask[i] < WideElts) {
2806 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2807 continue;
2808 } else {
2809 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2810 continue;
2811 }
2812
2813 // If this shuffle is choosing from the 1st narrow op, the mask element is
2814 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2815 // element is offset down to adjust for the narrow vector widths.
2816 if (Mask[i] < WideElts) {
2817 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2818 NewMask[i] = Mask[i];
2819 } else {
2820 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2821 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2822 }
2823 }
2824 return new ShuffleVectorInst(X, Y, NewMask);
2825}
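// Worked mask example for the fold above (editor's sketch): X and Y are
// <2 x i32> padded to <4 x i32> by the inner shuffles:
//   shuf (shuf X, poison, <0,1,-1,-1>), (shuf Y, poison, <0,1,-1,-1>), <0,4,1,5>
//   --> shuf X, Y, <0,2,1,3>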
2826
2827// Splatting the first element of the result of a BinOp, where any of the
2828// BinOp's operands are the result of a first-element splat, can be simplified
2829// to splatting the first element of the BinOp's result.
2830Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2831 if (!match(SVI.getOperand(1), m_Poison()) ||
2832 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2833 !SVI.getOperand(0)->hasOneUse())
2834 return nullptr;
2835
2836 Value *Op0 = SVI.getOperand(0);
2837 Value *X, *Y;
2838 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
2839 m_Value(Y))) &&
2840 !match(Op0, m_BinOp(m_Value(X),
2841 m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
2842 return nullptr;
2843 if (X->getType() != Y->getType())
2844 return nullptr;
2845
2846 auto *BinOp = cast<BinaryOperator>(Op0);
2847 if (!isSafeToSpeculativelyExecute(BinOp))
2848 return nullptr;
2849
2850 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2851 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2852 NewBOI->copyIRFlags(BinOp);
2853
2854 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2855}
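// Example of the fold above (editor's sketch):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %y
//   %s  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// becomes
//   %nb = add <4 x i32> %x, %y
//   %s  = shufflevector <4 x i32> %nb, <4 x i32> poison, <4 x i32> zeroinitializer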
2856
2857Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2858 Value *LHS = SVI.getOperand(0);
2859 Value *RHS = SVI.getOperand(1);
2860 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2861 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2862 SVI.getType(), ShufQuery))
2863 return replaceInstUsesWith(SVI, V);
2864
2865 if (Instruction *I = simplifyBinOpSplats(SVI))
2866 return I;
2867
2868 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2869 // order to support scalable vectors.
2870 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2871 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2872
2873 if (isa<ScalableVectorType>(LHS->getType()))
2874 return nullptr;
2875
2876 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2877 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2878
2879 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2880 //
2881 // if X and Y are of the same (vector) type, and the element size is not
2882 // changed by the bitcasts, we can distribute the bitcasts through the
2883 // shuffle, hopefully reducing the number of instructions. We make sure that
2884 // at least one bitcast only has one use, so we don't *increase* the number of
2885 // instructions here.
2886 Value *X, *Y;
2887 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2888 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2889 X->getType()->getScalarSizeInBits() ==
2890 SVI.getType()->getScalarSizeInBits() &&
2891 (LHS->hasOneUse() || RHS->hasOneUse())) {
2892 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2893 SVI.getName() + ".uncasted");
2894 return new BitCastInst(V, SVI.getType());
2895 }
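// An illustrative before/after in LLVM IR (hypothetical values; i32 and
// float have the same element size, so the mask is unchanged):
//   %xb = bitcast <4 x i32> %x to <4 x float>
//   %yb = bitcast <4 x i32> %y to <4 x float>
//   %s  = shufflevector <4 x float> %xb, <4 x float> %yb, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// becomes:
//   %s.uncasted = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s = bitcast <4 x i32> %s.uncasted to <4 x float>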
2896
2897 ArrayRef<int> Mask = SVI.getShuffleMask();
2898
2899 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2900 // simulated shuffle can simplify, then this shuffle is unnecessary:
2901 // shuf (bitcast X), undef, Mask --> bitcast X'
2902 // TODO: This could be extended to allow length-changing shuffles.
2903 // The transform might also be obsoleted if we allowed canonicalization
2904 // of bitcasted shuffles.
2905 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2906 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2907 // Try to create a scaled mask constant.
2908 auto *XType = cast<FixedVectorType>(X->getType());
2909 unsigned XNumElts = XType->getNumElements();
2910 SmallVector<int, 16> ScaledMask;
2911 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2912 // If the shuffled source vector simplifies, cast that value to this
2913 // shuffle's type.
2914 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2915 ScaledMask, XType, ShufQuery))
2916 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2917 }
2918 }
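// An illustrative example in LLVM IR (hypothetical values): for
//   %bc = bitcast <2 x i64> %x to <4 x i32>
//   %s  = shufflevector <4 x i32> %bc, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// the mask scales to the <2 x i64> identity mask <i32 0, i32 1>, the
// simulated shuffle simplifies to %x, and the whole sequence becomes:
//   %s = bitcast <2 x i64> %x to <4 x i32>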
2919
2920 // shuffle x, x, mask --> shuffle x, undef, mask'
2921 if (LHS == RHS) {
2922 assert(!match(RHS, m_Undef()) &&
2923 "Shuffle with 2 undef ops not simplified?");
2924 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2925 }
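// For example (illustrative IR):
//   shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// reads both operands as the same vector, so it is rewritten as:
//   shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>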
2926
2927 // shuffle undef, x, mask --> shuffle x, undef, mask'
2928 if (match(LHS, m_Undef())) {
2929 SVI.commute();
2930 return &SVI;
2931 }
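// For example (illustrative IR):
//   shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// commutes to:
//   shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>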
2932
2933 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2934 return I;
2935
2936 if (Instruction *I = foldSelectShuffle(SVI))
2937 return I;
2938
2939 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2940 return I;
2941
2942 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2943 return I;
2944
2945 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2946 return I;
2947
2948 if (Instruction *I = foldCastShuffle(SVI, Builder))
2949 return I;
2950
2951 APInt PoisonElts(VWidth, 0);
2952 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2953 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2954 if (V != &SVI)
2955 return replaceInstUsesWith(SVI, V);
2956 return &SVI;
2957 }
2958
2959 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2960 return I;
2961
2962 // These transforms have the potential to lose undef knowledge, so they are
2963 // intentionally placed after SimplifyDemandedVectorElts().
2964 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2965 return I;
2966 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2967 return I;
2968
2969 if (match(RHS, m_Constant())) {
2970 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
2971 // We cannot do this fold for elementwise select since ShuffleVector is
2972 // not elementwise.
2973 if (SI->getCondition()->getType()->isIntegerTy() &&
2974 (isa<PoisonValue>(RHS) ||
2975 isGuaranteedNotToBePoison(SI->getCondition()))) {
2976 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
2977 return I;
2978 }
2979 }
2980 if (auto *PN = dyn_cast<PHINode>(LHS)) {
2981 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
2982 return I;
2983 }
2984 }
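// An illustrative FoldOpIntoSelect case in LLVM IR (hypothetical values;
// the condition is a scalar i1, so the select is not elementwise):
//   %sel = select i1 %c, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
//   %r   = shufflevector <2 x i32> %sel, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
// Both arms fold to constants under the shuffle, so this can become:
//   %r = select i1 %c, <2 x i32> <i32 2, i32 1>, <2 x i32> <i32 4, i32 3>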
2985
2986 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
2987 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2988 return replaceInstUsesWith(SVI, V);
2989 }
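// An illustrative example in LLVM IR (hypothetical values): a single-use
// insertelement chain can be rebuilt directly in the shuffled order:
//   %v = insertelement <4 x i32> poison, i32 %s, i64 0
//   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// becomes:
//   %r = insertelement <4 x i32> poison, i32 %s, i64 3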
2990
2991 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2992 // a non-vector type. We can instead bitcast the original vector followed by
2993 // an extract of the desired element:
2994 //
2995 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2996 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2997 // %1 = bitcast <4 x i8> %sroa to i32
2998 // Becomes:
2999 // %bc = bitcast <16 x i8> %in to <4 x i32>
3000 // %ext = extractelement <4 x i32> %bc, i32 0
3001 //
3002 // If the shuffle is extracting a contiguous range of values from the input
3003 // vector then each use which is a bitcast of the extracted size can be
3004 // replaced. This will work if the vector types are compatible and the begin
3005 // index is aligned to an element boundary of the cast vector type. If the
3006 // begin index isn't aligned, we can shuffle the original vector (keeping the same
3007 // vector type) before extracting.
3008 //
3009 // This code will bail out if the target type is fundamentally incompatible
3010 // with vectors of the source type.
3011 //
3012 // Example of <16 x i8>, target type i32:
3013 // Index range [4,8):         v-----------v Will work.
3014 //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3015 //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
3016 //     <4 x i32>: |           |           |           |           |
3017 //                +-----------+-----------+-----------+-----------+
3018 // Index range [6,10):              ^-----------^ Needs an extra shuffle.
3019 // Target type i40:           ^--------------^ Won't work, bail.
3020 bool MadeChange = false;
3021 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3022 Value *V = LHS;
3023 unsigned MaskElems = Mask.size();
3024 auto *SrcTy = cast<FixedVectorType>(V->getType());
3025 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
3026 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3027 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3028 unsigned SrcNumElems = SrcTy->getNumElements();
3029 SmallVector<BitCastInst *, 8> BCs;
3030 DenseMap<Type *, Value *> NewBCs;
3031 for (User *U : SVI.users())
3032 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
3033 if (!BC->use_empty())
3034 // Only visit bitcasts that weren't previously handled.
3035 BCs.push_back(BC);
3036 for (BitCastInst *BC : BCs) {
3037 unsigned BegIdx = Mask.front();
3038 Type *TgtTy = BC->getDestTy();
3039 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3040 if (!TgtElemBitWidth)
3041 continue;
3042 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3043 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3044 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3045 if (!VecBitWidthsEqual)
3046 continue;
3047 if (!VectorType::isValidElementType(TgtTy))
3048 continue;
3049 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3050 if (!BegIsAligned) {
3051 // Shuffle the input so [0,MaskElems) contains the output, and
3052 // [MaskElems,SrcNumElems) is undef.
3053 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3054 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3055 ShuffleMask[I] = Idx;
3056 V = Builder.CreateShuffleVector(V, ShuffleMask,
3057 SVI.getName() + ".extract");
3058 BegIdx = 0;
3059 }
3060 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3061 assert(SrcElemsPerTgtElem);
3062 BegIdx /= SrcElemsPerTgtElem;
3063 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
3064 auto *NewBC =
3065 BCAlreadyExists
3066 ? NewBCs[CastSrcTy]
3067 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3068 if (!BCAlreadyExists)
3069 NewBCs[CastSrcTy] = NewBC;
3070 auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
3071 SVI.getName() + ".extract");
3072 // The shufflevector isn't being replaced: the bitcast that used it
3073 // is. InstCombine will visit the newly-created instructions.
3074 replaceInstUsesWith(*BC, Ext);
3075 MadeChange = true;
3076 }
3077 }
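// An illustrative instance of the unaligned case above (hypothetical
// values; the 12 trailing mask elements of %ext are poison):
//   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
//   %val  = bitcast <4 x i8> %sroa to i32
// The begin index 6 is not i32-aligned (6 * 8 bits is not a multiple of
// 32), so the input is first reshuffled within its own <16 x i8> type:
//   %ext = shufflevector <16 x i8> %in, <16 x i8> poison, <16 x i32> <i32 6, i32 7, i32 8, i32 9, ...>
//   %bc  = bitcast <16 x i8> %ext to <4 x i32>
//   %val = extractelement <4 x i32> %bc, i64 0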
3078
3079 // If the LHS is a shufflevector itself, see if we can combine it with this
3080 // one without producing an unusual shuffle.
3081 // Cases that might be simplified:
3082 // 1.
3083 // x1=shuffle(v1,v2,mask1)
3084 // x=shuffle(x1,undef,mask)
3085 // ==>
3086 // x=shuffle(v1,undef,newMask)
3087 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3088 // 2.
3089 // x1=shuffle(v1,undef,mask1)
3090 // x=shuffle(x1,x2,mask)
3091 // where v1.size() == mask1.size()
3092 // ==>
3093 // x=shuffle(v1,x2,newMask)
3094 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3095 // 3.
3096 // x2=shuffle(v2,undef,mask2)
3097 // x=shuffle(x1,x2,mask)
3098 // where v2.size() == mask2.size()
3099 // ==>
3100 // x=shuffle(x1,v2,newMask)
3101 // newMask[i] = (mask[i] < x1.size())
3102 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3103 // 4.
3104 // x1=shuffle(v1,undef,mask1)
3105 // x2=shuffle(v2,undef,mask2)
3106 // x=shuffle(x1,x2,mask)
3107 // where v1.size() == v2.size()
3108 // ==>
3109 // x=shuffle(v1,v2,newMask)
3110 // newMask[i] = (mask[i] < x1.size())
3111 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3112 //
3113 // Here we are really conservative:
3114 // we avoid producing a shuffle mask that does not appear in the input
3115 // program, because codegen may not be smart enough to turn a merged
3116 // shuffle back into two specific shuffles: it may produce worse code. As such,
3117 // we only merge two shuffles if the result is either a splat or one of the
3118 // input shuffle masks. In this case, merging the shuffles just removes
3119 // one instruction, which we know is safe. This is good for things like
3120 // turning: (splat(splat)) -> splat, or
3121 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
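// An illustrative case-1 merge in LLVM IR (hypothetical values); the
// result is a splat, which is one of the masks we allow:
//   %x1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// newMask[i] = mask1[mask[i]] = mask1[0] = 1 for every lane, giving:
//   %x = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>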
3122 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3123 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3124 if (LHSShuffle)
3125 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3126 !match(RHS, m_Poison()))
3127 LHSShuffle = nullptr;
3128 if (RHSShuffle)
3129 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3130 RHSShuffle = nullptr;
3131 if (!LHSShuffle && !RHSShuffle)
3132 return MadeChange ? &SVI : nullptr;
3133
3134 Value* LHSOp0 = nullptr;
3135 Value* LHSOp1 = nullptr;
3136 Value* RHSOp0 = nullptr;
3137 unsigned LHSOp0Width = 0;
3138 unsigned RHSOp0Width = 0;
3139 if (LHSShuffle) {
3140 LHSOp0 = LHSShuffle->getOperand(0);
3141 LHSOp1 = LHSShuffle->getOperand(1);
3142 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3143 }
3144 if (RHSShuffle) {
3145 RHSOp0 = RHSShuffle->getOperand(0);
3146 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3147 }
3148 Value* newLHS = LHS;
3149 Value* newRHS = RHS;
3150 if (LHSShuffle) {
3151 // case 1
3152 if (match(RHS, m_Poison())) {
3153 newLHS = LHSOp0;
3154 newRHS = LHSOp1;
3155 }
3156 // case 2 or 4
3157 else if (LHSOp0Width == LHSWidth) {
3158 newLHS = LHSOp0;
3159 }
3160 }
3161 // case 3 or 4
3162 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3163 newRHS = RHSOp0;
3164 }
3165 // case 4
3166 if (LHSOp0 == RHSOp0) {
3167 newLHS = LHSOp0;
3168 newRHS = nullptr;
3169 }
3170
3171 if (newLHS == LHS && newRHS == RHS)
3172 return MadeChange ? &SVI : nullptr;
3173
3174 ArrayRef<int> LHSMask;
3175 ArrayRef<int> RHSMask;
3176 if (newLHS != LHS)
3177 LHSMask = LHSShuffle->getShuffleMask();
3178 if (RHSShuffle && newRHS != RHS)
3179 RHSMask = RHSShuffle->getShuffleMask();
3180
3181 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3182 SmallVector<int, 16> newMask;
3183 bool isSplat = true;
3184 int SplatElt = -1;
3185 // Create a new mask for the new ShuffleVectorInst so that the new
3186 // ShuffleVectorInst is equivalent to the original one.
3187 for (unsigned i = 0; i < VWidth; ++i) {
3188 int eltMask;
3189 if (Mask[i] < 0) {
3190 // This element is a poison value.
3191 eltMask = -1;
3192 } else if (Mask[i] < (int)LHSWidth) {
3193 // This element is from the left-hand-side vector operand.
3194 //
3195 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3196 // new mask value for the element.
3197 if (newLHS != LHS) {
3198 eltMask = LHSMask[Mask[i]];
3199 // If the value selected is a poison value, explicitly specify it
3200 // with a -1 mask value.
3201 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3202 eltMask = -1;
3203 } else
3204 eltMask = Mask[i];
3205 } else {
3206 // This element is from the right-hand-side vector operand.
3207 //
3208 // If the value selected is a poison value, explicitly specify it
3209 // with a -1 mask value. (case 1)
3210 if (match(RHS, m_Poison()))
3211 eltMask = -1;
3212 // If RHS is going to be replaced (case 3 or 4), calculate the
3213 // new mask value for the element.
3214 else if (newRHS != RHS) {
3215 eltMask = RHSMask[Mask[i]-LHSWidth];
3216 // If the value selected is a poison value, explicitly specify it
3217 // with a -1 mask value.
3218 if (eltMask >= (int)RHSOp0Width) {
3219 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3220 "should have been check above");
3221 eltMask = -1;
3222 }
3223 } else
3224 eltMask = Mask[i]-LHSWidth;
3225
3226 // If LHS's width is changed, shift the mask value accordingly.
3227 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3228 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3229 // If newRHS == newLHS, we want to remap any references from newRHS to
3230 // newLHS so that we can properly identify splats that may occur due to
3231 // obfuscation across the two vectors.
3232 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3233 eltMask += newLHSWidth;
3234 }
3235
3236 // Check if this could still be a splat.
3237 if (eltMask >= 0) {
3238 if (SplatElt >= 0 && SplatElt != eltMask)
3239 isSplat = false;
3240 SplatElt = eltMask;
3241 }
3242
3243 newMask.push_back(eltMask);
3244 }
3245
3246 // If the result mask is equal to one of the original shuffle masks,
3247 // or is a splat, do the replacement.
3248 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3249 if (!newRHS)
3250 newRHS = PoisonValue::get(newLHS->getType());
3251 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3252 }
3253
3254 return MadeChange ? &SVI : nullptr;
3255}