//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
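/// For example (illustrative IR, not from this file): extracting a constant
/// lane of a one-use add is cheap, because the add can be redone on the two
/// extracted scalars:
///   %v = add <4 x i32> %a, %b
///   %e = extractelement <4 x i32> %v, i64 0
/// becomes
///   %a0 = extractelement <4 x i32> %a, i64 0
///   %b0 = extractelement <4 x i32> %b, i64 0
///   %e  = add i32 %a0, %b0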
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index must be lower than the known minimum size of the vector,
    // because for a scalable vector the actual size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
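// For example (illustrative IR, not from this file):
//   %vphi  = phi <4 x float> [ %init, %entry ], [ %vnext, %loop ]
//   %vnext = fmul <4 x float> %vphi, %step
//   %s     = extractelement <4 x float> %vphi, i64 0
// can be rewritten so that the PHI, the fmul, and the extract all become
// scalar operations on lane 0.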
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to a vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove the insertelement if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved into SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
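/// For example (illustrative): if the only user is
/// `extractelement <4 x i32> %v, i64 2`, only element 2 of %v is demanded;
/// a shufflevector user demands exactly the lanes its mask selects from %v.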
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
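/// For example (illustrative): an index of `i32 5` is rewritten as `i64 5`,
/// while an index that is already i64, or one that doesn't fit in 64 bits,
/// is left alone (the latter by returning nullptr).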
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement were to perform an out-of-bounds access because of
  // the values of %c1 and/or %c2, the sequence could be optimized early. This
  // is currently not possible because constant folding would reach an
  // unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize the type of constant indices to i64 to simplify CSE.
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index must be lower than the known minimum size of the vector,
      // because for a scalable vector the actual size is only known at run
      // time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return poison.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. So if both indices are constants here, they must
      // differ, and we can extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *, 8> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
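/// For example (illustrative): with LHS = <4 x i32> %a and RHS = <4 x i32> %b,
/// a chain of inserts that rebuilds <%a[0], %b[1], %a[2], %a[3]> yields the
/// mask <0, 5, 2, 3> (lanes of RHS are numbered after the 4 lanes of LHS).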
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
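/// For example (illustrative): when extracting from <4 x i32> %src and
/// inserting into an <8 x i32> destination, %src is first widened with
///   shufflevector <4 x i32> %src, <4 x i32> poison,
///                 <8 x i32> <i32 0, i32 1, i32 2, i32 3,
///                            i32 poison, i32 poison, i32 poison, i32 poison>
/// so each insert/extract pair can later become a single shuffle.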
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or 0), and
/// set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
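/// For example (illustrative): if a <4 x i32> value is assembled as
///   %v = insertelement <4 x i32> %a, i32 %s, i64 3
/// where %s = extractelement <4 x i32> %b, i64 1, the result is the pair
/// (%a, %b) with Mask = <0, 1, 2, 5> (lanes of %b follow the 4 lanes of %a).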
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted-from or inserted-into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted to see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
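/// For example (illustrative IR):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// reconstructs %agg element-by-element, so %i1 can simply be replaced
/// by %agg.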
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for a limit of 2 is to be
  // able to handle the clang C++ exception struct (which is hardcoded as
  // {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value for each element of the aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or we can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was made
    /// didn't match the aggregate type into which the insertion was made,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see that it's the same aggregate for each element.
  // If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look at the blocks in which each of the AggElts is being
  // defined; they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}
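// For example (illustrative): the shuffle
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// takes lanes 0 and 2 from %a and lanes 1 and 3 from %b without moving any
// lane, so it behaves like a vector select (blend).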
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
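/// For example (illustrative):
///   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 7, i32 7, i32 7, i32 7>,
///                      <4 x i32> <i32 0, i32 5, i32 2, i32 3>
///   %i = insertelement <4 x i32> %s, i32 9, i64 2
/// becomes a single shuffle of %x with the constant vector
/// <i32 7, i32 7, i32 9, i32 7> and mask <i32 0, i32 5, i32 6, i32 3>.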
1400 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1401 // Bail out if the parent has more than one use. In that case, we'd be
1402 // replacing the insertelt with a shuffle, and that's not a clear win.
1403 if (!Inst || !Inst->hasOneUse())
1404 return nullptr;
1405 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1406 // The shuffle must have a constant vector operand. The insertelt must have
1407 // a constant scalar being inserted at a constant position in the vector.
1408 Constant *ShufConstVec, *InsEltScalar;
1409 uint64_t InsEltIndex;
1410 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1411 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1412 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1413 return nullptr;
1414
1415 // Adding an element to an arbitrary shuffle could be expensive, but a
1416 // shuffle that selects elements from vectors without crossing lanes is
1417 // assumed cheap.
1418 // If we're just adding a constant into that shuffle, it will still be
1419 // cheap.
1420 if (!isShuffleEquivalentToSelect(*Shuf))
1421 return nullptr;
1422
1423 // From the above 'select' check, we know that the mask has the same number
1424 // of elements as the vector input operands. We also know that each constant
1425 // input element is used in its lane and can not be used more than once by
1426 // the shuffle. Therefore, replace the constant in the shuffle's constant
1427 // vector with the insertelt constant. Replace the constant in the shuffle's
1428 // mask vector with the insertelt index plus the length of the vector
1429 // (because the constant vector operand of a shuffle is always the 2nd
1430 // operand).
1431 ArrayRef<int> Mask = Shuf->getShuffleMask();
1432 unsigned NumElts = Mask.size();
1433 SmallVector<Constant *, 16> NewShufElts(NumElts);
1434 SmallVector<int, 16> NewMaskElts(NumElts);
1435 for (unsigned I = 0; I != NumElts; ++I) {
1436 if (I == InsEltIndex) {
1437 NewShufElts[I] = InsEltScalar;
1438 NewMaskElts[I] = InsEltIndex + NumElts;
1439 } else {
1440 // Copy over the existing values.
1441 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1442 NewMaskElts[I] = Mask[I];
1443 }
1444
1445 // Bail if we failed to find an element.
1446 if (!NewShufElts[I])
1447 return nullptr;
1448 }
1449
1450 // Create new operands for a shuffle that includes the constant of the
1451 // original insertelt. The old shuffle will be dead now.
1452 return new ShuffleVectorInst(Shuf->getOperand(0),
1453 ConstantVector::get(NewShufElts), NewMaskElts);
1454 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1455 // Transform sequences of insertelement ops with constant data/indexes into
1456 // a single shuffle op.
1457 // Cannot handle a scalable type: the number of elements needed to create
1458 // the shuffle mask is not a compile-time constant.
1459 if (isa<ScalableVectorType>(InsElt.getType()))
1460 return nullptr;
1461 unsigned NumElts =
1462 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1463
1464 uint64_t InsertIdx[2];
1465 Constant *Val[2];
1466 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1467 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1468 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1469 !match(IEI->getOperand(1), m_Constant(Val[1])))
1470 return nullptr;
1471 SmallVector<Constant *, 16> Values(NumElts);
1472 SmallVector<int, 16> Mask(NumElts);
1473 auto ValI = std::begin(Val);
1474 // Generate new constant vector and mask.
1475 // We have 2 values/masks from the insertelements instructions. Insert them
1476 // into new value/mask vectors.
1477 for (uint64_t I : InsertIdx) {
1478 if (!Values[I]) {
1479 Values[I] = *ValI;
1480 Mask[I] = NumElts + I;
1481 }
1482 ++ValI;
1483 }
1484 // Remaining values are filled with 'poison' values.
1485 for (unsigned I = 0; I < NumElts; ++I) {
1486 if (!Values[I]) {
1487 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1488 Mask[I] = I;
1489 }
1490 }
1491 // Create new operands for a shuffle that includes the constant of the
1492 // original insertelt.
1493 return new ShuffleVectorInst(IEI->getOperand(0),
1494 ConstantVector::get(Values), Mask);
1495 }
1496 return nullptr;
1497}
1498
1499/// If both the base vector and the inserted element are extended from the same
1500/// type, do the insert element in the narrow source type followed by extend.
1501/// TODO: This can be extended to include other cast opcodes, but particularly
1502/// if we create a wider insertelement, make sure codegen is not harmed.
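///
/// A minimal sketch (types chosen for illustration):
///   inselt (fpext <2 x float> %x to <2 x double>), (fpext float %y), 1
///   --> fpext (inselt <2 x float> %x, float %y, 1) to <2 x double>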
1503static Instruction *narrowInsElt(InsertElementInst &InsElt,
1504 InstCombiner::BuilderTy &Builder) {
1505 // We are creating a vector extend. If the original vector extend has another
1506 // use, that would mean we end up with 2 vector extends, so avoid that.
1507 // TODO: We could ease the use-clause to "if at least one op has one use"
1508 // (assuming that the source types match - see next TODO comment).
1509 Value *Vec = InsElt.getOperand(0);
1510 if (!Vec->hasOneUse())
1511 return nullptr;
1512
1513 Value *Scalar = InsElt.getOperand(1);
1514 Value *X, *Y;
1515 CastInst::CastOps CastOpcode;
1516 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1517 CastOpcode = Instruction::FPExt;
1518 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1519 CastOpcode = Instruction::SExt;
1520 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1521 CastOpcode = Instruction::ZExt;
1522 else
1523 return nullptr;
1524
1525 // TODO: We can allow mismatched types by creating an intermediate cast.
1526 if (X->getType()->getScalarType() != Y->getType())
1527 return nullptr;
1528
1529 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1530 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1531 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1532}
1533
1534/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1535/// try to convert to a single insert with appropriate bitcasts.
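/// For example, on a little-endian target (an illustrative sketch inserting
/// the two i16 halves of an i32 %x into a <4 x i16> undef base):
///   inselt (inselt undef, (trunc %x), 0), (trunc (lshr %x, 16)), 1
///   --> bitcast (inselt (bitcast undef to <2 x i32>), i32 %x, 0) to <4 x i16>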
1536static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1537 bool IsBigEndian,
1538 InstCombiner::BuilderTy &Builder) {
1539 Value *VecOp = InsElt.getOperand(0);
1540 Value *ScalarOp = InsElt.getOperand(1);
1541 Value *IndexOp = InsElt.getOperand(2);
1542
1543 // Pattern depends on endian because we expect lower index is inserted first.
1544 // Big endian:
1545 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1546 // Little endian:
1547 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1548 // Note: It is not safe to do this transform with an arbitrary base vector
1549 // because the bitcast of that vector to fewer/larger elements could
1550 // allow poison to spill into an element that was not poison before.
1551 // TODO: Detect smaller fractions of the scalar.
1552 // TODO: One-use checks are conservative.
1553 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1554 Value *Scalar0, *BaseVec;
1555 uint64_t Index0, Index1;
1556 if (!VTy || (VTy->getNumElements() & 1) ||
1557 !match(IndexOp, m_ConstantInt(Index1)) ||
1558 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1559 m_ConstantInt(Index0))) ||
1560 !match(BaseVec, m_Undef()))
1561 return nullptr;
1562
1563 // The first insert must be to the index one less than this one, and
1564 // that index must be even.
1565 if (Index0 + 1 != Index1 || Index0 & 1)
1566 return nullptr;
1567
1568 // For big endian, the high half of the value should be inserted first.
1569 // For little endian, the low half of the value should be inserted first.
1570 Value *X;
1571 uint64_t ShAmt;
1572 if (IsBigEndian) {
1573 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1574 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1575 return nullptr;
1576 } else {
1577 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1578 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1579 return nullptr;
1580 }
1581
1582 Type *SrcTy = X->getType();
1583 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1584 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1585 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1586 return nullptr;
1587
1588 // Bitcast the base vector to a vector type with the source element type.
1589 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1590 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1591
1592 // Scale the insert index for a vector with half as many elements.
1593 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1594 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1595 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1596 return new BitCastInst(NewInsert, VTy);
1597}
1598
1599Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1600 Value *VecOp = IE.getOperand(0);
1601 Value *ScalarOp = IE.getOperand(1);
1602 Value *IdxOp = IE.getOperand(2);
1603
1604 if (auto *V = simplifyInsertElementInst(
1605 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1606 return replaceInstUsesWith(IE, V);
1607
1608 // Canonicalize type of constant indices to i64 to simplify CSE
1609 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1610 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1611 return replaceOperand(IE, 2, NewIdx);
1612
1613 Value *BaseVec, *OtherScalar;
1614 uint64_t OtherIndexVal;
1615 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1616 m_Value(OtherScalar),
1617 m_ConstantInt(OtherIndexVal)))) &&
1618 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1619 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1620 return InsertElementInst::Create(NewIns, OtherScalar,
1621 Builder.getInt64(OtherIndexVal));
1622 }
1623 }
1624
1625 // If the scalar is bitcast and inserted into undef, do the insert in the
1626 // source type followed by bitcast.
1627 // TODO: Generalize for insert into any constant, not just undef?
1628 Value *ScalarSrc;
1629 if (match(VecOp, m_Undef()) &&
1630 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1631 (ScalarSrc->getType()->isIntegerTy() ||
1632 ScalarSrc->getType()->isFloatingPointTy())) {
1633 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1634 // bitcast (inselt undef, ScalarSrc, IdxOp)
1635 Type *ScalarTy = ScalarSrc->getType();
1636 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1637 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1638 : UndefValue::get(VecTy);
1639 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1640 return new BitCastInst(NewInsElt, IE.getType());
1641 }
1642
1643 // If the vector and scalar are both bitcast from the same element type, do
1644 // the insert in that source type followed by bitcast.
1645 Value *VecSrc;
1646 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1647 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1648 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1649 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1650 cast<VectorType>(VecSrc->getType())->getElementType() ==
1651 ScalarSrc->getType()) {
1652 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1653 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1654 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1655 return new BitCastInst(NewInsElt, IE.getType());
1656 }
1657
1658 // If the inserted element was extracted from some other fixed-length vector
1659 // and both indexes are valid constants, try to turn this into a shuffle.
1660 // Cannot handle a scalable vector type: the number of elements needed to
1661 // create the shuffle mask is not a compile-time constant.
1662 uint64_t InsertedIdx, ExtractedIdx;
1663 Value *ExtVecOp;
1664 if (isa<FixedVectorType>(IE.getType()) &&
1665 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1666 match(ScalarOp,
1667 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1668 isa<FixedVectorType>(ExtVecOp->getType()) &&
1669 ExtractedIdx <
1670 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1671 // TODO: Looking at the user(s) to determine if this insert is a
1672 // fold-to-shuffle opportunity does not match the usual instcombine
1673 // constraints. We should decide if the transform is worthy based only
1674 // on this instruction and its operands, but that may not work currently.
1675 //
1676 // Here, we are trying to avoid creating shuffles before reaching
1677 // the end of a chain of extract-insert pairs. This is complicated because
1678 // we do not generally form arbitrary shuffle masks in instcombine
1679 // (because those may codegen poorly), but collectShuffleElements() does
1680 // exactly that.
1681 //
1682 // The rules for determining what is an acceptable target-independent
1683 // shuffle mask are fuzzy because they evolve based on the backend's
1684 // capabilities and real-world impact.
1685 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1686 if (!Insert.hasOneUse())
1687 return true;
1688 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1689 if (!InsertUser)
1690 return true;
1691 return false;
1692 };
1693
1694 // Try to form a shuffle from a chain of extract-insert ops.
1695 if (isShuffleRootCandidate(IE)) {
1696 bool Rerun = true;
1697 while (Rerun) {
1698 Rerun = false;
1699
1700 SmallVector<int, 16> Mask;
1701 ShuffleOps LR =
1702 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1703
1704 // The proposed shuffle may be trivial, in which case we shouldn't
1705 // perform the combine.
1706 if (LR.first != &IE && LR.second != &IE) {
1707 // We now have a shuffle of LHS, RHS, Mask.
1708 if (LR.second == nullptr)
1709 LR.second = PoisonValue::get(LR.first->getType());
1710 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1711 }
1712 }
1713 }
1714 }
1715
1716 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1717 unsigned VWidth = VecTy->getNumElements();
1718 APInt PoisonElts(VWidth, 0);
1719 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1720 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1721 PoisonElts)) {
1722 if (V != &IE)
1723 return replaceInstUsesWith(IE, V);
1724 return &IE;
1725 }
1726 }
1727
1728 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1729 return Shuf;
1730
1731 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1732 return NewInsElt;
1733
1734 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1735 return Broadcast;
1736
1737 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1738 return Splat;
1739
1740 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1741 return IdentityShuf;
1742
1743 if (Instruction *Ext = narrowInsElt(IE, Builder))
1744 return Ext;
1745
1746 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1747 return Ext;
1748
1749 return nullptr;
1750}
1751
1752/// Return true if we can evaluate the specified expression tree if the vector
1753/// elements were shuffled in a different order.
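/// For example, a binop with a constant operand can be evaluated in a
/// different element order because the shuffle can be applied to both
/// operands instead (an illustrative sketch):
///   shuffle (add X, <1,2,3,4>), undef, <3,2,1,0>
///   --> add (shuffle X, <3,2,1,0>), <4,3,2,1>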
1754static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1755 unsigned Depth = 5) {
1756 // We can always reorder the elements of a constant.
1757 if (isa<Constant>(V))
1758 return true;
1759
1760 // We won't reorder vector arguments. No IPO here.
1761 Instruction *I = dyn_cast<Instruction>(V);
1762 if (!I) return false;
1763
1764 // Two users may expect different orders of the elements. Don't try it.
1765 if (!I->hasOneUse())
1766 return false;
1767
1768 if (Depth == 0) return false;
1769
1770 switch (I->getOpcode()) {
1771 case Instruction::UDiv:
1772 case Instruction::SDiv:
1773 case Instruction::URem:
1774 case Instruction::SRem:
1775 // Propagating an undefined shuffle mask element to integer div/rem is not
1776 // allowed because those opcodes can create immediate undefined behavior
1777 // from an undefined element in an operand.
1778 if (llvm::is_contained(Mask, -1))
1779 return false;
1780 [[fallthrough]];
1781 case Instruction::Add:
1782 case Instruction::FAdd:
1783 case Instruction::Sub:
1784 case Instruction::FSub:
1785 case Instruction::Mul:
1786 case Instruction::FMul:
1787 case Instruction::FDiv:
1788 case Instruction::FRem:
1789 case Instruction::Shl:
1790 case Instruction::LShr:
1791 case Instruction::AShr:
1792 case Instruction::And:
1793 case Instruction::Or:
1794 case Instruction::Xor:
1795 case Instruction::ICmp:
1796 case Instruction::FCmp:
1797 case Instruction::Trunc:
1798 case Instruction::ZExt:
1799 case Instruction::SExt:
1800 case Instruction::FPToUI:
1801 case Instruction::FPToSI:
1802 case Instruction::UIToFP:
1803 case Instruction::SIToFP:
1804 case Instruction::FPTrunc:
1805 case Instruction::FPExt:
1806 case Instruction::GetElementPtr: {
1807 // Bail out if we would create longer vector ops. We could allow creating
1808 // longer vector ops, but that may result in more expensive codegen.
1809 Type *ITy = I->getType();
1810 if (ITy->isVectorTy() &&
1811 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1812 return false;
1813 for (Value *Operand : I->operands()) {
1814 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1815 return false;
1816 }
1817 return true;
1818 }
1819 case Instruction::InsertElement: {
1820 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1821 if (!CI) return false;
1822 int ElementNumber = CI->getLimitedValue();
1823
1824 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1825 // can't put an element into multiple indices.
1826 bool SeenOnce = false;
1827 for (int I : Mask) {
1828 if (I == ElementNumber) {
1829 if (SeenOnce)
1830 return false;
1831 SeenOnce = true;
1832 }
1833 }
1834 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1835 }
1836 }
1837 return false;
1838}
1839
1840/// Rebuild a new instruction just like 'I' but with the new operands given.
1841/// In the event of a type mismatch, the types of the new operands take precedence.
1842static Value *buildNew(Instruction *I, ArrayRef<Value *> NewOps,
1843 IRBuilderBase &Builder) {
1844 Builder.SetInsertPoint(I);
1845 switch (I->getOpcode()) {
1846 case Instruction::Add:
1847 case Instruction::FAdd:
1848 case Instruction::Sub:
1849 case Instruction::FSub:
1850 case Instruction::Mul:
1851 case Instruction::FMul:
1852 case Instruction::UDiv:
1853 case Instruction::SDiv:
1854 case Instruction::FDiv:
1855 case Instruction::URem:
1856 case Instruction::SRem:
1857 case Instruction::FRem:
1858 case Instruction::Shl:
1859 case Instruction::LShr:
1860 case Instruction::AShr:
1861 case Instruction::And:
1862 case Instruction::Or:
1863 case Instruction::Xor: {
1864 BinaryOperator *BO = cast<BinaryOperator>(I);
1865 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1866 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1867 NewOps[0], NewOps[1]);
1868 if (auto *NewI = dyn_cast<Instruction>(New)) {
1869 if (isa<OverflowingBinaryOperator>(BO)) {
1870 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1871 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1872 }
1873 if (isa<PossiblyExactOperator>(BO)) {
1874 NewI->setIsExact(BO->isExact());
1875 }
1876 if (isa<FPMathOperator>(BO))
1877 NewI->copyFastMathFlags(I);
1878 }
1879 return New;
1880 }
1881 case Instruction::ICmp:
1882 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1883 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1884 NewOps[1]);
1885 case Instruction::FCmp:
1886 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1887 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1888 NewOps[1]);
1889 case Instruction::Trunc:
1890 case Instruction::ZExt:
1891 case Instruction::SExt:
1892 case Instruction::FPToUI:
1893 case Instruction::FPToSI:
1894 case Instruction::UIToFP:
1895 case Instruction::SIToFP:
1896 case Instruction::FPTrunc:
1897 case Instruction::FPExt: {
1898 // It's possible that the mask has a different number of elements from
1899 // the original cast. We recompute the destination type to match the mask.
1900 Type *DestTy = VectorType::get(
1901 I->getType()->getScalarType(),
1902 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1903 assert(NewOps.size() == 1 && "cast with #ops != 1");
1904 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1905 DestTy);
1906 }
1907 case Instruction::GetElementPtr: {
1908 Value *Ptr = NewOps[0];
1909 ArrayRef<Value*> Idx = NewOps.slice(1);
1910 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1911 Ptr, Idx, "",
1912 cast<GEPOperator>(I)->isInBounds());
1913 }
1914 }
1915 llvm_unreachable("failed to rebuild vector instructions");
1916}
1917
1918static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1919 IRBuilderBase &Builder) {
1920 // Mask.size() does not need to be equal to the number of vector elements.
1921
1922 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1923 Type *EltTy = V->getType()->getScalarType();
1924
1925 if (isa<PoisonValue>(V))
1926 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
1927
1928 if (match(V, m_Undef()))
1929 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1930
1931 if (isa<ConstantAggregateZero>(V))
1932 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1933
1934 if (Constant *C = dyn_cast<Constant>(V))
1935 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1936 Mask);
1937
1938 Instruction *I = cast<Instruction>(V);
1939 switch (I->getOpcode()) {
1940 case Instruction::Add:
1941 case Instruction::FAdd:
1942 case Instruction::Sub:
1943 case Instruction::FSub:
1944 case Instruction::Mul:
1945 case Instruction::FMul:
1946 case Instruction::UDiv:
1947 case Instruction::SDiv:
1948 case Instruction::FDiv:
1949 case Instruction::URem:
1950 case Instruction::SRem:
1951 case Instruction::FRem:
1952 case Instruction::Shl:
1953 case Instruction::LShr:
1954 case Instruction::AShr:
1955 case Instruction::And:
1956 case Instruction::Or:
1957 case Instruction::Xor:
1958 case Instruction::ICmp:
1959 case Instruction::FCmp:
1960 case Instruction::Trunc:
1961 case Instruction::ZExt:
1962 case Instruction::SExt:
1963 case Instruction::FPToUI:
1964 case Instruction::FPToSI:
1965 case Instruction::UIToFP:
1966 case Instruction::SIToFP:
1967 case Instruction::FPTrunc:
1968 case Instruction::FPExt:
1969 case Instruction::Select:
1970 case Instruction::GetElementPtr: {
1971 SmallVector<Value *, 8> NewOps;
1972 bool NeedsRebuild =
1973 (Mask.size() !=
1974 cast<FixedVectorType>(I->getType())->getNumElements());
1975 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1976 Value *V;
1977 // Recursively call evaluateInDifferentElementOrder on vector arguments
1978 // as well. E.g. GetElementPtr may have scalar operands even if the
1979 // return value is a vector, so we need to examine the operand type.
1980 if (I->getOperand(i)->getType()->isVectorTy())
1981 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
1982 else
1983 V = I->getOperand(i);
1984 NewOps.push_back(V);
1985 NeedsRebuild |= (V != I->getOperand(i));
1986 }
1987 if (NeedsRebuild)
1988 return buildNew(I, NewOps, Builder);
1989 return I;
1990 }
1991 case Instruction::InsertElement: {
1992 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1993
1994 // The insertelement was inserting at Element. Figure out which element
1995 // that becomes after shuffling. The answer is guaranteed to be unique
1996 // by canEvaluateShuffled.
1997 bool Found = false;
1998 int Index = 0;
1999 for (int e = Mask.size(); Index != e; ++Index) {
2000 if (Mask[Index] == Element) {
2001 Found = true;
2002 break;
2003 }
2004 }
2005
2006 // If element is not in Mask, no need to handle the operand 1 (element to
2007 // be inserted). Just evaluate values in operand 0 according to Mask.
2008 if (!Found)
2009 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2010
2011 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2012 Builder);
2013 Builder.SetInsertPoint(I);
2014 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2015 }
2016 }
2017 llvm_unreachable("failed to reorder elements of vector instruction!");
2018}
2019
2020// Returns true if the shuffle is extracting a contiguous range of values from
2021// LHS, for example:
2022// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2023// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2024// Shuffles to: |EE|FF|GG|HH|
2025// +--+--+--+--+
2026static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2027 ArrayRef<int> Mask) {
2028 unsigned LHSElems =
2029 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2030 unsigned MaskElems = Mask.size();
2031 unsigned BegIdx = Mask.front();
2032 unsigned EndIdx = Mask.back();
2033 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2034 return false;
2035 for (unsigned I = 0; I != MaskElems; ++I)
2036 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2037 return false;
2038 return true;
2039}
2040
2041/// These are the ingredients in an alternate form binary operator as described
2042/// below.
2043struct BinopElts {
2044 BinaryOperator::BinaryOps Opcode;
2045 Value *Op0;
2046 Value *Op1;
2047 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2048 Value *V0 = nullptr, Value *V1 = nullptr) :
2049 Opcode(Opc), Op0(V0), Op1(V1) {}
2050 operator bool() const { return Opcode != 0; }
2051};
2052
2053/// Binops may be transformed into binops with different opcodes and operands.
2054/// Reverse the usual canonicalization to enable folds with the non-canonical
2055/// form of the binop. If a transform is possible, return the elements of the
2056/// new binop. If not, return invalid elements.
2057static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2058 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2059 Type *Ty = BO->getType();
2060 switch (BO->getOpcode()) {
2061 case Instruction::Shl: {
2062 // shl X, C --> mul X, (1 << C)
2063 Constant *C;
2064 if (match(BO1, m_Constant(C))) {
2065 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
2066 return {Instruction::Mul, BO0, ShlOne};
2067 }
2068 break;
2069 }
2070 case Instruction::Or: {
2071 // or X, C --> add X, C (when X and C have no common bits set)
2072 const APInt *C;
2073 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
2074 return {Instruction::Add, BO0, BO1};
2075 break;
2076 }
2077 case Instruction::Sub:
2078 // sub 0, X --> mul X, -1
2079 if (match(BO0, m_ZeroInt()))
2080 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2081 break;
2082 default:
2083 break;
2084 }
2085 return {};
2086}
2087
2088/// A select shuffle of a select shuffle with a shared operand can be reduced
2089/// to a single select shuffle. This is an obvious improvement in IR, and the
2090/// backend is expected to lower select shuffles efficiently.
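///
/// An illustrative sketch: lanes chosen from X keep their mask value, and
/// lanes chosen from the inner shuffle inherit that shuffle's mask value:
///   shuf X, (shuf X, Y, <0,5,6,7>), <0,5,6,3> --> shuf X, Y, <0,5,6,3>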
2091static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2092 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2093
2094 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2095 SmallVector<int, 16> Mask;
2096 Shuf.getShuffleMask(Mask);
2097 unsigned NumElts = Mask.size();
2098
2099 // Canonicalize a select shuffle with common operand as Op1.
2100 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2101 if (ShufOp && ShufOp->isSelect() &&
2102 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2103 std::swap(Op0, Op1);
2104 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2105 }
2106
2107 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2108 if (!ShufOp || !ShufOp->isSelect() ||
2109 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2110 return nullptr;
2111
2112 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2113 SmallVector<int, 16> Mask1;
2114 ShufOp->getShuffleMask(Mask1);
2115 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2116
2117 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2118 if (Y == Op0) {
2119 std::swap(X, Y);
2120 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2121 }
2122
2123 // If the mask chooses from X (operand 0), it stays the same.
2124 // If the mask chooses from the earlier shuffle, the other mask value is
2125 // transferred to the combined select shuffle:
2126 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2127 SmallVector<int, 16> NewMask(NumElts);
2128 for (unsigned i = 0; i != NumElts; ++i)
2129 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2130
2131 // A select mask with undef elements might look like an identity mask.
2132 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2133 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2134 "Unexpected shuffle mask");
2135 return new ShuffleVectorInst(X, Y, NewMask);
2136}
2137
2138static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
2139 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2140
2141 // Are we shuffling together some value and that same value after it has been
2142 // modified by a binop with a constant?
2143 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2144 Constant *C;
2145 bool Op0IsBinop;
2146 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2147 Op0IsBinop = true;
2148 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2149 Op0IsBinop = false;
2150 else
2151 return nullptr;
2152
2153 // The identity constant for a binop leaves a variable operand unchanged. For
2154 // a vector, this is a splat of something like 0, -1, or 1.
2155 // If there's no identity constant for this binop, we're done.
2156 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2157 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2158 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2159 if (!IdC)
2160 return nullptr;
2161
2162 // Shuffle identity constants into the lanes that return the original value.
2163 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2164 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2165 // The existing binop constant vector remains in the same operand position.
2166 ArrayRef<int> Mask = Shuf.getShuffleMask();
2167 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2168 ConstantExpr::getShuffleVector(IdC, C, Mask);
2169
2170 bool MightCreatePoisonOrUB =
2171 is_contained(Mask, PoisonMaskElem) &&
2172 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2173 if (MightCreatePoisonOrUB)
2174 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2175
2176 // shuf (bop X, C), X, M --> bop X, C'
2177 // shuf X, (bop X, C), M --> bop X, C'
2178 Value *X = Op0IsBinop ? Op1 : Op0;
2179 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2180 NewBO->copyIRFlags(BO);
2181
2182 // An undef shuffle mask element may propagate as an undef constant element in
2183 // the new binop. That would produce poison where the original code might not.
2184 // If we already made a safe constant, then there's no danger.
2185 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2186 NewBO->dropPoisonGeneratingFlags();
2187 return NewBO;
2188}
2189
2190/// If we have an insert of a scalar to a non-zero element of an undefined
2191/// vector and then shuffle that value, that's the same as inserting to the zero
2192/// element and shuffling. Splatting from the zero element is recognized as the
2193/// canonical form of splat.
2194static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2195 InstCombiner::BuilderTy &Builder) {
2196 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2197 ArrayRef<int> Mask = Shuf.getShuffleMask();
2198 Value *X;
2199 uint64_t IndexC;
2200
2201 // Match a shuffle that is a splat to a non-zero element.
2202 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2203 m_ConstantInt(IndexC)))) ||
2204 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2205 return nullptr;
2206
2207 // Insert into element 0 of a poison vector.
2208 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2209 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2210
2211 // Splat from element 0. Any mask element that is undefined remains undefined.
2212 // For example:
2213 // shuf (inselt undef, X, 2), _, <2,2,undef>
2214 // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2215 unsigned NumMaskElts =
2216 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2217 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2218 for (unsigned i = 0; i != NumMaskElts; ++i)
2219 if (Mask[i] == PoisonMaskElem)
2220 NewMask[i] = Mask[i];
2221
2222 return new ShuffleVectorInst(NewIns, NewMask);
2223}
2224
2225/// Try to fold shuffles that are the equivalent of a vector select.
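/// A select shuffle takes every result lane unchanged from one of its two
/// operands; e.g. shuf X, Y, <0,5,6,3> acts like select <1,0,0,1>, X, Y.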
2226Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2227 if (!Shuf.isSelect())
2228 return nullptr;
2229
2230 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2231 // Commuting undef to operand 0 conflicts with another canonicalization.
2232 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2233 if (!match(Shuf.getOperand(1), m_Undef()) &&
2234 Shuf.getMaskValue(0) >= (int)NumElts) {
2235 // TODO: Can we assert that both operands of a shuffle-select are not undef
2236 // (otherwise, it would have been folded by instsimplify)?
2237 Shuf.commute();
2238 return &Shuf;
2239 }
2240
2241 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2242 return I;
2243
2244 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
2245 return I;
2246
2247 BinaryOperator *B0, *B1;
2248 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2249 !match(Shuf.getOperand(1), m_BinOp(B1)))
2250 return nullptr;
2251
2252 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2253 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2254 // with a multiply, we will exit because C0/C1 will not be set.
2255 Value *X, *Y;
2256 Constant *C0 = nullptr, *C1 = nullptr;
2257 bool ConstantsAreOp1;
2258 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2259 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2260 ConstantsAreOp1 = false;
2261 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2262 m_Neg(m_Value(X)))) &&
2263 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2264 m_Neg(m_Value(Y)))))
2265 ConstantsAreOp1 = true;
2266 else
2267 return nullptr;
2268
2269 // We need matching binops to fold the lanes together.
2270 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2271 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2272 bool DropNSW = false;
2273 if (ConstantsAreOp1 && Opc0 != Opc1) {
2274 // TODO: We drop "nsw" if shift is converted into multiply because it may
2275 // not be correct when the shift amount is BitWidth - 1. We could examine
2276 // each vector element to determine if it is safe to keep that flag.
2277 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2278 DropNSW = true;
2279 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2280 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2281 Opc0 = AltB0.Opcode;
2282 C0 = cast<Constant>(AltB0.Op1);
2283 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2284 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2285 Opc1 = AltB1.Opcode;
2286 C1 = cast<Constant>(AltB1.Op1);
2287 }
2288 }
2289
2290 if (Opc0 != Opc1 || !C0 || !C1)
2291 return nullptr;
2292
2293 // The opcodes must be the same. Use a new name to make that clear.
2294 BinaryOperator::BinaryOps BOpc = Opc0;
2295
2296 // Select the constant elements needed for the single binop.
2297 ArrayRef<int> Mask = Shuf.getShuffleMask();
2298 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2299
2300 // We are moving a binop after a shuffle. When a shuffle has an undefined
2301 // mask element, the result is undefined, but it is not poison or undefined
2302 // behavior. That is not necessarily true for div/rem/shift.
2303 bool MightCreatePoisonOrUB =
2304 is_contained(Mask, PoisonMaskElem) &&
2305 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2306 if (MightCreatePoisonOrUB)
2307 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2308 ConstantsAreOp1);
2309
2310 Value *V;
2311 if (X == Y) {
2312 // Remove a binop and the shuffle by rearranging the constant:
2313 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2314 // shuffle (op C0, V), (op C1, V), M --> op C', V
2315 V = X;
2316 } else {
2317 // If there are 2 different variable operands, we must create a new shuffle
2318 // (select) first, so check uses to ensure that we don't end up with more
2319 // instructions than we started with.
2320 if (!B0->hasOneUse() && !B1->hasOneUse())
2321 return nullptr;
2322
2323 // If we use the original shuffle mask and op1 is *variable*, we would be
2324 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2325 // poison. We do not have to guard against UB when *constants* are op1
2326 // because safe constants guarantee that we do not overflow sdiv/srem (and
2327 // there's no danger for other opcodes).
2328 // TODO: To allow this case, create a new shuffle mask with no undefs.
2329 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2330 return nullptr;
2331
2332 // Note: In general, we do not create new shuffles in InstCombine because we
2333 // do not know if a target can lower an arbitrary shuffle optimally. In this
2334 // case, the shuffle uses the existing mask, so there is no additional risk.
2335
2336 // Select the variable vectors first, then perform the binop:
2337 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2338 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2339 V = Builder.CreateShuffleVector(X, Y, Mask);
2340 }
2341
2342 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2343 Builder.CreateBinOp(BOpc, NewC, V);
2344
2345 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2346 // 1. If we changed an opcode, poison conditions might have changed.
2347 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2348 // where the original code did not. But if we already made a safe constant,
2349 // then there's no danger.
2350 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2351 NewI->copyIRFlags(B0);
2352 NewI->andIRFlags(B1);
2353 if (DropNSW)
2354 NewI->setHasNoSignedWrap(false);
2355 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2356 NewI->dropPoisonGeneratingFlags();
2357 }
2358 return replaceInstUsesWith(Shuf, NewBO);
2359}
2360
2361/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2362/// Example (little endian):
2363/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2364static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2365 bool IsBigEndian) {
2366 // This must be a bitcasted shuffle of 1 vector integer operand.
2367 Type *DestType = Shuf.getType();
2368 Value *X;
2369 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2370 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2371 return nullptr;
2372
2373 // The source type must have the same number of elements as the shuffle,
2374 // and the source element type must be larger than the shuffle element type.
2375 Type *SrcType = X->getType();
2376 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2377 cast<FixedVectorType>(SrcType)->getNumElements() !=
2378 cast<FixedVectorType>(DestType)->getNumElements() ||
2379 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2380 return nullptr;
2381
2382 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2383 "Expected a shuffle that decreases length");
2384
2385 // Last, check that the mask chooses the correct low bits for each narrow
2386 // element in the result.
2387 uint64_t TruncRatio =
2388 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2389 ArrayRef<int> Mask = Shuf.getShuffleMask();
2390 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2391 if (Mask[i] == PoisonMaskElem)
2392 continue;
2393 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2394 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2395 if (Mask[i] != (int)LSBIndex)
2396 return nullptr;
2397 }
2398
2399 return new TruncInst(X, DestType);
2400}
2401
2402/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2403/// narrowing (concatenating with undef and extracting back to the original
2404/// length). This allows replacing the wide select with a narrow select.
2405static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2406 InstCombiner::BuilderTy &Builder) {
2407 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2408 // of the 1st vector operand of a shuffle.
2409 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2410 return nullptr;
2411
2412 // The vector being shuffled must be a vector select that we can eliminate.
2413 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2414 Value *Cond, *X, *Y;
2415 if (!match(Shuf.getOperand(0),
2416 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2417 return nullptr;
2418
2419 // We need a narrow condition value. It must be extended with undef elements
2420 // and have the same number of elements as this shuffle.
2421 unsigned NarrowNumElts =
2422 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2423 Value *NarrowCond;
2424 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2425 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2426 NarrowNumElts ||
2427 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2428 return nullptr;
2429
2430 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2431 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2432 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2433 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2434 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2435}
2436
2437/// Canonicalize FP negate/abs after shuffle.
2438static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2439 InstCombiner::BuilderTy &Builder) {
2440 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2441 Value *X;
2442 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2443 return nullptr;
2444
2445 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2446
2447 // Match 1-input (unary) shuffle.
2448 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2449 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2450 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2451 if (IsFNeg)
2452 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2453
2454 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2455 Intrinsic::fabs, Shuf.getType());
2456 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2457 NewF->setFastMathFlags(S0->getFastMathFlags());
2458 return NewF;
2459 }
2460
2461 // Match 2-input (binary) shuffle.
2462 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2463 Value *Y;
2464 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2465 S0->getOpcode() != S1->getOpcode() ||
2466 (!S0->hasOneUse() && !S1->hasOneUse()))
2467 return nullptr;
2468
2469 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2470 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2471 Instruction *NewF;
2472 if (IsFNeg) {
2473 NewF = UnaryOperator::CreateFNeg(NewShuf);
2474 } else {
2475 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2476 Intrinsic::fabs, Shuf.getType());
2477 NewF = CallInst::Create(FAbs, {NewShuf});
2478 }
2479 NewF->copyIRFlags(S0);
2480 NewF->andIRFlags(S1);
2481 return NewF;
2482}
2483
2484/// Canonicalize casts after shuffle.
2485static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2486 InstCombiner::BuilderTy &Builder) {
2487 // Do we have 2 matching cast operands?
2488 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2489 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2490 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2491 Cast0->getSrcTy() != Cast1->getSrcTy())
2492 return nullptr;
2493
2494 // TODO: Allow other opcodes? That would require easing the type restrictions
2495 // below here.
2496 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2497 switch (CastOpcode) {
2498 case Instruction::FPToSI:
2499 case Instruction::FPToUI:
2500 case Instruction::SIToFP:
2501 case Instruction::UIToFP:
2502 break;
2503 default:
2504 return nullptr;
2505 }
2506
2507 VectorType *ShufTy = Shuf.getType();
2508 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2509 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2510
2511 // TODO: Allow length-increasing shuffles?
2512 if (ShufTy->getElementCount().getKnownMinValue() >
2513 ShufOpTy->getElementCount().getKnownMinValue())
2514 return nullptr;
2515
2516 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2517 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2518 "Expected fixed vector operands for casts and binary shuffle");
2519 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2520 return nullptr;
2521
2522 // At least one of the operands must have only one use (the shuffle).
2523 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2524 return nullptr;
2525
2526 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2527 Value *X = Cast0->getOperand(0);
2528 Value *Y = Cast1->getOperand(0);
2529 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2530 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2531}
2532
2533/// Try to fold an extract subvector operation.
2534static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2535 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2536 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2537 return nullptr;
2538
2539 // Check if we are extracting all bits of an inserted scalar:
2540 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2541 Value *X;
2542 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2543 X->getType()->getPrimitiveSizeInBits() ==
2544 Shuf.getType()->getPrimitiveSizeInBits())
2545 return new BitCastInst(X, Shuf.getType());
2546
2547 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2548 Value *Y;
2549 ArrayRef<int> Mask;
2550 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2551 return nullptr;
2552
2553 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2554 // then combining may result in worse codegen.
2555 if (!Op0->hasOneUse())
2556 return nullptr;
2557
2558 // We are extracting a subvector from a shuffle. Remove excess elements from
2559 // the 1st shuffle mask to eliminate the extract.
2560 //
2561 // This transform is conservatively limited to identity extracts because we do
2562 // not allow arbitrary shuffle mask creation as a target-independent transform
2563 // (because we can't guarantee that will lower efficiently).
2564 //
2565 // If the extracting shuffle has an undef mask element, it transfers to the
2566 // new shuffle mask. Otherwise, copy the original mask element. Example:
2567 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2568 // shuf X, Y, <C0, undef, C2, undef>
2569 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2570 SmallVector<int, 16> NewMask(NumElts);
2571 assert(NumElts < Mask.size() &&
2572 "Identity with extract must have fewer elements than its inputs");
2573
2574 for (unsigned i = 0; i != NumElts; ++i) {
2575 int ExtractMaskElt = Shuf.getMaskValue(i);
2576 int MaskElt = Mask[i];
2577 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2578 }
2579 return new ShuffleVectorInst(X, Y, NewMask);
2580}
2581
2582/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2583/// operand with the operand of an insertelement.
2584static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2585 InstCombinerImpl &IC) {
2586 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2587 SmallVector<int, 16> Mask;
2588 Shuf.getShuffleMask(Mask);
2589
2590 int NumElts = Mask.size();
2591 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2592
2593 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2594 // not be able to handle it there if the insertelement has >1 use.
2595 // If the shuffle has an insertelement operand but does not choose the
2596 // inserted scalar element from that value, then we can replace that shuffle
2597 // operand with the source vector of the insertelement.
2598 Value *X;
2599 uint64_t IdxC;
2600 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2601 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2602 if (!is_contained(Mask, (int)IdxC))
2603 return IC.replaceOperand(Shuf, 0, X);
2604 }
2605 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2606 // Offset the index constant by the vector width because we are checking for
2607 // accesses to the 2nd vector input of the shuffle.
2608 IdxC += InpNumElts;
2609 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2610 if (!is_contained(Mask, (int)IdxC))
2611 return IC.replaceOperand(Shuf, 1, X);
2612 }
2613 // For the rest of the transform, the shuffle must not change vector sizes.
2614 // TODO: This restriction could be removed if the insert has only one use
2615 // (because the transform would require a new length-changing shuffle).
2616 if (NumElts != InpNumElts)
2617 return nullptr;
2618
2619 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2620 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2621 // We need an insertelement with a constant index.
2622 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2623 m_ConstantInt(IndexC))))
2624 return false;
2625
2626 // Test the shuffle mask to see if it splices the inserted scalar into the
2627 // operand 1 vector of the shuffle.
2628 int NewInsIndex = -1;
2629 for (int i = 0; i != NumElts; ++i) {
2630 // Ignore undef mask elements.
2631 if (Mask[i] == -1)
2632 continue;
2633
2634 // The shuffle takes elements of operand 1 without lane changes.
2635 if (Mask[i] == NumElts + i)
2636 continue;
2637
2638 // The shuffle must choose the inserted scalar exactly once.
2639 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2640 return false;
2641
2642 // The shuffle is placing the inserted scalar into element i.
2643 NewInsIndex = i;
2644 }
2645
2646 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2647
2648 // Index is updated to the potentially translated insertion lane.
2649 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2650 return true;
2651 };
2652
2653 // If the shuffle is unnecessary, insert the scalar operand directly into
2654 // operand 1 of the shuffle. Example:
2655 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2656 Value *Scalar;
2657 ConstantInt *IndexC;
2658 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2659 return InsertElementInst::Create(V1, Scalar, IndexC);
2660
2661 // Try again after commuting shuffle. Example:
2662 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2663 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2664 std::swap(V0, V1);
2665 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2666 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2667 return InsertElementInst::Create(V1, Scalar, IndexC);
2668
2669 return nullptr;
2670}
2671
2672static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2673 // Match the operands as identity with padding (also known as concatenation
2674 // with undef) shuffles of the same source type. The backend is expected to
2675 // recreate these concatenations from a shuffle of narrow operands.
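// An illustrative sketch with <2 x i32> sources X and Y widened to <4 x i32>:
//   shuf (shuf X, undef, <0,1,-1,-1>), (shuf Y, undef, <0,1,-1,-1>), <0,4,1,5>
//   --> shuf X, Y, <0,2,1,3>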
2676 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2677 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2678 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2679 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2680 return nullptr;
2681
2682 // We limit this transform to power-of-2 types because we expect that the
2683 // backend can convert the simplified IR patterns to identical nodes as the
2684 // original IR.
2685 // TODO: If we can verify the same behavior for arbitrary types, the
2686 // power-of-2 checks can be removed.
2687 Value *X = Shuffle0->getOperand(0);
2688 Value *Y = Shuffle1->getOperand(0);
2689 if (X->getType() != Y->getType() ||
2690 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2691 !isPowerOf2_32(
2692 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2693 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2694 match(X, m_Undef()) || match(Y, m_Undef()))
2695 return nullptr;
2696 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2697 match(Shuffle1->getOperand(1), m_Undef()) &&
2698 "Unexpected operand for identity shuffle");
2699
2700 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2701 // operands directly by adjusting the shuffle mask to account for the narrower
2702 // types:
2703 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2704 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2705 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2706 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2707
2708 ArrayRef<int> Mask = Shuf.getShuffleMask();
2709 SmallVector<int, 16> NewMask(Mask.size(), -1);
2710 for (int i = 0, e = Mask.size(); i != e; ++i) {
2711 if (Mask[i] == -1)
2712 continue;
2713
2714 // If this shuffle is choosing an undef element from 1 of the sources, that
2715 // element is undef.
2716 if (Mask[i] < WideElts) {
2717 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2718 continue;
2719 } else {
2720 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2721 continue;
2722 }
2723
2724 // If this shuffle is choosing from the 1st narrow op, the mask element is
2725 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2726 // element is offset down to adjust for the narrow vector widths.
2727 if (Mask[i] < WideElts) {
2728 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2729 NewMask[i] = Mask[i];
2730 } else {
2731 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2732 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2733 }
2734 }
2735 return new ShuffleVectorInst(X, Y, NewMask);
2736}
2737
2738// Splatting the first element of the result of a BinOp, where any of the
2739// BinOp's operands is itself a splat of the first element, can be simplified
2740// to splatting the first element of the BinOp's result.
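// An illustrative sketch:
//   shuf (add (shuf X, undef, <0,0,0,0>), Y), undef, <0,0,0,0>
//   --> shuf (add X, Y), undef, <0,0,0,0>
// Lane 0 of both results is X[0] + Y[0], and only lane 0 is splatted.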
2741Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2742 if (!match(SVI.getOperand(1), m_Undef()) ||
2743 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2744 !SVI.getOperand(0)->hasOneUse())
2745 return nullptr;
2746
2747 Value *Op0 = SVI.getOperand(0);
2748 Value *X, *Y;
2749 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2750 m_Value(Y))) &&
2751 !match(Op0, m_BinOp(m_Value(X),
2752 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2753 return nullptr;
2754 if (X->getType() != Y->getType())
2755 return nullptr;
2756
2757 auto *BinOp = cast<BinaryOperator>(Op0);
2758 if (!isSafeToSpeculativelyExecute(BinOp))
2759 return nullptr;
2760
2761 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2762 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2763 NewBOI->copyIRFlags(BinOp);
2764
2765 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2766}
2767
2768Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2769 Value *LHS = SVI.getOperand(0);
2770 Value *RHS = SVI.getOperand(1);
2771 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2772 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2773 SVI.getType(), ShufQuery))
2774 return replaceInstUsesWith(SVI, V);
2775
2776 if (Instruction *I = simplifyBinOpSplats(SVI))
2777 return I;
2778
2779 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2780 // order to support scalable vectors.
2781 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2782 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2783
2784 if (isa<ScalableVectorType>(LHS->getType()))
2785 return nullptr;
2786
2787 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2788 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2789
2790 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2791 //
2792 // if X and Y are of the same (vector) type, and the element size is not
2793 // changed by the bitcasts, we can distribute the bitcasts through the
2794 // shuffle, hopefully reducing the number of instructions. We make sure that
2795 // at least one bitcast only has one use, so we don't *increase* the number of
2796 // instructions here.
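// An illustrative sketch (the element size is unchanged by the bitcasts):
//   shuf (bitcast <4 x i32> X to <4 x float>),
//        (bitcast <4 x i32> Y to <4 x float>), Mask
//   --> bitcast (shuf X, Y, Mask) to <4 x float>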
2797 Value *X, *Y;
2798 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2799 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2800 X->getType()->getScalarSizeInBits() ==
2801 SVI.getType()->getScalarSizeInBits() &&
2802 (LHS->hasOneUse() || RHS->hasOneUse())) {
2803 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2804 SVI.getName() + ".uncasted");
2805 return new BitCastInst(V, SVI.getType());
2806 }
2807
2808 ArrayRef<int> Mask = SVI.getShuffleMask();
2809
2810 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2811 // simulated shuffle can simplify, then this shuffle is unnecessary:
2812 // shuf (bitcast X), undef, Mask --> bitcast X'
2813 // TODO: This could be extended to allow length-changing shuffles.
2814 // The transform might also be obsoleted if we allowed canonicalization
2815 // of bitcasted shuffles.
2816 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2817 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2818 // Try to create a scaled mask constant.
2819 auto *XType = cast<FixedVectorType>(X->getType());
2820 unsigned XNumElts = XType->getNumElements();
2821 SmallVector<int, 16> ScaledMask;
2822 if (XNumElts >= VWidth) {
2823 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2824 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2825 } else {
2826 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2827 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2828 ScaledMask.clear();
2829 }
2830 if (!ScaledMask.empty()) {
2831 // If the shuffled source vector simplifies, cast that value to this
2832 // shuffle's type.
2833 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2834 ScaledMask, XType, ShufQuery))
2835 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2836 }
2837 }
2838
2839 // shuffle x, x, mask --> shuffle x, undef, mask'
2840 if (LHS == RHS) {
2841 assert(!match(RHS, m_Undef()) &&
2842 "Shuffle with 2 undef ops not simplified?");
2843 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2844 }
2845
2846 // shuffle undef, x, mask --> shuffle x, undef, mask'
2847 if (match(LHS, m_Undef())) {
2848 SVI.commute();
2849 return &SVI;
2850 }
2851
2852 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2853 return I;
2854
2855 if (Instruction *I = foldSelectShuffle(SVI))
2856 return I;
2857
2858 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2859 return I;
2860
2861 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2862 return I;
2863
2864 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2865 return I;
2866
2867 if (Instruction *I = foldCastShuffle(SVI, Builder))
2868 return I;
2869
2870 APInt PoisonElts(VWidth, 0);
2871 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2872 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2873 if (V != &SVI)
2874 return replaceInstUsesWith(SVI, V);
2875 return &SVI;
2876 }
2877
2878 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2879 return I;
2880
2881 // These transforms have the potential to lose undef knowledge, so they are
2882 // intentionally placed after SimplifyDemandedVectorElts().
2883 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2884 return I;
2885 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2886 return I;
2887
2888 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2889 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2890 return replaceInstUsesWith(SVI, V);
2891 }

  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
  // a non-vector type. We can instead bitcast the original vector followed by
  // an extract of the desired element:
  //
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  //   %1 = bitcast <4 x i8> %sroa to i32
  // Becomes:
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 0
  //
  // If the shuffle is extracting a contiguous range of values from the input
  // vector then each use which is a bitcast of the extracted size can be
  // replaced. This will work if the vector types are compatible, and the begin
  // index is aligned to a value in the casted vector type. If the begin index
  // isn't aligned then we can shuffle the original vector (keeping the same
  // vector type) before extracting.
  //
  // This code will bail out if the target type is fundamentally incompatible
  // with vectors of the source type.
  //
  // Example of <16 x i8>, target type i32:
  // Index range [4,8):         v-----------v Will work.
  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
  //     <4 x i32>: |           |           |           |           |
  //                +-----------+-----------+-----------+-----------+
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
  // Target type i40:           ^--------------^ Won't work, bail.
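  // For the aligned [4,8) case above, the rewrite is (illustrative):
  //   %bc  = bitcast <16 x i8> %in to <4 x i32>
  //   %ext = extractelement <4 x i32> %bc, i32 1
  // since BegIdx = 4 scales down by SrcElemsPerTgtElem = 4 to index 1.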
  bool MadeChange = false;
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
    Value *V = LHS;
    unsigned MaskElems = Mask.size();
    auto *SrcTy = cast<FixedVectorType>(V->getType());
    unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
    unsigned SrcNumElems = SrcTy->getNumElements();
    SmallVector<BitCastInst *, 8> BCs;
    DenseMap<Type *, Value *> NewBCs;
    for (User *U : SVI.users())
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
        if (!BC->use_empty())
          // Only visit bitcasts that weren't previously handled.
          BCs.push_back(BC);
    for (BitCastInst *BC : BCs) {
      unsigned BegIdx = Mask.front();
      Type *TgtTy = BC->getDestTy();
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
      if (!TgtElemBitWidth)
        continue;
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
      if (!VecBitWidthsEqual)
        continue;
      if (!VectorType::isValidElementType(TgtTy))
        continue;
      auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
      if (!BegIsAligned) {
        // Shuffle the input so [0,NumElements) contains the output, and
        // [NumElems,SrcNumElems) is undef.
        SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
          ShuffleMask[I] = Idx;
        V = Builder.CreateShuffleVector(V, ShuffleMask,
                                        SVI.getName() + ".extract");
        BegIdx = 0;
      }
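      // For example (illustrative): extracting range [6,10) from <16 x i8>
      // (BegIdx = 6, MaskElems = 4) builds the realigning mask
      // <6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1>,
      // after which the extraction begins at index 0.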
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
      assert(SrcElemsPerTgtElem);
      BegIdx /= SrcElemsPerTgtElem;
      bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
      auto *NewBC =
          BCAlreadyExists
              ? NewBCs[CastSrcTy]
              : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
      if (!BCAlreadyExists)
        NewBCs[CastSrcTy] = NewBC;
      auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
                                               SVI.getName() + ".extract");
      // The shufflevector isn't being replaced: the bitcast that used it
      // is. InstCombine will visit the newly-created instructions.
      replaceInstUsesWith(*BC, Ext);
      MadeChange = true;
    }
  }

  // If the LHS is a shufflevector itself, see if we can combine it with this
  // one without producing an unusual shuffle.
  // Cases that might be simplified:
  // 1.
  // x1=shuffle(v1,v2,mask1)
  //  x=shuffle(x1,undef,mask)
  //        ==>
  //  x=shuffle(v1,undef,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  // 2.
  // x1=shuffle(v1,undef,mask1)
  //  x=shuffle(x1,x2,mask)
  //   where v1.size() == mask1.size()
  //        ==>
  //  x=shuffle(v1,x2,newMask)
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  // 3.
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  //   where v2.size() == mask2.size()
  //        ==>
  //  x=shuffle(x1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  // 4.
  // x1=shuffle(v1,undef,mask1)
  // x2=shuffle(v2,undef,mask2)
  //  x=shuffle(x1,x2,mask)
  //   where v1.size() == v2.size()
  //        ==>
  //  x=shuffle(v1,v2,newMask)
  // newMask[i] = (mask[i] < x1.size())
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  //
  // Here we are really conservative:
  // we are absolutely afraid of producing a shuffle mask not in the input
  // program, because the code gen may not be smart enough to turn a merged
  // shuffle into two specific shuffles: it may produce worse code. As such,
  // we only merge two shuffles if the result is either a splat or one of the
  // input shuffle masks. In this case, merging the shuffles just removes
  // one instruction, which we know is safe. This is good for things like
  // turning: (splat(splat)) -> splat, or
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
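  // For example (illustrative, case 1): given
  //   x1 = shuffle(v1, v2, <0, 4, 1, 5>)
  //   x  = shuffle(x1, poison, <2, 2, 2, 2>)
  // every lane of x reads x1[2] = v1[1], so the merged mask is the splat
  //   x  = shuffle(v1, v2, <1, 1, 1, 1>)
  // and the inner shuffle becomes dead when x1 has no other users.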
  ShuffleVectorInst *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  ShuffleVectorInst *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  if (LHSShuffle)
    if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
        !match(RHS, m_Poison()))
      LHSShuffle = nullptr;
  if (RHSShuffle)
    if (!match(RHSShuffle->getOperand(1), m_Poison()))
      RHSShuffle = nullptr;
  if (!LHSShuffle && !RHSShuffle)
    return MadeChange ? &SVI : nullptr;

  Value *LHSOp0 = nullptr;
  Value *LHSOp1 = nullptr;
  Value *RHSOp0 = nullptr;
  unsigned LHSOp0Width = 0;
  unsigned RHSOp0Width = 0;
  if (LHSShuffle) {
    LHSOp0 = LHSShuffle->getOperand(0);
    LHSOp1 = LHSShuffle->getOperand(1);
    LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
  }
  if (RHSShuffle) {
    RHSOp0 = RHSShuffle->getOperand(0);
    RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
  }
  Value *newLHS = LHS;
  Value *newRHS = RHS;
  if (LHSShuffle) {
    // case 1
    if (match(RHS, m_Poison())) {
      newLHS = LHSOp0;
      newRHS = LHSOp1;
    }
    // case 2 or 4
    else if (LHSOp0Width == LHSWidth) {
      newLHS = LHSOp0;
    }
  }
  // case 3 or 4
  if (RHSShuffle && RHSOp0Width == LHSWidth) {
    newRHS = RHSOp0;
  }
  // case 4
  if (LHSOp0 == RHSOp0) {
    newLHS = LHSOp0;
    newRHS = nullptr;
  }

  if (newLHS == LHS && newRHS == RHS)
    return MadeChange ? &SVI : nullptr;

  ArrayRef<int> LHSMask;
  ArrayRef<int> RHSMask;
  if (newLHS != LHS)
    LHSMask = LHSShuffle->getShuffleMask();
  if (RHSShuffle && newRHS != RHS)
    RHSMask = RHSShuffle->getShuffleMask();

  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  SmallVector<int, 16> newMask;
  bool isSplat = true;
  int SplatElt = -1;
  // Create a new mask for the new ShuffleVectorInst so that the new
  // ShuffleVectorInst is equivalent to the original one.
  for (unsigned i = 0; i < VWidth; ++i) {
    int eltMask;
    if (Mask[i] < 0) {
      // This element is a poison value.
      eltMask = -1;
    } else if (Mask[i] < (int)LHSWidth) {
      // This element is from the left hand side vector operand.
      //
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
      // new mask value for the element.
      if (newLHS != LHS) {
        eltMask = LHSMask[Mask[i]];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
          eltMask = -1;
      } else
        eltMask = Mask[i];
    } else {
      // This element is from the right hand side vector operand.
      //
      // If the value selected is a poison value, explicitly specify it
      // with a -1 mask value. (case 1)
      if (match(RHS, m_Poison()))
        eltMask = -1;
      // If RHS is going to be replaced (case 3 or 4), calculate the
      // new mask value for the element.
      else if (newRHS != RHS) {
        eltMask = RHSMask[Mask[i] - LHSWidth];
        // If the value selected is a poison value, explicitly specify it
        // with a -1 mask value.
        if (eltMask >= (int)RHSOp0Width) {
          assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
                 "should have been checked above");
          eltMask = -1;
        }
      } else
        eltMask = Mask[i] - LHSWidth;

      // If LHS's width is changed, shift the mask value accordingly.
      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
      // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
      // obfuscation across the two vectors.
      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
        eltMask += newLHSWidth;
    }

    // Check if this could still be a splat.
    if (eltMask >= 0) {
      if (SplatElt >= 0 && SplatElt != eltMask)
        isSplat = false;
      SplatElt = eltMask;
    }

    newMask.push_back(eltMask);
  }

  // If the result mask is equal to one of the original shuffle masks,
  // or is a splat, do the replacement.
  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
    if (!newRHS)
      newRHS = PoisonValue::get(newLHS->getType());
    return new ShuffleVectorInst(newLHS, newRHS, newMask);
  }

  return MadeChange ? &SVI : nullptr;
}