//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
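///
/// For example (an illustrative sketch, not a test from the LLVM sources):
/// extracting lane 1 of a single-use vector add is considered cheap because
/// it can later be rewritten as a scalar add of two extracts:
///   %v = add <4 x i32> %a, %b
///   %e = extractelement <4 x i32> %v, i64 1
/// -->
///   %a1 = extractelement <4 x i32> %a, i64 1
///   %b1 = extractelement <4 x i32> %b, i64 1
///   %e  = add i32 %a1, %b1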
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector,
    // because for a scalable vector the total size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
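//
// Illustrative IR (a hand-written sketch, not from the LLVM test suite):
//   loop:
//     %vec = phi <4 x float> [ %init, %entry ], [ %vec.next, %loop ]
//     %vec.next = fmul <4 x float> %vec, %step
//     %res = extractelement <4 x float> %vec, i64 0
// Since the PHI only feeds the fmul and same-index extracts, it can be
// replaced by a scalar PHI and a scalar fmul that produce %res directly.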
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add the old extract to the worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to a vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since the MSB is at
    // index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from
    // 1 of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove the insertelement if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find the elements of V demanded by UserInstr.
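/// For example (sketch): if the only user is
///   %e = extractelement <4 x i32> %V, i64 2
/// then only bit 2 of the returned APInt is set.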
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find the union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
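/// For example (sketch): with two extractelement users reading lanes 0 and 2
/// of a <4 x i32> value, the union is 0b0101; any non-instruction user
/// conservatively makes the result all-ones.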
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter; we just want a consistent type to simplify CSE.
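/// For example (sketch): an i32 index of 7 is rewritten as an i64 index of 7,
/// while an index whose value does not fit in 64 bits is left alone (nullptr).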
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will, for instance, try to do out-of-bounds
  // accesses because of the values of %c1 and/or %c2, the sequence could be
  // optimized early. This is currently not possible because constant folding
  // will reach an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize the type of constant indices to i64 to simplify CSE.
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the total size is only known at run
      // time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector; otherwise return poison.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. So if both are constants here, they must not be
      // the same value; extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input element
      // instead. Restrict the following transformation to fixed-length
      // vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
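///
/// For example (sketch), with 2-element vectors: building V by inserting
/// (extractelement LHS, 0) at index 0 and (extractelement RHS, 1) at index 1
/// yields Mask = <0, 3>, because RHS lanes are numbered after the LHS lanes.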
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
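///
/// For example (sketch): when inserts into a <4 x i8> are fed by extracts
/// from a <2 x i8> source, the source is first widened with a mask of
/// <0, 1, poison, poison> so the insert/extract pairs can later collapse
/// into a single shuffle.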
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or nullptr),
/// and set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

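// For example (sketch): a chain such as
//   inselt (inselt poison, (extelt %A, 0), 0), (extelt %A, 1), 1
// is recognized here, and the returned operand pair plus Mask describe an
// equivalent single shufflevector of %A.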
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted; see if they were all extractvalue'd from
/// the same source aggregate from the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
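///
/// Illustrative IR (sketch): the reconstruction
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// simplifies to a direct reuse of %agg.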
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
      ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was done
    /// didn't match the aggregate type into which the insertion was done,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see that it's the same aggregate for each element.
  // If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined,
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed into:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

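// A shuffle is "equivalent to a select" when every mask element either keeps
// the lane from the first operand, takes the same lane from the second
// operand, or is undefined. For example (sketch), with 4-element operands, a
// mask of <0, 5, 2, 7> keeps lanes 0 and 2 of the first vector and takes
// lanes 1 and 3 of the second, like a lane-wise select.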
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a
  //       splat and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has a scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has a scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
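///
/// For example (sketch): if X is a constant vector, then in
///   inselt (inselt X, %y, 1), 42, 0 --> inselt (inselt X, 42, 0), %y, 1
/// the inner insert of 42 into X constant-folds away, leaving one insert.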
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
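///
/// For example (sketch), with <4 x i32> operands:
///   inselt (shuf X, <poison,7,poison,8>, <0,5,2,7>), 42, 2
///     --> shuf X, <poison,7,42,8>, <0,5,6,7>
/// Lane 2 now selects the constant 42 from the updated constant vector.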
1402 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1403 // Bail out if the parent has more than one use. In that case, we'd be
1404 // replacing the insertelt with a shuffle, and that's not a clear win.
1405 if (!Inst || !Inst->hasOneUse())
1406 return nullptr;
1407 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1408 // The shuffle must have a constant vector operand. The insertelt must have
1409 // a constant scalar being inserted at a constant position in the vector.
1410 Constant *ShufConstVec, *InsEltScalar;
1411 uint64_t InsEltIndex;
1412 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1413 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1414 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1415 return nullptr;
1416
1417 // Adding an element to an arbitrary shuffle could be expensive, but a
1418 // shuffle that selects elements from vectors without crossing lanes is
1419 // assumed cheap.
1420 // If we're just adding a constant into that shuffle, it will still be
1421 // cheap.
1422 if (!isShuffleEquivalentToSelect(*Shuf))
1423 return nullptr;
1424
1425 // From the above 'select' check, we know that the mask has the same number
1426 // of elements as the vector input operands. We also know that each constant
1427 // input element is used in its lane and can not be used more than once by
1428 // the shuffle. Therefore, replace the constant in the shuffle's constant
1429 // vector with the insertelt constant. Replace the constant in the shuffle's
1430 // mask vector with the insertelt index plus the length of the vector
1431 // (because the constant vector operand of a shuffle is always the 2nd
1432 // operand).
1433 ArrayRef<int> Mask = Shuf->getShuffleMask();
1434 unsigned NumElts = Mask.size();
1435 SmallVector<Constant *, 16> NewShufElts(NumElts);
1436 SmallVector<int, 16> NewMaskElts(NumElts);
1437 for (unsigned I = 0; I != NumElts; ++I) {
1438 if (I == InsEltIndex) {
1439 NewShufElts[I] = InsEltScalar;
1440 NewMaskElts[I] = InsEltIndex + NumElts;
1441 } else {
1442 // Copy over the existing values.
1443 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1444 NewMaskElts[I] = Mask[I];
1445 }
1446
1447 // Bail if we failed to find an element.
1448 if (!NewShufElts[I])
1449 return nullptr;
1450 }
1451
1452 // Create new operands for a shuffle that includes the constant of the
1453 // original insertelt. The old shuffle will be dead now.
1454 return new ShuffleVectorInst(Shuf->getOperand(0),
1455 ConstantVector::get(NewShufElts), NewMaskElts);
1456 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1457 // Transform a sequence of insertelement ops with constant data/indices
1458 // into a single shuffle op.
1459 // Cannot handle scalable vector types; the number of elements needed to
1460 // create the shuffle mask is not a compile-time constant.
1461 if (isa<ScalableVectorType>(InsElt.getType()))
1462 return nullptr;
1463 unsigned NumElts =
1464 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1465
1466 uint64_t InsertIdx[2];
1467 Constant *Val[2];
1468 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1469 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1470 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1471 !match(IEI->getOperand(1), m_Constant(Val[1])))
1472 return nullptr;
1473 SmallVector<Constant *, 16> Values(NumElts);
1474 SmallVector<int, 16> Mask(NumElts);
1475 auto ValI = std::begin(Val);
1476 // Generate new constant vector and mask.
1477 // We have 2 values/masks from the insertelements instructions. Insert them
1478 // into new value/mask vectors.
1479 for (uint64_t I : InsertIdx) {
1480 if (!Values[I]) {
1481 Values[I] = *ValI;
1482 Mask[I] = NumElts + I;
1483 }
1484 ++ValI;
1485 }
1486 // Remaining values are filled with 'poison' values.
1487 for (unsigned I = 0; I < NumElts; ++I) {
1488 if (!Values[I]) {
1489 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1490 Mask[I] = I;
1491 }
1492 }
1493 // Create new operands for a shuffle that includes the constant of the
1494 // original insertelt.
1495 return new ShuffleVectorInst(IEI->getOperand(0),
1496 ConstantVector::get(Values), Mask);
1497 }
1498 return nullptr;
1499}
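// A sketch of the insertelement-chain arm above in IR (placeholder names;
// both inserts use constant data and constant indices):
//   %i0 = insertelement <4 x i32> %v, i32 7, i64 0
//   %i1 = insertelement <4 x i32> %i0, i32 8, i64 1
// becomes a blend of %v with a constant vector:
//   %i1 = shufflevector <4 x i32> %v,
//                       <4 x i32> <i32 7, i32 8, i32 poison, i32 poison>,
//                       <4 x i32> <i32 4, i32 5, i32 2, i32 3>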
1500
1501/// If both the base vector and the inserted element are extended from the same
1502/// type, do the insert element in the narrow source type followed by extend.
1503/// TODO: This can be extended to include other cast opcodes, but particularly
1504/// if we create a wider insertelement, make sure codegen is not harmed.
1505static Instruction *narrowInsElt(InsertElementInst &InsElt,
1506 InstCombiner::BuilderTy &Builder) {
1507 // We are creating a vector extend. If the original vector extend has another
1508 // use, that would mean we end up with 2 vector extends, so avoid that.
1509 // TODO: We could ease the one-use check to "if at least one op has one use"
1510 // (assuming that the source types match - see next TODO comment).
1511 Value *Vec = InsElt.getOperand(0);
1512 if (!Vec->hasOneUse())
1513 return nullptr;
1514
1515 Value *Scalar = InsElt.getOperand(1);
1516 Value *X, *Y;
1517 CastInst::CastOps CastOpcode;
1518 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1519 CastOpcode = Instruction::FPExt;
1520 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1521 CastOpcode = Instruction::SExt;
1522 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1523 CastOpcode = Instruction::ZExt;
1524 else
1525 return nullptr;
1526
1527 // TODO: We can allow mismatched types by creating an intermediate cast.
1528 if (X->getType()->getScalarType() != Y->getType())
1529 return nullptr;
1530
1531 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1532 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1533 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1534}
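// A sketch of the narrowing fold above (placeholder names): when both the
// vector and the scalar are extended from matching source types,
//   %wv = fpext <2 x float> %x to <2 x double>
//   %ws = fpext float %y to double
//   %r  = insertelement <2 x double> %wv, double %ws, i64 0
// becomes one insert in the narrow type followed by a single extend:
//   %i = insertelement <2 x float> %x, float %y, i64 0
//   %r = fpext <2 x float> %i to <2 x double>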
1535
1536/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1537/// try to convert to a single insert with appropriate bitcasts.
1538static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1539 bool IsBigEndian,
1540 InstCombiner::BuilderTy &Builder) {
1541 Value *VecOp = InsElt.getOperand(0);
1542 Value *ScalarOp = InsElt.getOperand(1);
1543 Value *IndexOp = InsElt.getOperand(2);
1544
1545 // The pattern depends on endianness; we expect the lower index to be inserted first.
1546 // Big endian:
1547 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1548 // Little endian:
1549 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1550 // Note: It is not safe to do this transform with an arbitrary base vector
1551 // because the bitcast of that vector to fewer/larger elements could
1552 // allow poison to spill into an element that was not poison before.
1553 // TODO: Detect smaller fractions of the scalar.
1554 // TODO: One-use checks are conservative.
1555 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1556 Value *Scalar0, *BaseVec;
1557 uint64_t Index0, Index1;
1558 if (!VTy || (VTy->getNumElements() & 1) ||
1559 !match(IndexOp, m_ConstantInt(Index1)) ||
1560 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1561 m_ConstantInt(Index0))) ||
1562 !match(BaseVec, m_Undef()))
1563 return nullptr;
1564
1565 // The first insert must be to the index one less than this one, and
1566 // the first insert must be to an even index.
1567 if (Index0 + 1 != Index1 || Index0 & 1)
1568 return nullptr;
1569
1570 // For big endian, the high half of the value should be inserted first.
1571 // For little endian, the low half of the value should be inserted first.
1572 Value *X;
1573 uint64_t ShAmt;
1574 if (IsBigEndian) {
1575 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1576 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1577 return nullptr;
1578 } else {
1579 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1580 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1581 return nullptr;
1582 }
1583
1584 Type *SrcTy = X->getType();
1585 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1586 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1587 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1588 return nullptr;
1589
1590 // Bitcast the base vector to a vector type with the source element type.
1591 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1592 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1593
1594 // Scale the insert index for a vector with half as many elements.
1595 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1596 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1597 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1598 return new BitCastInst(NewInsert, VTy);
1599}
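// A little-endian sketch of the trunc-insert pair above (placeholder names):
//   %lo = trunc i32 %x to i16
//   %sh = lshr i32 %x, 16
//   %hi = trunc i32 %sh to i16
//   %v0 = insertelement <4 x i16> undef, i16 %lo, i64 0
//   %v1 = insertelement <4 x i16> %v0, i16 %hi, i64 1
// becomes a single insert of the full scalar through bitcasts:
//   %bc = bitcast <4 x i16> undef to <2 x i32>
//   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
//   %v1 = bitcast <2 x i32> %in to <4 x i16>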
1600
1601Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1602 Value *VecOp = IE.getOperand(0);
1603 Value *ScalarOp = IE.getOperand(1);
1604 Value *IdxOp = IE.getOperand(2);
1605
1606 if (auto *V = simplifyInsertElementInst(
1607 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1608 return replaceInstUsesWith(IE, V);
1609
1610 // Canonicalize type of constant indices to i64 to simplify CSE
1611 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1612 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1613 return replaceOperand(IE, 2, NewIdx);
1614
1615 Value *BaseVec, *OtherScalar;
1616 uint64_t OtherIndexVal;
1617 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1618 m_Value(OtherScalar),
1619 m_ConstantInt(OtherIndexVal)))) &&
1620 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1621 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1622 return InsertElementInst::Create(NewIns, OtherScalar,
1623 Builder.getInt64(OtherIndexVal));
1624 }
1625 }
1626
1627 // If the scalar is bitcast and inserted into undef, do the insert in the
1628 // source type followed by bitcast.
1629 // TODO: Generalize for insert into any constant, not just undef?
1630 Value *ScalarSrc;
1631 if (match(VecOp, m_Undef()) &&
1632 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1633 (ScalarSrc->getType()->isIntegerTy() ||
1634 ScalarSrc->getType()->isFloatingPointTy())) {
1635 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1636 // bitcast (inselt undef, ScalarSrc, IdxOp)
1637 Type *ScalarTy = ScalarSrc->getType();
1638 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1639 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1640 : UndefValue::get(VecTy);
1641 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1642 return new BitCastInst(NewInsElt, IE.getType());
1643 }
1644
1645 // If the vector and scalar are both bitcast from the same element type, do
1646 // the insert in that source type followed by bitcast.
1647 Value *VecSrc;
1648 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1649 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1650 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1651 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1652 cast<VectorType>(VecSrc->getType())->getElementType() ==
1653 ScalarSrc->getType()) {
1654 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1655 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1656 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1657 return new BitCastInst(NewInsElt, IE.getType());
1658 }
1659
1660 // If the inserted element was extracted from some other fixed-length vector
1661 // and both indexes are valid constants, try to turn this into a shuffle.
1662 // Cannot handle scalable vector types; the number of elements needed to
1663 // create the shuffle mask is not a compile-time constant.
1664 uint64_t InsertedIdx, ExtractedIdx;
1665 Value *ExtVecOp;
1666 if (isa<FixedVectorType>(IE.getType()) &&
1667 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1668 match(ScalarOp,
1669 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1670 isa<FixedVectorType>(ExtVecOp->getType()) &&
1671 ExtractedIdx <
1672 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1673 // TODO: Looking at the user(s) to determine if this insert is a
1674 // fold-to-shuffle opportunity does not match the usual instcombine
1675 // constraints. We should decide if the transform is worthy based only
1676 // on this instruction and its operands, but that may not work currently.
1677 //
1678 // Here, we are trying to avoid creating shuffles before reaching
1679 // the end of a chain of extract-insert pairs. This is complicated because
1680 // we do not generally form arbitrary shuffle masks in instcombine
1681 // (because those may codegen poorly), but collectShuffleElements() does
1682 // exactly that.
1683 //
1684 // The rules for determining what is an acceptable target-independent
1685 // shuffle mask are fuzzy because they evolve based on the backend's
1686 // capabilities and real-world impact.
1687 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1688 if (!Insert.hasOneUse())
1689 return true;
1690 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1691 if (!InsertUser)
1692 return true;
1693 return false;
1694 };
1695
1696 // Try to form a shuffle from a chain of extract-insert ops.
1697 if (isShuffleRootCandidate(IE)) {
1698 bool Rerun = true;
1699 while (Rerun) {
1700 Rerun = false;
1701
1702 SmallVector<int, 16> Mask;
1703 ShuffleOps LR =
1704 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1705
1706 // The proposed shuffle may be trivial, in which case we shouldn't
1707 // perform the combine.
1708 if (LR.first != &IE && LR.second != &IE) {
1709 // We now have a shuffle of LHS, RHS, Mask.
1710 if (LR.second == nullptr)
1711 LR.second = PoisonValue::get(LR.first->getType());
1712 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1713 }
1714 }
1715 }
1716 }
1717
1718 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1719 unsigned VWidth = VecTy->getNumElements();
1720 APInt PoisonElts(VWidth, 0);
1721 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1722 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1723 PoisonElts)) {
1724 if (V != &IE)
1725 return replaceInstUsesWith(IE, V);
1726 return &IE;
1727 }
1728 }
1729
1730 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1731 return Shuf;
1732
1733 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1734 return NewInsElt;
1735
1736 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1737 return Broadcast;
1738
1739 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1740 return Splat;
1741
1742 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1743 return IdentityShuf;
1744
1745 if (Instruction *Ext = narrowInsElt(IE, Builder))
1746 return Ext;
1747
1748 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1749 return Ext;
1750
1751 return nullptr;
1752}
1753
1754/// Return true if we can evaluate the specified expression tree if the vector
1755/// elements were shuffled in a different order.
1756static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1757 unsigned Depth = 5) {
1758 // We can always reorder the elements of a constant.
1759 if (isa<Constant>(V))
1760 return true;
1761
1762 // We won't reorder vector arguments. No IPO here.
1763 Instruction *I = dyn_cast<Instruction>(V);
1764 if (!I) return false;
1765
1766 // Two users may expect different orders of the elements. Don't try it.
1767 if (!I->hasOneUse())
1768 return false;
1769
1770 if (Depth == 0) return false;
1771
1772 switch (I->getOpcode()) {
1773 case Instruction::UDiv:
1774 case Instruction::SDiv:
1775 case Instruction::URem:
1776 case Instruction::SRem:
1777 // Propagating an undefined shuffle mask element to integer div/rem is not
1778 // allowed because those opcodes can create immediate undefined behavior
1779 // from an undefined element in an operand.
1780 if (llvm::is_contained(Mask, -1))
1781 return false;
1782 [[fallthrough]];
1783 case Instruction::Add:
1784 case Instruction::FAdd:
1785 case Instruction::Sub:
1786 case Instruction::FSub:
1787 case Instruction::Mul:
1788 case Instruction::FMul:
1789 case Instruction::FDiv:
1790 case Instruction::FRem:
1791 case Instruction::Shl:
1792 case Instruction::LShr:
1793 case Instruction::AShr:
1794 case Instruction::And:
1795 case Instruction::Or:
1796 case Instruction::Xor:
1797 case Instruction::ICmp:
1798 case Instruction::FCmp:
1799 case Instruction::Trunc:
1800 case Instruction::ZExt:
1801 case Instruction::SExt:
1802 case Instruction::FPToUI:
1803 case Instruction::FPToSI:
1804 case Instruction::UIToFP:
1805 case Instruction::SIToFP:
1806 case Instruction::FPTrunc:
1807 case Instruction::FPExt:
1808 case Instruction::GetElementPtr: {
1809 // Bail out if we would create longer vector ops. We could allow creating
1810 // longer vector ops, but that may result in more expensive codegen.
1811 Type *ITy = I->getType();
1812 if (ITy->isVectorTy() &&
1813 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1814 return false;
1815 for (Value *Operand : I->operands()) {
1816 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1817 return false;
1818 }
1819 return true;
1820 }
1821 case Instruction::InsertElement: {
1822 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1823 if (!CI) return false;
1824 int ElementNumber = CI->getLimitedValue();
1825
1826 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1827 // can't put an element into multiple indices.
1828 bool SeenOnce = false;
1829 for (int I : Mask) {
1830 if (I == ElementNumber) {
1831 if (SeenOnce)
1832 return false;
1833 SeenOnce = true;
1834 }
1835 }
1836 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1837 }
1838 }
1839 return false;
1840}
1841
1842/// Rebuild a new instruction just like 'I' but with the new operands given.
1843/// On a type mismatch, the result type is recomputed from the new operands.
1844static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1845 IRBuilderBase &Builder) {
1846 Builder.SetInsertPoint(I);
1847 switch (I->getOpcode()) {
1848 case Instruction::Add:
1849 case Instruction::FAdd:
1850 case Instruction::Sub:
1851 case Instruction::FSub:
1852 case Instruction::Mul:
1853 case Instruction::FMul:
1854 case Instruction::UDiv:
1855 case Instruction::SDiv:
1856 case Instruction::FDiv:
1857 case Instruction::URem:
1858 case Instruction::SRem:
1859 case Instruction::FRem:
1860 case Instruction::Shl:
1861 case Instruction::LShr:
1862 case Instruction::AShr:
1863 case Instruction::And:
1864 case Instruction::Or:
1865 case Instruction::Xor: {
1866 BinaryOperator *BO = cast<BinaryOperator>(I);
1867 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1868 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1869 NewOps[0], NewOps[1]);
1870 if (auto *NewI = dyn_cast<Instruction>(New)) {
1871 if (isa<OverflowingBinaryOperator>(BO)) {
1872 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1873 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1874 }
1875 if (isa<PossiblyExactOperator>(BO)) {
1876 NewI->setIsExact(BO->isExact());
1877 }
1878 if (isa<FPMathOperator>(BO))
1879 NewI->copyFastMathFlags(I);
1880 }
1881 return New;
1882 }
1883 case Instruction::ICmp:
1884 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1885 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1886 NewOps[1]);
1887 case Instruction::FCmp:
1888 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1889 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1890 NewOps[1]);
1891 case Instruction::Trunc:
1892 case Instruction::ZExt:
1893 case Instruction::SExt:
1894 case Instruction::FPToUI:
1895 case Instruction::FPToSI:
1896 case Instruction::UIToFP:
1897 case Instruction::SIToFP:
1898 case Instruction::FPTrunc:
1899 case Instruction::FPExt: {
1900 // It's possible that the mask has a different number of elements from
1901 // the original cast. We recompute the destination type to match the mask.
1902 Type *DestTy = VectorType::get(
1903 I->getType()->getScalarType(),
1904 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1905 assert(NewOps.size() == 1 && "cast with #ops != 1");
1906 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1907 DestTy);
1908 }
1909 case Instruction::GetElementPtr: {
1910 Value *Ptr = NewOps[0];
1911 ArrayRef<Value*> Idx = NewOps.slice(1);
1912 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1913 Ptr, Idx, "",
1914 cast<GEPOperator>(I)->isInBounds());
1915 }
1916 }
1917 llvm_unreachable("failed to rebuild vector instructions");
1918}
1919
1920static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1921 IRBuilderBase &Builder) {
1922 // Mask.size() does not need to be equal to the number of vector elements.
1923
1924 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1925 Type *EltTy = V->getType()->getScalarType();
1926
1927 if (isa<PoisonValue>(V))
1928 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
1929
1930 if (match(V, m_Undef()))
1931 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1932
1933 if (isa<ConstantAggregateZero>(V))
1934 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1935
1936 if (Constant *C = dyn_cast<Constant>(V))
1937 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1938 Mask);
1939
1940 Instruction *I = cast<Instruction>(V);
1941 switch (I->getOpcode()) {
1942 case Instruction::Add:
1943 case Instruction::FAdd:
1944 case Instruction::Sub:
1945 case Instruction::FSub:
1946 case Instruction::Mul:
1947 case Instruction::FMul:
1948 case Instruction::UDiv:
1949 case Instruction::SDiv:
1950 case Instruction::FDiv:
1951 case Instruction::URem:
1952 case Instruction::SRem:
1953 case Instruction::FRem:
1954 case Instruction::Shl:
1955 case Instruction::LShr:
1956 case Instruction::AShr:
1957 case Instruction::And:
1958 case Instruction::Or:
1959 case Instruction::Xor:
1960 case Instruction::ICmp:
1961 case Instruction::FCmp:
1962 case Instruction::Trunc:
1963 case Instruction::ZExt:
1964 case Instruction::SExt:
1965 case Instruction::FPToUI:
1966 case Instruction::FPToSI:
1967 case Instruction::UIToFP:
1968 case Instruction::SIToFP:
1969 case Instruction::FPTrunc:
1970 case Instruction::FPExt:
1971 case Instruction::Select:
1972 case Instruction::GetElementPtr: {
1973 SmallVector<Value*, 8> NewOps;
1974 bool NeedsRebuild =
1975 (Mask.size() !=
1976 cast<FixedVectorType>(I->getType())->getNumElements());
1977 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1978 Value *V;
1979 // Recursively call evaluateInDifferentElementOrder on vector arguments
1980 // as well. E.g. GetElementPtr may have scalar operands even if the
1981 // return value is a vector, so we need to examine the operand type.
1982 if (I->getOperand(i)->getType()->isVectorTy())
1983 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
1984 else
1985 V = I->getOperand(i);
1986 NewOps.push_back(V);
1987 NeedsRebuild |= (V != I->getOperand(i));
1988 }
1989 if (NeedsRebuild)
1990 return buildNew(I, NewOps, Builder);
1991 return I;
1992 }
1993 case Instruction::InsertElement: {
1994 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1995
1996 // The insertelement was inserting at Element. Figure out which element
1997 // that becomes after shuffling. The answer is guaranteed to be unique
1998 // by canEvaluateShuffled().
1999 bool Found = false;
2000 int Index = 0;
2001 for (int e = Mask.size(); Index != e; ++Index) {
2002 if (Mask[Index] == Element) {
2003 Found = true;
2004 break;
2005 }
2006 }
2007
2008 // If element is not in Mask, no need to handle the operand 1 (element to
2009 // be inserted). Just evaluate values in operand 0 according to Mask.
2010 if (!Found)
2011 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2012
2013 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2014 Builder);
2015 Builder.SetInsertPoint(I);
2016 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2017 }
2018 }
2019 llvm_unreachable("failed to reorder elements of vector instruction!");
2020}
2021
2022// Returns true if the shuffle is extracting a contiguous range of values from
2023// LHS, for example:
2024// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2025// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2026// Shuffles to: |EE|FF|GG|HH|
2027// +--+--+--+--+
2028static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2029 ArrayRef<int> Mask) {
2030 unsigned LHSElems =
2031 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2032 unsigned MaskElems = Mask.size();
2033 unsigned BegIdx = Mask.front();
2034 unsigned EndIdx = Mask.back();
2035 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2036 return false;
2037 for (unsigned I = 0; I != MaskElems; ++I)
2038 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2039 return false;
2040 return true;
2041}
2042
2043/// These are the ingredients in an alternate form binary operator as described
2044/// below.
2045struct BinopElts {
2046 BinaryOperator::BinaryOps Opcode;
2047 Value *Op0;
2048 Value *Op1;
2049 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2050 Value *V0 = nullptr, Value *V1 = nullptr) :
2051 Opcode(Opc), Op0(V0), Op1(V1) {}
2052 operator bool() const { return Opcode != 0; }
2053};
2054
2055/// Binops may be transformed into binops with different opcodes and operands.
2056/// Reverse the usual canonicalization to enable folds with the non-canonical
2057/// form of the binop. If a transform is possible, return the elements of the
2058/// new binop. If not, return invalid elements.
2059static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2060 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2061 Type *Ty = BO->getType();
2062 switch (BO->getOpcode()) {
2063 case Instruction::Shl: {
2064 // shl X, C --> mul X, (1 << C)
2065 Constant *C;
2066 if (match(BO1, m_ImmConstant(C))) {
2067 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2068 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2069 assert(ShlOne && "Constant folding of immediate constants failed");
2070 return {Instruction::Mul, BO0, ShlOne};
2071 }
2072 break;
2073 }
2074 case Instruction::Or: {
2075 // or disjoint X, C --> add X, C
2076 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2077 return {Instruction::Add, BO0, BO1};
2078 break;
2079 }
2080 case Instruction::Sub:
2081 // sub 0, X --> mul X, -1
2082 if (match(BO0, m_ZeroInt()))
2083 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2084 break;
2085 default:
2086 break;
2087 }
2088 return {};
2089}
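// Illustrative alternate forms produced above (scalar notation for brevity):
//   shl %x, 3            is viewed as   mul %x, 8
//   or disjoint %x, %y   is viewed as   add %x, %y
//   sub 0, %x            is viewed as   mul %x, -1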
2090
2091/// A select shuffle of a select shuffle with a shared operand can be reduced
2092/// to a single select shuffle. This is an obvious improvement in IR, and the
2093/// backend is expected to lower select shuffles efficiently.
2094static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2095 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2096
2097 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2098 SmallVector<int, 16> Mask;
2099 Shuf.getShuffleMask(Mask);
2100 unsigned NumElts = Mask.size();
2101
2102 // Canonicalize a select shuffle with common operand as Op1.
2103 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2104 if (ShufOp && ShufOp->isSelect() &&
2105 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2106 std::swap(Op0, Op1);
2107 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2108 }
2109
2110 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2111 if (!ShufOp || !ShufOp->isSelect() ||
2112 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2113 return nullptr;
2114
2115 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2116 SmallVector<int, 16> Mask1;
2117 ShufOp->getShuffleMask(Mask1);
2118 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2119
2120 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2121 if (Y == Op0) {
2122 std::swap(X, Y);
2123 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2124 }
2125
2126 // If the mask chooses from X (operand 0), it stays the same.
2127 // If the mask chooses from the earlier shuffle, the other mask value is
2128 // transferred to the combined select shuffle:
2129 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2130 SmallVector<int, 16> NewMask(NumElts);
2131 for (unsigned i = 0; i != NumElts; ++i)
2132 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2133
2134 // A select mask with undef elements might look like an identity mask.
2135 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2136 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2137 "Unexpected shuffle mask");
2138 return new ShuffleVectorInst(X, Y, NewMask);
2139}
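// A sketch of this reduction in IR (placeholder names; %x is the shared
// operand):
//   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %s1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
// becomes a single select shuffle:
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 3>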
2140
2141static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2142 const SimplifyQuery &SQ) {
2143 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2144
2145 // Are we shuffling together some value and that same value after it has been
2146 // modified by a binop with a constant?
2147 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2148 Constant *C;
2149 bool Op0IsBinop;
2150 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2151 Op0IsBinop = true;
2152 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2153 Op0IsBinop = false;
2154 else
2155 return nullptr;
2156
2157 // The identity constant for a binop leaves a variable operand unchanged. For
2158 // a vector, this is a splat of something like 0, -1, or 1.
2159 // If there's no identity constant for this binop, we're done.
2160 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2161 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2162 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2163 if (!IdC)
2164 return nullptr;
2165
2166 Value *X = Op0IsBinop ? Op1 : Op0;
2167
2168 // Prevent the fold if the non-binop operand (X) might contain NaN values.
2169 // If X can be NaN, the floating point math operation in the transformed
2170 // code may not preserve the exact NaN bit-pattern -- e.g.
2171 // `fadd sNaN, 0.0 -> qNaN` -- whereas the original program returns X
2172 // itself in the lanes that select it and so preserves the bit-pattern.
2173 // That would make the transform incorrect, so avoid the fold unless X is
2174 // known never to contain NaN elements.
2175 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2176 !isKnownNeverNaN(X, 0, SQ))
2177 return nullptr;
2178
2179 // Shuffle identity constants into the lanes that return the original value.
2180 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2181 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2182 // The existing binop constant vector remains in the same operand position.
2183 ArrayRef<int> Mask = Shuf.getShuffleMask();
2184 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2185 ConstantExpr::getShuffleVector(IdC, C, Mask);
2186
2187 bool MightCreatePoisonOrUB =
2188 is_contained(Mask, PoisonMaskElem) &&
2189 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2190 if (MightCreatePoisonOrUB)
2191 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2192
2193 // shuf (bop X, C), X, M --> bop X, C'
2194 // shuf X, (bop X, C), M --> bop X, C'
2195 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2196 NewBO->copyIRFlags(BO);
2197
2198 // An undef shuffle mask element may propagate as an undef constant element in
2199 // the new binop. That would produce poison where the original code might not.
2200 // If we already made a safe constant, then there's no danger.
2201 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2202 NewBO->dropPoisonGeneratingFlags();
2203 return NewBO;
2204}
2205
2206/// If we have an insert of a scalar to a non-zero element of an undefined
2207/// vector and then shuffle that value, that's the same as inserting to the zero
2208/// element and shuffling. Splatting from the zero element is recognized as the
2209/// canonical form of splat.
2210static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2211 InstCombiner::BuilderTy &Builder) {
2212 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2213 ArrayRef<int> Mask = Shuf.getShuffleMask();
2214 Value *X;
2215 uint64_t IndexC;
2216
2217 // Match a shuffle that is a splat to a non-zero element.
2218 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2219 m_ConstantInt(IndexC)))) ||
2220 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2221 return nullptr;
2222
2223 // Insert into element 0 of a poison vector.
2224 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2225 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2226
2227 // Splat from element 0. Any mask element that is poison remains poison.
2228 // For example:
2229 // shuf (inselt poison, X, 2), _, <2,2,undef>
2230 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2231 unsigned NumMaskElts =
2232 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2233 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2234 for (unsigned i = 0; i != NumMaskElts; ++i)
2235 if (Mask[i] == PoisonMaskElem)
2236 NewMask[i] = Mask[i];
2237
2238 return new ShuffleVectorInst(NewIns, NewMask);
2239}
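// A sketch of the canonicalization above (placeholder names): a splat from
// a non-zero insert lane
//   %i = insertelement <4 x i32> poison, i32 %x, i64 2
//   %s = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
// becomes an insert at lane 0 with a zero splat mask:
//   %i = insertelement <4 x i32> poison, i32 %x, i64 0
//   %s = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer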
2240
2241/// Try to fold shuffles that are the equivalent of a vector select.
2242Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2243 if (!Shuf.isSelect())
2244 return nullptr;
2245
2246 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2247 // Commuting undef to operand 0 conflicts with another canonicalization.
2248 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2249 if (!match(Shuf.getOperand(1), m_Undef()) &&
2250 Shuf.getMaskValue(0) >= (int)NumElts) {
2251 // TODO: Can we assert that both operands of a shuffle-select are not undef
2252 // (otherwise, it would have been folded by instsimplify)?
2253 Shuf.commute();
2254 return &Shuf;
2255 }
2256
2257 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2258 return I;
2259
2260 if (Instruction *I = foldSelectShuffleWith1Binop(
2261 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2262 return I;
2263
2264 BinaryOperator *B0, *B1;
2265 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2266 !match(Shuf.getOperand(1), m_BinOp(B1)))
2267 return nullptr;
2268
2269 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2270 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2271 // with a multiply, we will exit because C0/C1 will not be set.
2272 Value *X, *Y;
2273 Constant *C0 = nullptr, *C1 = nullptr;
2274 bool ConstantsAreOp1;
2275 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2276 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2277 ConstantsAreOp1 = false;
2278 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2279 m_Neg(m_Value(X)))) &&
2280 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2281 m_Neg(m_Value(Y)))))
2282 ConstantsAreOp1 = true;
2283 else
2284 return nullptr;
2285
2286 // We need matching binops to fold the lanes together.
2287 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2288 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2289 bool DropNSW = false;
2290 if (ConstantsAreOp1 && Opc0 != Opc1) {
2291 // TODO: We drop "nsw" if shift is converted into multiply because it may
2292 // not be correct when the shift amount is BitWidth - 1. We could examine
2293 // each vector element to determine if it is safe to keep that flag.
2294 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2295 DropNSW = true;
2296 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2297 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2298 Opc0 = AltB0.Opcode;
2299 C0 = cast<Constant>(AltB0.Op1);
2300 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2301 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2302 Opc1 = AltB1.Opcode;
2303 C1 = cast<Constant>(AltB1.Op1);
2304 }
2305 }
2306
2307 if (Opc0 != Opc1 || !C0 || !C1)
2308 return nullptr;
2309
2310 // The opcodes must be the same. Use a new name to make that clear.
2311 BinaryOperator::BinaryOps BOpc = Opc0;
2312
2313 // Select the constant elements needed for the single binop.
2314 ArrayRef<int> Mask = Shuf.getShuffleMask();
2315 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2316
2317 // We are moving a binop after a shuffle. When a shuffle has an undefined
2318 // mask element, the result is undefined, but it is not poison or undefined
2319 // behavior. That is not necessarily true for div/rem/shift.
2320 bool MightCreatePoisonOrUB =
2321 is_contained(Mask, PoisonMaskElem) &&
2322 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2323 if (MightCreatePoisonOrUB)
2324 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2325 ConstantsAreOp1);
2326
2327 Value *V;
2328 if (X == Y) {
2329 // Remove a binop and the shuffle by rearranging the constant:
2330 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2331 // shuffle (op C0, V), (op C1, V), M --> op C', V
2332 V = X;
2333 } else {
2334 // If there are 2 different variable operands, we must create a new shuffle
2335 // (select) first, so check uses to ensure that we don't end up with more
2336 // instructions than we started with.
2337 if (!B0->hasOneUse() && !B1->hasOneUse())
2338 return nullptr;
2339
2340 // If we use the original shuffle mask and op1 is *variable*, we would be
2341 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2342 // poison. We do not have to guard against UB when *constants* are op1
2343 // because safe constants guarantee that we do not overflow sdiv/srem (and
2344 // there's no danger for other opcodes).
2345 // TODO: To allow this case, create a new shuffle mask with no undefs.
2346 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2347 return nullptr;
2348
2349 // Note: In general, we do not create new shuffles in InstCombine because we
2350 // do not know if a target can lower an arbitrary shuffle optimally. In this
2351 // case, the shuffle uses the existing mask, so there is no additional risk.
2352
2353 // Select the variable vectors first, then perform the binop:
2354 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2355 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2356 V = Builder.CreateShuffleVector(X, Y, Mask);
2357 }
2358
2359 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2360 Builder.CreateBinOp(BOpc, NewC, V);
2361
2362 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2363 // 1. If we changed an opcode, poison conditions might have changed.
2364 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2365 // where the original code did not. But if we already made a safe constant,
2366 // then there's no danger.
2367 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2368 NewI->copyIRFlags(B0);
2369 NewI->andIRFlags(B1);
2370 if (DropNSW)
2371 NewI->setHasNoSignedWrap(false);
2372 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2373 NewI->dropPoisonGeneratingFlags();
2374 }
2375 return replaceInstUsesWith(Shuf, NewBO);
2376}
2377
2378/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2379/// Example (little endian):
2380/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2381static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2382 bool IsBigEndian) {
2383 // This must be a bitcasted shuffle of 1 vector integer operand.
2384 Type *DestType = Shuf.getType();
2385 Value *X;
2386 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2387 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2388 return nullptr;
2389
2390 // The source type must have the same number of elements as the shuffle,
2391 // and the source element type must be larger than the shuffle element type.
2392 Type *SrcType = X->getType();
2393 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2394 cast<FixedVectorType>(SrcType)->getNumElements() !=
2395 cast<FixedVectorType>(DestType)->getNumElements() ||
2396 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2397 return nullptr;
2398
2399 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2400 "Expected a shuffle that decreases length");
2401
2402 // Last, check that the mask chooses the correct low bits for each narrow
2403 // element in the result.
2404 uint64_t TruncRatio =
2405 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2406 ArrayRef<int> Mask = Shuf.getShuffleMask();
2407 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2408 if (Mask[i] == PoisonMaskElem)
2409 continue;
2410 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2411 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2412 if (Mask[i] != (int)LSBIndex)
2413 return nullptr;
2414 }
2415
2416 return new TruncInst(X, DestType);
2417}
2418
2419/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2420/// narrowing (concatenating with poison and extracting back to the original
2421/// length). This allows replacing the wide select with a narrow select.
2422static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2423 InstCombiner::BuilderTy &Builder) {
2424 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2425 // of the 1st vector operand of a shuffle.
2426 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2427 return nullptr;
2428
2429 // The vector being shuffled must be a vector select that we can eliminate.
2430 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2431 Value *Cond, *X, *Y;
2432 if (!match(Shuf.getOperand(0),
2433 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2434 return nullptr;
2435
2436 // We need a narrow condition value. It must be extended with poison elements
2437 // and have the same number of elements as this shuffle.
2438 unsigned NarrowNumElts =
2439 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2440 Value *NarrowCond;
2441 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2442 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2443 NarrowNumElts ||
2444 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2445 return nullptr;
2446
2447 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask
2448 // -->
2449 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2450 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2451 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2452 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2453}
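// A sketch of the narrowing above (placeholder names): a wide select whose
// condition was widened with poison, then extracted back,
//   %wc  = shufflevector <2 x i1> %c, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %sel = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y
//   %r   = shufflevector <4 x i32> %sel, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// becomes a select at the narrow length:
//   %nx = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
//   %ny = shufflevector <4 x i32> %y, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
//   %r  = select <2 x i1> %c, <2 x i32> %nx, <2 x i32> %ny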
2454
2455/// Canonicalize FP negate/abs after shuffle.
2456static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2457 InstCombiner::BuilderTy &Builder) {
2458 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2459 Value *X;
2460 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2461 return nullptr;
2462
2463 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2464
2465 // Match 1-input (unary) shuffle.
2466 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2467 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
2468 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2469 if (IsFNeg)
2470 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2471
2472 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2473 Intrinsic::fabs, Shuf.getType());
2474 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2475 NewF->setFastMathFlags(S0->getFastMathFlags());
2476 return NewF;
2477 }
2478
2479 // Match 2-input (binary) shuffle.
2480 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2481 Value *Y;
2482 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2483 S0->getOpcode() != S1->getOpcode() ||
2484 (!S0->hasOneUse() && !S1->hasOneUse()))
2485 return nullptr;
2486
2487 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2488 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2489 Instruction *NewF;
2490 if (IsFNeg) {
2491 NewF = UnaryOperator::CreateFNeg(NewShuf);
2492 } else {
2493 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2494 Intrinsic::fabs, Shuf.getType());
2495 NewF = CallInst::Create(FAbs, {NewShuf});
2496 }
2497 NewF->copyIRFlags(S0);
2498 NewF->andIRFlags(S1);
2499 return NewF;
2500}
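// A sketch of the unary (1-input) case above (placeholder names):
//   %n = fneg <4 x float> %x
//   %r = shufflevector <4 x float> %n, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// becomes
//   %s = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = fneg <4 x float> %s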
2501
2502/// Canonicalize casts after shuffle.
2503static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2504 InstCombiner::BuilderTy &Builder) {
2505 // Do we have 2 matching cast operands?
2506 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2507 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2508 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2509 Cast0->getSrcTy() != Cast1->getSrcTy())
2510 return nullptr;
2511
2512 // TODO: Allow other opcodes? That would require easing the type restrictions
2513 // below here.
2514 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2515 switch (CastOpcode) {
2516 case Instruction::FPToSI:
2517 case Instruction::FPToUI:
2518 case Instruction::SIToFP:
2519 case Instruction::UIToFP:
2520 break;
2521 default:
2522 return nullptr;
2523 }
2524
2525 VectorType *ShufTy = Shuf.getType();
2526 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2527 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2528
2529 // TODO: Allow length-increasing shuffles?
2530 if (ShufTy->getElementCount().getKnownMinValue() >
2531 ShufOpTy->getElementCount().getKnownMinValue())
2532 return nullptr;
2533
2534 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2535 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2536 "Expected fixed vector operands for casts and binary shuffle");
2537 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2538 return nullptr;
2539
2540 // At least one of the operands must have only one use (the shuffle).
2541 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2542 return nullptr;
2543
2544 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2545 Value *X = Cast0->getOperand(0);
2546 Value *Y = Cast1->getOperand(0);
2547 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2548 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2549}
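// A sketch of the cast canonicalization above (placeholder names):
//   %cx = sitofp <4 x i32> %x to <4 x float>
//   %cy = sitofp <4 x i32> %y to <4 x float>
//   %r  = shufflevector <4 x float> %cx, <4 x float> %cy, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes a shuffle of the uncast sources followed by one cast:
//   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r = sitofp <4 x i32> %s to <4 x float>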
2550
2551/// Try to fold an extract subvector operation.
2552static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2553 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2554 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2555 return nullptr;
2556
2557 // Check if we are extracting all bits of an inserted scalar:
2558 // extract-subvec (bitcast (inselt ?, X, 0)) --> bitcast X to subvec type
2559 Value *X;
2560 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2561 X->getType()->getPrimitiveSizeInBits() ==
2562 Shuf.getType()->getPrimitiveSizeInBits())
2563 return new BitCastInst(X, Shuf.getType());
2564
2565 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2566 Value *Y;
2567 ArrayRef<int> Mask;
2568 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2569 return nullptr;
2570
2571 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2572 // then combining may result in worse codegen.
2573 if (!Op0->hasOneUse())
2574 return nullptr;
2575
2576 // We are extracting a subvector from a shuffle. Remove excess elements from
2577 // the 1st shuffle mask to eliminate the extract.
2578 //
2579 // This transform is conservatively limited to identity extracts because we do
2580 // not allow arbitrary shuffle mask creation as a target-independent transform
2581 // (because we can't guarantee that will lower efficiently).
2582 //
2583 // If the extracting shuffle has a poison mask element, it transfers to the
2584 // new shuffle mask. Otherwise, copy the original mask element. Example:
2585 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2586 // shuf X, Y, <C0, poison, C2, poison>
2587 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2588 SmallVector<int, 16> NewMask(NumElts);
2589 assert(NumElts < Mask.size() &&
2590 "Identity with extract must have less elements than its inputs");
2591
2592 for (unsigned i = 0; i != NumElts; ++i) {
2593 int ExtractMaskElt = Shuf.getMaskValue(i);
2594 int MaskElt = Mask[i];
2595 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2596 }
2597 return new ShuffleVectorInst(X, Y, NewMask);
2598}
2599
2600/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2601/// operand with the operand of an insertelement.
2602static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2603 InstCombinerImpl &IC) {
2604 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2605 SmallVector<int, 16> Mask;
2606 Shuf.getShuffleMask(Mask);
2607
2608 int NumElts = Mask.size();
2609 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2610
2611 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2612 // not be able to handle it there if the insertelement has >1 use.
2613 // If the shuffle has an insertelement operand but does not choose the
2614 // inserted scalar element from that value, then we can replace that shuffle
2615 // operand with the source vector of the insertelement.
2616 Value *X;
2617 uint64_t IdxC;
2618 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2619 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2620 if (!is_contained(Mask, (int)IdxC))
2621 return IC.replaceOperand(Shuf, 0, X);
2622 }
2623 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2624 // Offset the index constant by the vector width because we are checking for
2625 // accesses to the 2nd vector input of the shuffle.
2626 IdxC += InpNumElts;
2627 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2628 if (!is_contained(Mask, (int)IdxC))
2629 return IC.replaceOperand(Shuf, 1, X);
2630 }
2631 // For the rest of the transform, the shuffle must not change vector sizes.
2632 // TODO: This restriction could be removed if the insert has only one use
2633 // (because the transform would require a new length-changing shuffle).
2634 if (NumElts != InpNumElts)
2635 return nullptr;
2636
2637 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2638 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2639 // We need an insertelement with a constant index.
2640 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2641 m_ConstantInt(IndexC))))
2642 return false;
2643
2644 // Test the shuffle mask to see if it splices the inserted scalar into the
2645 // operand 1 vector of the shuffle.
2646 int NewInsIndex = -1;
2647 for (int i = 0; i != NumElts; ++i) {
2648 // Ignore undef mask elements.
2649 if (Mask[i] == -1)
2650 continue;
2651
2652 // The shuffle takes elements of operand 1 without lane changes.
2653 if (Mask[i] == NumElts + i)
2654 continue;
2655
2656 // The shuffle must choose the inserted scalar exactly once.
2657 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2658 return false;
2659
2660 // The shuffle is placing the inserted scalar into element i.
2661 NewInsIndex = i;
2662 }
2663
2664 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2665
2666 // Index is updated to the potentially translated insertion lane.
2667 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2668 return true;
2669 };
2670
2671 // If the shuffle is unnecessary, insert the scalar operand directly into
2672 // operand 1 of the shuffle. Example:
2673 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2674 Value *Scalar;
2675 ConstantInt *IndexC;
2676 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2677 return InsertElementInst::Create(V1, Scalar, IndexC);
2678
2679 // Try again after commuting shuffle. Example:
2680 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2681 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2682 std::swap(V0, V1);
2683 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2684 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2685 return InsertElementInst::Create(V1, Scalar, IndexC);
2686
2687 return nullptr;
2688}
2689
2690static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2691 // Match the operands as identity with padding (also known as concatenation
2692 // with undef) shuffles of the same source type. The backend is expected to
2693 // recreate these concatenations from a shuffle of narrow operands.
2694 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2695 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2696 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2697 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2698 return nullptr;
2699
2700 // We limit this transform to power-of-2 types because we expect that the
2701 // backend can convert the simplified IR patterns to identical nodes as the
2702 // original IR.
2703 // TODO: If we can verify the same behavior for arbitrary types, the
2704 // power-of-2 checks can be removed.
2705 Value *X = Shuffle0->getOperand(0);
2706 Value *Y = Shuffle1->getOperand(0);
2707 if (X->getType() != Y->getType() ||
2708 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2709 !isPowerOf2_32(
2710 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2711 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2712 match(X, m_Undef()) || match(Y, m_Undef()))
2713 return nullptr;
2714 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2715 match(Shuffle1->getOperand(1), m_Undef()) &&
2716 "Unexpected operand for identity shuffle");
2717
2718 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2719 // operands directly by adjusting the shuffle mask to account for the narrower
2720 // types:
2721 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2722 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2723 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2724 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2725
2726 ArrayRef<int> Mask = Shuf.getShuffleMask();
2727 SmallVector<int, 16> NewMask(Mask.size(), -1);
2728 for (int i = 0, e = Mask.size(); i != e; ++i) {
2729 if (Mask[i] == -1)
2730 continue;
2731
2732 // If this shuffle is choosing an undef element from 1 of the sources, that
2733 // element is undef.
2734 if (Mask[i] < WideElts) {
2735 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2736 continue;
2737 } else {
2738 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2739 continue;
2740 }
2741
2742 // If this shuffle is choosing from the 1st narrow op, the mask element is
2743 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2744 // element is offset down to adjust for the narrow vector widths.
2745 if (Mask[i] < WideElts) {
2746 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2747 NewMask[i] = Mask[i];
2748 } else {
2749 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2750 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2751 }
2752 }
2753 return new ShuffleVectorInst(X, Y, NewMask);
2754}
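// A sketch of the fold above (placeholder names): a shuffle of two
// concat-with-poison (identity-with-padding) shuffles
//   %wx = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %wy = shufflevector <2 x i8> %y, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
//   %r  = shufflevector <4 x i8> %wx, <4 x i8> %wy, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// shuffles the narrow sources directly:
//   %r = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>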
2755
2756// Splatting the first element of the result of a BinOp, where either of the
2757// BinOp's operands is itself a first-element splat, can be simplified to
2758// splatting the first element of the BinOp's result.
2759Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2760 if (!match(SVI.getOperand(1), m_Poison()) ||
2761 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2762 !SVI.getOperand(0)->hasOneUse())
2763 return nullptr;
2764
2765 Value *Op0 = SVI.getOperand(0);
2766 Value *X, *Y;
2767 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2768 m_Value(Y))) &&
2769 !match(Op0, m_BinOp(m_Value(X),
2770 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2771 return nullptr;
2772 if (X->getType() != Y->getType())
2773 return nullptr;
2774
2775 auto *BinOp = cast<BinaryOperator>(Op0);
2776 if (!isSafeToSpeculativelyExecute(BinOp))
2777 return nullptr;
2778
2779 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2780 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2781 NewBOI->copyIRFlags(BinOp);
2782
2783 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2784}
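// A sketch of the splat simplification above (placeholder names):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// can use the unsplatted operand instead, since only lane 0 matters:
//   %b = add <4 x i32> %x, %y
//   %r = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer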
2785
2786Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2787 Value *LHS = SVI.getOperand(0);
2788 Value *RHS = SVI.getOperand(1);
2789 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2790 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2791 SVI.getType(), ShufQuery))
2792 return replaceInstUsesWith(SVI, V);
2793
2794 if (Instruction *I = simplifyBinOpSplats(SVI))
2795 return I;
2796
2797 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2798 // order to support scalable vectors.
2799 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2800 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2801
2802 if (isa<ScalableVectorType>(LHS->getType()))
2803 return nullptr;
2804
2805 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2806 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2807
2808 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2809 //
2810 // if X and Y are of the same (vector) type, and the element size is not
2811 // changed by the bitcasts, we can distribute the bitcasts through the
2812 // shuffle, hopefully reducing the number of instructions. We make sure that
2813 // at least one bitcast only has one use, so we don't *increase* the number of
2814 // instructions here.
2815 Value *X, *Y;
2816 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2817 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2818 X->getType()->getScalarSizeInBits() ==
2819 SVI.getType()->getScalarSizeInBits() &&
2820 (LHS->hasOneUse() || RHS->hasOneUse())) {
2821 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2822 SVI.getName() + ".uncasted");
2823 return new BitCastInst(V, SVI.getType());
2824 }
2825
2826 ArrayRef<int> Mask = SVI.getShuffleMask();
2827
2828 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2829 // simulated shuffle can simplify, then this shuffle is unnecessary:
2830 // shuf (bitcast X), undef, Mask --> bitcast X'
2831 // TODO: This could be extended to allow length-changing shuffles.
2832 // The transform might also be obsoleted if we allowed canonicalization
2833 // of bitcasted shuffles.
2834 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2835 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2836 // Try to create a scaled mask constant.
2837 auto *XType = cast<FixedVectorType>(X->getType());
2838 unsigned XNumElts = XType->getNumElements();
2839 SmallVector<int, 16> ScaledMask;
2840 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2841 // If the shuffled source vector simplifies, cast that value to this
2842 // shuffle's type.
2843 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2844 ScaledMask, XType, ShufQuery))
2845 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2846 }
2847 }
2848
2849 // shuffle x, x, mask --> shuffle x, undef, mask'
2850 if (LHS == RHS) {
2851 assert(!match(RHS, m_Undef()) &&
2852 "Shuffle with 2 undef ops not simplified?");
2853 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2854 }
2855
2856 // shuffle undef, x, mask --> shuffle x, undef, mask'
2857 if (match(LHS, m_Undef())) {
2858 SVI.commute();
2859 return &SVI;
2860 }
2861
2862 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2863 return I;
2864
2865 if (Instruction *I = foldSelectShuffle(SVI))
2866 return I;
2867
2868 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2869 return I;
2870
2871 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2872 return I;
2873
2874 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2875 return I;
2876
2877 if (Instruction *I = foldCastShuffle(SVI, Builder))
2878 return I;
2879
2880 APInt PoisonElts(VWidth, 0);
2881 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2882 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2883 if (V != &SVI)
2884 return replaceInstUsesWith(SVI, V);
2885 return &SVI;
2886 }
2887
2888 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2889 return I;
2890
2891 // These transforms have the potential to lose undef knowledge, so they are
2892 // intentionally placed after SimplifyDemandedVectorElts().
2893 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2894 return I;
2895 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2896 return I;
2897
2898 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
2899 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2900 return replaceInstUsesWith(SVI, V);
2901 }
2902
2903 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2904 // a non-vector type. We can instead bitcast the original vector followed by
2905 // an extract of the desired element:
2906 //
2907 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2908 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2909 // %1 = bitcast <4 x i8> %sroa to i32
2910 // Becomes:
2911 // %bc = bitcast <16 x i8> %in to <4 x i32>
2912 // %ext = extractelement <4 x i32> %bc, i32 0
2913 //
2914 // If the shuffle is extracting a contiguous range of values from the input
2915 // vector then each use which is a bitcast of the extracted size can be
2916 // replaced. This will work if the vector types are compatible, and the begin
2917 // index is aligned to a value in the casted vector type. If the begin index
2918 // isn't aligned then we can shuffle the original vector (keeping the same
2919 // vector type) before extracting.
2920 //
2921 // This code will bail out if the target type is fundamentally incompatible
2922 // with vectors of the source type.
2923 //
2924 // Example of <16 x i8>, target type i32:
2925 // Index range [4,8): v-----------v Will work.
2926 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2927 // <16 x i8>: | | | | | | | | | | | | | | | | |
2928 // <4 x i32>: | | | | |
2929 // +-----------+-----------+-----------+-----------+
2930 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2931 // Target type i40: ^--------------^ Won't work, bail.
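// For example (illustrative), the unaligned range [6,10) above is handled by
// first shuffling bytes 6..9 of the input down to lanes 0..3 (remaining
// lanes poison), then bitcasting that <16 x i8> to <4 x i32> and extracting
// element 0.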
2932 bool MadeChange = false;
2933 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2934 Value *V = LHS;
2935 unsigned MaskElems = Mask.size();
2936 auto *SrcTy = cast<FixedVectorType>(V->getType());
2937 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2938 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2939 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2940 unsigned SrcNumElems = SrcTy->getNumElements();
2941 SmallVector<BitCastInst *, 8> BCs;
2942 DenseMap<Type *, Value *> NewBCs;
2943 for (User *U : SVI.users())
2944 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2945 if (!BC->use_empty())
2946 // Only visit bitcasts that weren't previously handled.
2947 BCs.push_back(BC);
2948 for (BitCastInst *BC : BCs) {
2949 unsigned BegIdx = Mask.front();
2950 Type *TgtTy = BC->getDestTy();
2951 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2952 if (!TgtElemBitWidth)
2953 continue;
2954 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2955 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2956 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
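// For example, with a <16 x i8> source and an i32 target: TgtNumElems =
// 128 / 32 = 4, BegIdx 4 is aligned ((8 * 4) % 32 == 0), and BegIdx 6 is
// not ((8 * 6) % 32 == 16).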
2957 if (!VecBitWidthsEqual)
2958 continue;
2959 if (!VectorType::isValidElementType(TgtTy))
2960 continue;
2961 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2962 if (!BegIsAligned) {
2963 // Shuffle the input so [0,MaskElems) contains the output, and
2964 // [MaskElems,SrcNumElems) is poison.
2965 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2966 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2967 ShuffleMask[I] = Idx;
2968 V = Builder.CreateShuffleVector(V, ShuffleMask,
2969 SVI.getName() + ".extract");
2970 BegIdx = 0;
2971 }
2972 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2973 assert(SrcElemsPerTgtElem);
2974 BegIdx /= SrcElemsPerTgtElem;
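// For example, with i8 source elements and an i32 target,
// SrcElemsPerTgtElem = 32 / 8 = 4, so byte index 4 becomes element 1 of the
// <4 x i32> cast vector.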
2975 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
2976 auto *NewBC =
2977 BCAlreadyExists
2978 ? NewBCs[CastSrcTy]
2979 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2980 if (!BCAlreadyExists)
2981 NewBCs[CastSrcTy] = NewBC;
2982 auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
2983 SVI.getName() + ".extract");
2984 // The shufflevector isn't being replaced: the bitcast that used it
2985 // is. InstCombine will visit the newly-created instructions.
2986 replaceInstUsesWith(*BC, Ext);
2987 MadeChange = true;
2988 }
2989 }
2990
2991 // If the LHS is a shufflevector itself, see if we can combine it with this
2992 // one without producing an unusual shuffle.
2993 // Cases that might be simplified:
2994 // 1.
2995 // x1=shuffle(v1,v2,mask1)
2996 // x=shuffle(x1,undef,mask)
2997 // ==>
2998 // x=shuffle(v1,undef,newMask)
2999 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3000 // 2.
3001 // x1=shuffle(v1,undef,mask1)
3002 // x=shuffle(x1,x2,mask)
3003 // where v1.size() == mask1.size()
3004 // ==>
3005 // x=shuffle(v1,x2,newMask)
3006 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3007 // 3.
3008 // x2=shuffle(v2,undef,mask2)
3009 // x=shuffle(x1,x2,mask)
3010 // where v2.size() == mask2.size()
3011 // ==>
3012 // x=shuffle(x1,v2,newMask)
3013 // newMask[i] = (mask[i] < x1.size())
3014 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3015 // 4.
3016 // x1=shuffle(v1,undef,mask1)
3017 // x2=shuffle(v2,undef,mask2)
3018 // x=shuffle(x1,x2,mask)
3019 // where v1.size() == v2.size()
3020 // ==>
3021 // x=shuffle(v1,v2,newMask)
3022 // newMask[i] = (mask[i] < x1.size())
3023 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3024 //
3025 // Here we are really conservative:
3026 // we are absolutely afraid of producing a shuffle mask not in the input
3027 // program, because the code gen may not be smart enough to turn a merged
3028 // shuffle into two specific shuffles: it may produce worse code. As such,
3029 // we only merge two shuffles if the result is either a splat or one of the
3030 // input shuffle masks. In this case, merging the shuffles just removes
3031 // one instruction, which we know is safe. This is good for things like
3032 // turning: (splat(splat)) -> splat, or
3033 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
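// For example (illustrative), case 1 merging to a splat (%v1 is a
// placeholder input vector):
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> poison,
//                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison,
//                       <4 x i32> zeroinitializer
// -->
//   %x  = shufflevector <4 x i32> %v1, <4 x i32> poison,
//                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// newMask[i] = mask1[mask[i]] = mask1[0] = 3 for every i, which is a splat,
// so the merge is accepted.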
3034 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3035 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3036 if (LHSShuffle)
3037 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3038 !match(RHS, m_Poison()))
3039 LHSShuffle = nullptr;
3040 if (RHSShuffle)
3041 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3042 RHSShuffle = nullptr;
3043 if (!LHSShuffle && !RHSShuffle)
3044 return MadeChange ? &SVI : nullptr;
3045
3046 Value* LHSOp0 = nullptr;
3047 Value* LHSOp1 = nullptr;
3048 Value* RHSOp0 = nullptr;
3049 unsigned LHSOp0Width = 0;
3050 unsigned RHSOp0Width = 0;
3051 if (LHSShuffle) {
3052 LHSOp0 = LHSShuffle->getOperand(0);
3053 LHSOp1 = LHSShuffle->getOperand(1);
3054 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3055 }
3056 if (RHSShuffle) {
3057 RHSOp0 = RHSShuffle->getOperand(0);
3058 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3059 }
3060 Value* newLHS = LHS;
3061 Value* newRHS = RHS;
3062 if (LHSShuffle) {
3063 // case 1
3064 if (match(RHS, m_Poison())) {
3065 newLHS = LHSOp0;
3066 newRHS = LHSOp1;
3067 }
3068 // case 2 or 4
3069 else if (LHSOp0Width == LHSWidth) {
3070 newLHS = LHSOp0;
3071 }
3072 }
3073 // case 3 or 4
3074 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3075 newRHS = RHSOp0;
3076 }
3077 // case 4
3078 if (LHSOp0 == RHSOp0) {
3079 newLHS = LHSOp0;
3080 newRHS = nullptr;
3081 }
3082
3083 if (newLHS == LHS && newRHS == RHS)
3084 return MadeChange ? &SVI : nullptr;
3085
3086 ArrayRef<int> LHSMask;
3087 ArrayRef<int> RHSMask;
3088 if (newLHS != LHS)
3089 LHSMask = LHSShuffle->getShuffleMask();
3090 if (RHSShuffle && newRHS != RHS)
3091 RHSMask = RHSShuffle->getShuffleMask();
3092
3093 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3094 SmallVector<int, 16> newMask;
3095 bool isSplat = true;
3096 int SplatElt = -1;
3097 // Create a new mask for the new ShuffleVectorInst so that the new
3098 // ShuffleVectorInst is equivalent to the original one.
3099 for (unsigned i = 0; i < VWidth; ++i) {
3100 int eltMask;
3101 if (Mask[i] < 0) {
3102 // This element is a poison value.
3103 eltMask = -1;
3104 } else if (Mask[i] < (int)LHSWidth) {
3105 // This element is from the left-hand-side vector operand.
3106 //
3107 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3108 // new mask value for the element.
3109 if (newLHS != LHS) {
3110 eltMask = LHSMask[Mask[i]];
3111 // If the value selected is a poison value, explicitly specify it
3112 // with a -1 mask value.
3113 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3114 eltMask = -1;
3115 } else
3116 eltMask = Mask[i];
3117 } else {
3118 // This element is from the right-hand-side vector operand.
3119 //
3120 // If the value selected is a poison value, explicitly specify it
3121 // with a -1 mask value. (case 1)
3122 if (match(RHS, m_Poison()))
3123 eltMask = -1;
3124 // If RHS is going to be replaced (case 3 or 4), calculate the
3125 // new mask value for the element.
3126 else if (newRHS != RHS) {
3127 eltMask = RHSMask[Mask[i]-LHSWidth];
3128 // If the value selected is a poison value, explicitly specify it
3129 // with a -1 mask value.
3130 if (eltMask >= (int)RHSOp0Width) {
3131 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3132 "should have been check above");
3133 eltMask = -1;
3134 }
3135 } else
3136 eltMask = Mask[i]-LHSWidth;
3137
3138 // If LHS's width is changed, shift the mask value accordingly.
3139 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3140 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3141 // If newRHS == newLHS, we want to remap any references from newRHS to
3142 // newLHS so that we can properly identify splats that may occur due to
3143 // obfuscation across the two vectors.
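// For example (illustrative), in case 3 with 4-element operands: a mask
// value of 5 selects x2[1]; if mask2[1] == 2 the element is really v2[2],
// so the rebased value in the merged two-operand mask is
// 2 + newLHSWidth == 6.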
3144 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3145 eltMask += newLHSWidth;
3146 }
3147
3148 // Check if this could still be a splat.
3149 if (eltMask >= 0) {
3150 if (SplatElt >= 0 && SplatElt != eltMask)
3151 isSplat = false;
3152 SplatElt = eltMask;
3153 }
3154
3155 newMask.push_back(eltMask);
3156 }
3157
3158 // If the result mask is equal to one of the original shuffle masks,
3159 // or is a splat, do the replacement.
3160 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3161 if (!newRHS)
3162 newRHS = PoisonValue::get(newLHS->getType());
3163 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3164 }
3165
3166 return MadeChange ? &SVI : nullptr;
3167}