Bug Summary

File: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Warning: line 196, column 39
Division by zero
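
The flagged expression is NumElts / NumSrcElts at line 196 (step 25 of the path below). As a minimal sketch of the analyzer's reasoning, the hypothetical helper narrowingRatio below mirrors the branch structure followed in foldBitcastExtElt(): the path constrains NumSrcElts != NumElts and then NumSrcElts < NumElts, but nothing on the path establishes NumSrcElts > 0, so the divisor is treated as possibly zero. This is an assumption-level reduction, not code from the report.

// Hypothetical reduction (not part of the analyzed file): mirrors the
// control flow the analyzer followed in foldBitcastExtElt().
#include <cstdio>

static unsigned narrowingRatio(unsigned NumElts, unsigned NumSrcElts) {
  if (NumSrcElts == NumElts)       // path steps 11-12: assumed false
    return 1;
  if (NumSrcElts < NumElts)        // path steps 13-14: assumed true
    return NumElts / NumSrcElts;   // step 25: divisor never proven nonzero
  return 1;
}

int main() {
  // In the real code NumSrcElts comes from getVectorNumElements(), which is
  // nonzero for any concrete vector type; the analyzer cannot see that
  // invariant, hence the report.
  std::printf("%u\n", narrowingRatio(8, 2)); // prints 4
  return 0;
}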

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name InstCombineVectorOps.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Transforms/InstCombine -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-11-181444-25759-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstructionSimplify.h"
21#include "llvm/Analysis/VectorUtils.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/Constant.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Operator.h"
30#include "llvm/IR/PatternMatch.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/User.h"
33#include "llvm/IR/Value.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40#include <utility>
41
42using namespace llvm;
43using namespace PatternMatch;
44
45#define DEBUG_TYPE "instcombine"
46
47/// Return true if the value is cheaper to scalarize than it is to leave as a
48/// vector operation. IsConstantExtractIndex indicates whether we are extracting
49/// one known element from a vector constant.
50///
51/// FIXME: It's possible to create more instructions than previously existed.
52static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53 // If we can pick a scalar constant value out of a vector, that is free.
54 if (auto *C = dyn_cast<Constant>(V))
55 return IsConstantExtractIndex || C->getSplatValue();
56
57 // An insertelement to the same constant index as our extract will simplify
58 // to the scalar inserted element. An insertelement to a different constant
59 // index is irrelevant to our extract.
60 if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61 return IsConstantExtractIndex;
62
63 if (match(V, m_OneUse(m_Load(m_Value()))))
64 return true;
65
66 Value *V0, *V1;
67 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69 cheapToScalarize(V1, IsConstantExtractIndex))
70 return true;
71
72 CmpInst::Predicate UnusedPred;
73 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75 cheapToScalarize(V1, IsConstantExtractIndex))
76 return true;
77
78 return false;
79}
80
81// If we have a PHI node with a vector type that is only used to feed
82// itself and be an operand of extractelement at a constant location,
83// try to replace the PHI of the vector type with a PHI of a scalar type.
84Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
85 SmallVector<Instruction *, 2> Extracts;
86 // The users we want the PHI to have are:
87 // 1) The EI ExtractElement (we already know this)
88 // 2) Possibly more ExtractElements with the same index.
89 // 3) Another operand, which will feed back into the PHI.
90 Instruction *PHIUser = nullptr;
91 for (auto U : PN->users()) {
92 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93 if (EI.getIndexOperand() == EU->getIndexOperand())
94 Extracts.push_back(EU);
95 else
96 return nullptr;
97 } else if (!PHIUser) {
98 PHIUser = cast<Instruction>(U);
99 } else {
100 return nullptr;
101 }
102 }
103
104 if (!PHIUser)
105 return nullptr;
106
107 // Verify that this PHI user has one use, which is the PHI itself,
108 // and that it is a binary operation which is cheap to scalarize.
109 // otherwise return nullptr.
110 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
111 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
112 return nullptr;
113
114 // Create a scalar PHI node that will replace the vector PHI node
115 // just before the current PHI node.
116 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118 // Scalarize each PHI operand.
119 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
120 Value *PHIInVal = PN->getIncomingValue(i);
121 BasicBlock *inBB = PN->getIncomingBlock(i);
122 Value *Elt = EI.getIndexOperand();
123 // If the operand is the PHI induction variable:
124 if (PHIInVal == PHIUser) {
125 // Scalarize the binary operation. Its first operand is the
126 // scalar PHI, and the second operand is extracted from the other
127 // vector operand.
128 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
130 Value *Op = InsertNewInstWith(
131 ExtractElementInst::Create(B0->getOperand(opId), Elt,
132 B0->getOperand(opId)->getName() + ".Elt"),
133 *B0);
134 Value *newPHIUser = InsertNewInstWith(
135 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
136 scalarPHI, Op, B0), *B0);
137 scalarPHI->addIncoming(newPHIUser, inBB);
138 } else {
139 // Scalarize PHI input:
140 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141 // Insert the new instruction into the predecessor basic block.
142 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143 BasicBlock::iterator InsertPos;
144 if (pos && !isa<PHINode>(pos)) {
145 InsertPos = ++pos->getIterator();
146 } else {
147 InsertPos = inBB->getFirstInsertionPt();
148 }
149
150 InsertNewInstWith(newEI, *InsertPos);
151
152 scalarPHI->addIncoming(newEI, inBB);
153 }
154 }
155
156 for (auto E : Extracts)
157 replaceInstUsesWith(*E, scalarPHI);
158
159 return &EI;
160}
161
162static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
163 InstCombiner::BuilderTy &Builder,
164 bool IsBigEndian) {
165 Value *X;
166 uint64_t ExtIndexC;
167 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
9
Taking false branch
168 !X->getType()->isVectorTy() ||
169 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
170 return nullptr;
171
172 // If this extractelement is using a bitcast from a vector of the same number
173 // of elements, see if we can find the source element from the source vector:
174 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175 Type *SrcTy = X->getType();
176 Type *DestTy = Ext.getType();
177 unsigned NumSrcElts = SrcTy->getVectorNumElements();
10
'NumSrcElts' initialized here
178 unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179 if (NumSrcElts == NumElts)
11
Assuming 'NumSrcElts' is not equal to 'NumElts'
12
Taking false branch
180 if (Value *Elt = findScalarElement(X, ExtIndexC))
181 return new BitCastInst(Elt, DestTy);
182
183 // If the source elements are wider than the destination, try to shift and
184 // truncate a subset of scalar bits of an insert op.
185 if (NumSrcElts < NumElts) {
13
Assuming 'NumSrcElts' is < 'NumElts'
14
Taking true branch
186 Value *Scalar;
187 uint64_t InsIndexC;
188 if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
15
Calling 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 63>>'
23
Returning from 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 63>>'
24
Taking false branch
189 m_ConstantInt(InsIndexC))))
190 return nullptr;
191
192 // The extract must be from the subset of vector elements that we inserted
193 // into. Example: if we inserted element 1 of a <2 x i64> and we are
194 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195 // of elements 4-7 of the bitcasted vector.
196 unsigned NarrowingRatio = NumElts / NumSrcElts;
25
Division by zero
197 if (ExtIndexC / NarrowingRatio != InsIndexC)
198 return nullptr;
199
200 // We are extracting part of the original scalar. How that scalar is
201 // inserted into the vector depends on the endian-ness. Example:
202 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
203 // +--+--+--+--+--+--+--+--+
204 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
205 // extelt <4 x i16> V', 3: | |S2|S3|
206 // +--+--+--+--+--+--+--+--+
207 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209 // In this example, we must right-shift little-endian. Big-endian is just a
210 // truncate.
211 unsigned Chunk = ExtIndexC % NarrowingRatio;
212 if (IsBigEndian)
213 Chunk = NarrowingRatio - 1 - Chunk;
214
215 // Bail out if this is an FP vector to FP vector sequence. That would take
216 // more instructions than we started with unless there is no shift, and it
217 // may not be handled as well in the backend.
218 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219 bool NeedDestBitcast = DestTy->isFloatingPointTy();
220 if (NeedSrcBitcast && NeedDestBitcast)
221 return nullptr;
222
223 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225 unsigned ShAmt = Chunk * DestWidth;
226
227 // TODO: This limitation is more strict than necessary. We could sum the
228 // number of new instructions and subtract the number eliminated to know if
229 // we can proceed.
230 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
231 if (NeedSrcBitcast || NeedDestBitcast)
232 return nullptr;
233
234 if (NeedSrcBitcast) {
235 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237 }
238
239 if (ShAmt) {
240 // Bail out if we could end with more instructions than we started with.
241 if (!Ext.getVectorOperand()->hasOneUse())
242 return nullptr;
243 Scalar = Builder.CreateLShr(Scalar, ShAmt);
244 }
245
246 if (NeedDestBitcast) {
247 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249 }
250 return new TruncInst(Scalar, DestTy);
251 }
252
253 return nullptr;
254}
255
256/// Find elements of V demanded by UserInstr.
257static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
258 unsigned VWidth = V->getType()->getVectorNumElements();
259
260 // Conservatively assume that all elements are needed.
261 APInt UsedElts(APInt::getAllOnesValue(VWidth));
262
263 switch (UserInstr->getOpcode()) {
264 case Instruction::ExtractElement: {
265 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
266    assert(EEI->getVectorOperand() == V);
267 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
268 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
269 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
270 }
271 break;
272 }
273 case Instruction::ShuffleVector: {
274 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
275 unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements();
276
277 UsedElts = APInt(VWidth, 0);
278 for (unsigned i = 0; i < MaskNumElts; i++) {
279 unsigned MaskVal = Shuffle->getMaskValue(i);
280 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
281 continue;
282 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
283 UsedElts.setBit(MaskVal);
284 if (Shuffle->getOperand(1) == V &&
285 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
286 UsedElts.setBit(MaskVal - VWidth);
287 }
288 break;
289 }
290 default:
291 break;
292 }
293 return UsedElts;
294}
295
296/// Find union of elements of V demanded by all its users.
297/// If it is known by querying findDemandedEltsBySingleUser that
298/// no user demands an element of V, then the corresponding bit
299/// remains unset in the returned value.
300static APInt findDemandedEltsByAllUsers(Value *V) {
301 unsigned VWidth = V->getType()->getVectorNumElements();
302
303 APInt UnionUsedElts(VWidth, 0);
304 for (const Use &U : V->uses()) {
305 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
306 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
307 } else {
308 UnionUsedElts = APInt::getAllOnesValue(VWidth);
309 break;
310 }
311
312 if (UnionUsedElts.isAllOnesValue())
313 break;
314 }
315
316 return UnionUsedElts;
317}
318
319Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
320 Value *SrcVec = EI.getVectorOperand();
321 Value *Index = EI.getIndexOperand();
322 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
1
Assuming 'V' is null
2
Taking false branch
323 SQ.getWithInstruction(&EI)))
324 return replaceInstUsesWith(EI, V);
325
326 // If extracting a specified index from the vector, see if we can recursively
327 // find a previously computed scalar that was inserted into the vector.
328 auto *IndexC = dyn_cast<ConstantInt>(Index);
3
Assuming 'Index' is a 'ConstantInt'
329 if (IndexC
3.1
'IndexC' is non-null
) {
4
Taking true branch
330 unsigned NumElts = EI.getVectorOperandType()->getNumElements();
331
332 // InstSimplify should handle cases where the index is invalid.
333 if (!IndexC->getValue().ule(NumElts))
5
Taking false branch
334 return nullptr;
335
336 // This instruction only demands the single element from the input vector.
337 if (NumElts != 1) {
6
Assuming 'NumElts' is equal to 1
7
Taking false branch
338 // If the input vector has a single use, simplify it based on this use
339 // property.
340 if (SrcVec->hasOneUse()) {
341 APInt UndefElts(NumElts, 0);
342 APInt DemandedElts(NumElts, 0);
343 DemandedElts.setBit(IndexC->getZExtValue());
344 if (Value *V =
345 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) {
346 EI.setOperand(0, V);
347 return &EI;
348 }
349 } else {
350 // If the input vector has multiple uses, simplify it based on a union
351 // of all elements used.
352 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
353 if (!DemandedElts.isAllOnesValue()) {
354 APInt UndefElts(NumElts, 0);
355 if (Value *V = SimplifyDemandedVectorElts(
356 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
357 true /* AllowMultipleUsers */)) {
358 if (V != SrcVec) {
359 SrcVec->replaceAllUsesWith(V);
360 return &EI;
361 }
362 }
363 }
364 }
365 }
366 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
8
Calling 'foldBitcastExtElt'
367 return I;
368
369 // If there's a vector PHI feeding a scalar use through this extractelement
370 // instruction, try to scalarize the PHI.
371 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
372 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
373 return ScalarPHI;
374 }
375
376 BinaryOperator *BO;
377 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
378 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
379 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
380 Value *E0 = Builder.CreateExtractElement(X, Index);
381 Value *E1 = Builder.CreateExtractElement(Y, Index);
382 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
383 }
384
385 Value *X, *Y;
386 CmpInst::Predicate Pred;
387 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
388 cheapToScalarize(SrcVec, IndexC)) {
389 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
390 Value *E0 = Builder.CreateExtractElement(X, Index);
391 Value *E1 = Builder.CreateExtractElement(Y, Index);
392 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
393 }
394
395 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
396 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
397 // Extracting the inserted element?
398 if (IE->getOperand(2) == Index)
399 return replaceInstUsesWith(EI, IE->getOperand(1));
400 // If the inserted and extracted elements are constants, they must not
401 // be the same value, extract from the pre-inserted value instead.
402 if (isa<Constant>(IE->getOperand(2)) && IndexC) {
403 Worklist.AddValue(SrcVec);
404 EI.setOperand(0, IE->getOperand(0));
405 return &EI;
406 }
407 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
408 // If this is extracting an element from a shufflevector, figure out where
409 // it came from and extract from the appropriate input element instead.
410 if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
411 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
412 Value *Src;
413 unsigned LHSWidth =
414 SVI->getOperand(0)->getType()->getVectorNumElements();
415
416 if (SrcIdx < 0)
417 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
418 if (SrcIdx < (int)LHSWidth)
419 Src = SVI->getOperand(0);
420 else {
421 SrcIdx -= LHSWidth;
422 Src = SVI->getOperand(1);
423 }
424 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
425 return ExtractElementInst::Create(Src,
426 ConstantInt::get(Int32Ty,
427 SrcIdx, false));
428 }
429 } else if (auto *CI = dyn_cast<CastInst>(I)) {
430 // Canonicalize extractelement(cast) -> cast(extractelement).
431 // Bitcasts can change the number of vector elements, and they cost
432 // nothing.
433 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
434 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
435 Worklist.AddValue(EE);
436 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
437 }
438
439 // If the input is a bitcast from x86_mmx, turn into a single bitcast from
440 // the mmx type to the scalar type.
441 if (CI->getOpcode() == Instruction::BitCast &&
442 EI.getVectorOperandType()->getNumElements() == 1 &&
443 CI->getOperand(0)->getType()->isX86_MMXTy())
444 return new BitCastInst(CI->getOperand(0), EI.getType());
445 }
446 }
447 return nullptr;
448}
449
450/// If V is a shuffle of values that ONLY returns elements from either LHS or
451/// RHS, return the shuffle mask and true. Otherwise, return false.
452static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
453 SmallVectorImpl<Constant*> &Mask) {
454   assert(LHS->getType() == RHS->getType() &&
455          "Invalid CollectSingleShuffleElements");
456 unsigned NumElts = V->getType()->getVectorNumElements();
457
458 if (isa<UndefValue>(V)) {
459 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
460 return true;
461 }
462
463 if (V == LHS) {
464 for (unsigned i = 0; i != NumElts; ++i)
465 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
466 return true;
467 }
468
469 if (V == RHS) {
470 for (unsigned i = 0; i != NumElts; ++i)
471 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
472 i+NumElts));
473 return true;
474 }
475
476 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
477 // If this is an insert of an extract from some other vector, include it.
478 Value *VecOp = IEI->getOperand(0);
479 Value *ScalarOp = IEI->getOperand(1);
480 Value *IdxOp = IEI->getOperand(2);
481
482 if (!isa<ConstantInt>(IdxOp))
483 return false;
484 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
485
486 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
487 // We can handle this if the vector we are inserting into is
488 // transitively ok.
489 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
490 // If so, update the mask to reflect the inserted undef.
491 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
492 return true;
493 }
494 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
495 if (isa<ConstantInt>(EI->getOperand(1))) {
496 unsigned ExtractedIdx =
497 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
498 unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
499
500 // This must be extracting from either LHS or RHS.
501 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
502 // We can handle this if the vector we are inserting into is
503 // transitively ok.
504 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
505 // If so, update the mask to reflect the inserted value.
506 if (EI->getOperand(0) == LHS) {
507 Mask[InsertedIdx % NumElts] =
508 ConstantInt::get(Type::getInt32Ty(V->getContext()),
509 ExtractedIdx);
510 } else {
511             assert(EI->getOperand(0) == RHS);
512 Mask[InsertedIdx % NumElts] =
513 ConstantInt::get(Type::getInt32Ty(V->getContext()),
514 ExtractedIdx + NumLHSElts);
515 }
516 return true;
517 }
518 }
519 }
520 }
521 }
522
523 return false;
524}
525
526/// If we have insertion into a vector that is wider than the vector that we
527/// are extracting from, try to widen the source vector to allow a single
528/// shufflevector to replace one or more insert/extract pairs.
529static void replaceExtractElements(InsertElementInst *InsElt,
530 ExtractElementInst *ExtElt,
531 InstCombiner &IC) {
532 VectorType *InsVecType = InsElt->getType();
533 VectorType *ExtVecType = ExtElt->getVectorOperandType();
534 unsigned NumInsElts = InsVecType->getVectorNumElements();
535 unsigned NumExtElts = ExtVecType->getVectorNumElements();
536
537 // The inserted-to vector must be wider than the extracted-from vector.
538 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
539 NumExtElts >= NumInsElts)
540 return;
541
542 // Create a shuffle mask to widen the extended-from vector using undefined
543 // values. The mask selects all of the values of the original vector followed
544 // by as many undefined values as needed to create a vector of the same length
545 // as the inserted-to vector.
546 SmallVector<Constant *, 16> ExtendMask;
547 IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
548 for (unsigned i = 0; i < NumExtElts; ++i)
549 ExtendMask.push_back(ConstantInt::get(IntType, i));
550 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
551 ExtendMask.push_back(UndefValue::get(IntType));
552
553 Value *ExtVecOp = ExtElt->getVectorOperand();
554 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
555 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
556 ? ExtVecOpInst->getParent()
557 : ExtElt->getParent();
558
559 // TODO: This restriction matches the basic block check below when creating
560 // new extractelement instructions. If that limitation is removed, this one
561 // could also be removed. But for now, we just bail out to ensure that we
562 // will replace the extractelement instruction that is feeding our
563 // insertelement instruction. This allows the insertelement to then be
564 // replaced by a shufflevector. If the insertelement is not replaced, we can
565 // induce infinite looping because there's an optimization for extractelement
566 // that will delete our widening shuffle. This would trigger another attempt
567 // here to create that shuffle, and we spin forever.
568 if (InsertionBlock != InsElt->getParent())
569 return;
570
571 // TODO: This restriction matches the check in visitInsertElementInst() and
572 // prevents an infinite loop caused by not turning the extract/insert pair
573 // into a shuffle. We really should not need either check, but we're lacking
574 // folds for shufflevectors because we're afraid to generate shuffle masks
575 // that the backend can't handle.
576 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
577 return;
578
579 auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
580 ConstantVector::get(ExtendMask));
581
582 // Insert the new shuffle after the vector operand of the extract is defined
583 // (as long as it's not a PHI) or at the start of the basic block of the
584 // extract, so any subsequent extracts in the same basic block can use it.
585 // TODO: Insert before the earliest ExtractElementInst that is replaced.
586 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
587 WideVec->insertAfter(ExtVecOpInst);
588 else
589 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
590
591 // Replace extracts from the original narrow vector with extracts from the new
592 // wide vector.
593 for (User *U : ExtVecOp->users()) {
594 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
595 if (!OldExt || OldExt->getParent() != WideVec->getParent())
596 continue;
597 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
598 NewExt->insertAfter(OldExt);
599 IC.replaceInstUsesWith(*OldExt, NewExt);
600 }
601}
602
603/// We are building a shuffle to create V, which is a sequence of insertelement,
604/// extractelement pairs. If PermittedRHS is set, then we must either use it or
605/// not rely on the second vector source. Return a std::pair containing the
606/// left and right vectors of the proposed shuffle (or 0), and set the Mask
607/// parameter as required.
608///
609/// Note: we intentionally don't try to fold earlier shuffles since they have
610/// often been chosen carefully to be efficiently implementable on the target.
611using ShuffleOps = std::pair<Value *, Value *>;
612
613static ShuffleOps collectShuffleElements(Value *V,
614 SmallVectorImpl<Constant *> &Mask,
615 Value *PermittedRHS,
616 InstCombiner &IC) {
617   assert(V->getType()->isVectorTy() && "Invalid shuffle!");
618 unsigned NumElts = V->getType()->getVectorNumElements();
619
620 if (isa<UndefValue>(V)) {
621 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
622 return std::make_pair(
623 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
624 }
625
626 if (isa<ConstantAggregateZero>(V)) {
627 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
628 return std::make_pair(V, nullptr);
629 }
630
631 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
632 // If this is an insert of an extract from some other vector, include it.
633 Value *VecOp = IEI->getOperand(0);
634 Value *ScalarOp = IEI->getOperand(1);
635 Value *IdxOp = IEI->getOperand(2);
636
637 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
638 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
639 unsigned ExtractedIdx =
640 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
641 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
642
643 // Either the extracted from or inserted into vector must be RHSVec,
644 // otherwise we'd end up with a shuffle of three inputs.
645 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
646 Value *RHS = EI->getOperand(0);
647 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
648         assert(LR.second == nullptr || LR.second == RHS);
649
650 if (LR.first->getType() != RHS->getType()) {
651 // Although we are giving up for now, see if we can create extracts
652 // that match the inserts for another round of combining.
653 replaceExtractElements(IEI, EI, IC);
654
655 // We tried our best, but we can't find anything compatible with RHS
656 // further up the chain. Return a trivial shuffle.
657 for (unsigned i = 0; i < NumElts; ++i)
658 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
659 return std::make_pair(V, nullptr);
660 }
661
662 unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
663 Mask[InsertedIdx % NumElts] =
664 ConstantInt::get(Type::getInt32Ty(V->getContext()),
665 NumLHSElts+ExtractedIdx);
666 return std::make_pair(LR.first, RHS);
667 }
668
669 if (VecOp == PermittedRHS) {
670 // We've gone as far as we can: anything on the other side of the
671 // extractelement will already have been converted into a shuffle.
672 unsigned NumLHSElts =
673 EI->getOperand(0)->getType()->getVectorNumElements();
674 for (unsigned i = 0; i != NumElts; ++i)
675 Mask.push_back(ConstantInt::get(
676 Type::getInt32Ty(V->getContext()),
677 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
678 return std::make_pair(EI->getOperand(0), PermittedRHS);
679 }
680
681 // If this insertelement is a chain that comes from exactly these two
682 // vectors, return the vector and the effective shuffle.
683 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
684 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
685 Mask))
686 return std::make_pair(EI->getOperand(0), PermittedRHS);
687 }
688 }
689 }
690
691 // Otherwise, we can't do anything fancy. Return an identity vector.
692 for (unsigned i = 0; i != NumElts; ++i)
693 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
694 return std::make_pair(V, nullptr);
695}
696
697/// Try to find redundant insertvalue instructions, like the following ones:
698/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
699/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
700/// Here the second instruction inserts values at the same indices, as the
701/// first one, making the first one redundant.
702/// It should be transformed to:
703/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
704Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
705 bool IsRedundant = false;
706 ArrayRef<unsigned int> FirstIndices = I.getIndices();
707
708 // If there is a chain of insertvalue instructions (each of them except the
709 // last one has only one use and it's another insertvalue insn from this
710 // chain), check if any of the 'children' uses the same indices as the first
711 // instruction. In this case, the first one is redundant.
712 Value *V = &I;
713 unsigned Depth = 0;
714 while (V->hasOneUse() && Depth < 10) {
715 User *U = V->user_back();
716 auto UserInsInst = dyn_cast<InsertValueInst>(U);
717 if (!UserInsInst || U->getOperand(0) != V)
718 break;
719 if (UserInsInst->getIndices() == FirstIndices) {
720 IsRedundant = true;
721 break;
722 }
723 V = UserInsInst;
724 Depth++;
725 }
726
727 if (IsRedundant)
728 return replaceInstUsesWith(I, I.getOperand(0));
729 return nullptr;
730}
731
732static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
733 int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
734 int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
735
736 // A vector select does not change the size of the operands.
737 if (MaskSize != VecSize)
738 return false;
739
740 // Each mask element must be undefined or choose a vector element from one of
741 // the source operands without crossing vector lanes.
742 for (int i = 0; i != MaskSize; ++i) {
743 int Elt = Shuf.getMaskValue(i);
744 if (Elt != -1 && Elt != i && Elt != i + VecSize)
745 return false;
746 }
747
748 return true;
749}
750
751/// Turn a chain of inserts that splats a value into an insert + shuffle:
752/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
753/// shufflevector(insertelt(X, %k, 0), undef, zero)
754static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
755 // We are interested in the last insert in a chain. So if this insert has a
756 // single user and that user is an insert, bail.
757 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
758 return nullptr;
759
760 auto *VecTy = cast<VectorType>(InsElt.getType());
761 unsigned NumElements = VecTy->getNumElements();
762
763 // Do not try to do this for a one-element vector, since that's a nop,
764 // and will cause an inf-loop.
765 if (NumElements == 1)
766 return nullptr;
767
768 Value *SplatVal = InsElt.getOperand(1);
769 InsertElementInst *CurrIE = &InsElt;
770 SmallVector<bool, 16> ElementPresent(NumElements, false);
771 InsertElementInst *FirstIE = nullptr;
772
773 // Walk the chain backwards, keeping track of which indices we inserted into,
774 // until we hit something that isn't an insert of the splatted value.
775 while (CurrIE) {
776 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
777 if (!Idx || CurrIE->getOperand(1) != SplatVal)
778 return nullptr;
779
780 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
781 // Check none of the intermediate steps have any additional uses, except
782 // for the root insertelement instruction, which can be re-used, if it
783 // inserts at position 0.
784 if (CurrIE != &InsElt &&
785 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
786 return nullptr;
787
788 ElementPresent[Idx->getZExtValue()] = true;
789 FirstIE = CurrIE;
790 CurrIE = NextIE;
791 }
792
793 // If this is just a single insertelement (not a sequence), we are done.
794 if (FirstIE == &InsElt)
795 return nullptr;
796
797 // If we are not inserting into an undef vector, make sure we've seen an
798 // insert into every element.
799 // TODO: If the base vector is not undef, it might be better to create a splat
800 // and then a select-shuffle (blend) with the base vector.
801 if (!isa<UndefValue>(FirstIE->getOperand(0)))
802 if (any_of(ElementPresent, [](bool Present) { return !Present; }))
803 return nullptr;
804
805 // Create the insert + shuffle.
806 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
807 UndefValue *UndefVec = UndefValue::get(VecTy);
808 Constant *Zero = ConstantInt::get(Int32Ty, 0);
809 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
810 FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
811
812 // Splat from element 0, but replace absent elements with undef in the mask.
813 SmallVector<Constant *, 16> Mask(NumElements, Zero);
814 for (unsigned i = 0; i != NumElements; ++i)
815 if (!ElementPresent[i])
816 Mask[i] = UndefValue::get(Int32Ty);
817
818 return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
819}
820
821/// Try to fold an insert element into an existing splat shuffle by changing
822/// the shuffle's mask to include the index of this insert element.
823static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
824 // Check if the vector operand of this insert is a canonical splat shuffle.
825 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
826 if (!Shuf || !Shuf->isZeroEltSplat())
827 return nullptr;
828
829 // Check for a constant insertion index.
830 uint64_t IdxC;
831 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
832 return nullptr;
833
834 // Check if the splat shuffle's input is the same as this insert's scalar op.
835 Value *X = InsElt.getOperand(1);
836 Value *Op0 = Shuf->getOperand(0);
837 if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
838 return nullptr;
839
840 // Replace the shuffle mask element at the index of this insert with a zero.
841 // For example:
842 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
843 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
844 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
845 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
846 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
847 Constant *Zero = ConstantInt::getNullValue(I32Ty);
848 for (unsigned i = 0; i != NumMaskElts; ++i)
849 NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
850
851 Constant *NewMask = ConstantVector::get(NewMaskVec);
852 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
853}
854
855/// Try to fold an extract+insert element into an existing identity shuffle by
856/// changing the shuffle's mask to include the index of this insert element.
857static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
858 // Check if the vector operand of this insert is an identity shuffle.
859 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
860 if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
861 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
862 return nullptr;
863
864 // Check for a constant insertion index.
865 uint64_t IdxC;
866 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
867 return nullptr;
868
869 // Check if this insert's scalar op is extracted from the identity shuffle's
870 // input vector.
871 Value *Scalar = InsElt.getOperand(1);
872 Value *X = Shuf->getOperand(0);
873 if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
874 return nullptr;
875
876 // Replace the shuffle mask element at the index of this extract+insert with
877 // that same index value.
878 // For example:
879 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
880 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
881 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
882 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
883 Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC);
884 Constant *OldMask = Shuf->getMask();
885 for (unsigned i = 0; i != NumMaskElts; ++i) {
886 if (i != IdxC) {
887 // All mask elements besides the inserted element remain the same.
888 NewMaskVec[i] = OldMask->getAggregateElement(i);
889 } else if (OldMask->getAggregateElement(i) == NewMaskEltC) {
890 // If the mask element was already set, there's nothing to do
891 // (demanded elements analysis may unset it later).
892 return nullptr;
893 } else {
894       assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&
895              "Unexpected shuffle mask element for identity shuffle");
896 NewMaskVec[i] = NewMaskEltC;
897 }
898 }
899
900 Constant *NewMask = ConstantVector::get(NewMaskVec);
901 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
902}
903
904/// If we have an insertelement instruction feeding into another insertelement
905/// and the 2nd is inserting a constant into the vector, canonicalize that
906/// constant insertion before the insertion of a variable:
907///
908/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
909/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
910///
911/// This has the potential of eliminating the 2nd insertelement instruction
912/// via constant folding of the scalar constant into a vector constant.
913static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
914 InstCombiner::BuilderTy &Builder) {
915 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
916 if (!InsElt1 || !InsElt1->hasOneUse())
917 return nullptr;
918
919 Value *X, *Y;
920 Constant *ScalarC;
921 ConstantInt *IdxC1, *IdxC2;
922 if (match(InsElt1->getOperand(0), m_Value(X)) &&
923 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
924 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
925 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
926 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
927 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
928 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
929 }
930
931 return nullptr;
932}
933
934/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
935/// --> shufflevector X, CVec', Mask'
936static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
937 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
938 // Bail out if the parent has more than one use. In that case, we'd be
939 // replacing the insertelt with a shuffle, and that's not a clear win.
940 if (!Inst || !Inst->hasOneUse())
941 return nullptr;
942 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
943 // The shuffle must have a constant vector operand. The insertelt must have
944 // a constant scalar being inserted at a constant position in the vector.
945 Constant *ShufConstVec, *InsEltScalar;
946 uint64_t InsEltIndex;
947 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
948 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
949 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
950 return nullptr;
951
952 // Adding an element to an arbitrary shuffle could be expensive, but a
953 // shuffle that selects elements from vectors without crossing lanes is
954 // assumed cheap.
955 // If we're just adding a constant into that shuffle, it will still be
956 // cheap.
957 if (!isShuffleEquivalentToSelect(*Shuf))
958 return nullptr;
959
960 // From the above 'select' check, we know that the mask has the same number
961 // of elements as the vector input operands. We also know that each constant
962 // input element is used in its lane and can not be used more than once by
963 // the shuffle. Therefore, replace the constant in the shuffle's constant
964 // vector with the insertelt constant. Replace the constant in the shuffle's
965 // mask vector with the insertelt index plus the length of the vector
966 // (because the constant vector operand of a shuffle is always the 2nd
967 // operand).
968 Constant *Mask = Shuf->getMask();
969 unsigned NumElts = Mask->getType()->getVectorNumElements();
970 SmallVector<Constant *, 16> NewShufElts(NumElts);
971 SmallVector<Constant *, 16> NewMaskElts(NumElts);
972 for (unsigned I = 0; I != NumElts; ++I) {
973 if (I == InsEltIndex) {
974 NewShufElts[I] = InsEltScalar;
975 Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
976 NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
977 } else {
978 // Copy over the existing values.
979 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
980 NewMaskElts[I] = Mask->getAggregateElement(I);
981 }
982 }
983
984 // Create new operands for a shuffle that includes the constant of the
985 // original insertelt. The old shuffle will be dead now.
986 return new ShuffleVectorInst(Shuf->getOperand(0),
987 ConstantVector::get(NewShufElts),
988 ConstantVector::get(NewMaskElts));
989 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
990 // Transform sequences of insertelements ops with constant data/indexes into
991 // a single shuffle op.
992 unsigned NumElts = InsElt.getType()->getNumElements();
993
994 uint64_t InsertIdx[2];
995 Constant *Val[2];
996 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
997 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
998 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
999 !match(IEI->getOperand(1), m_Constant(Val[1])))
1000 return nullptr;
1001 SmallVector<Constant *, 16> Values(NumElts);
1002 SmallVector<Constant *, 16> Mask(NumElts);
1003 auto ValI = std::begin(Val);
1004 // Generate new constant vector and mask.
1005 // We have 2 values/masks from the insertelements instructions. Insert them
1006 // into new value/mask vectors.
1007 for (uint64_t I : InsertIdx) {
1008 if (!Values[I]) {
1009        assert(!Mask[I]);
1010 Values[I] = *ValI;
1011 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
1012 NumElts + I);
1013 }
1014 ++ValI;
1015 }
1016 // Remaining values are filled with 'undef' values.
1017 for (unsigned I = 0; I < NumElts; ++I) {
1018 if (!Values[I]) {
1019        assert(!Mask[I]);
1020 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1021 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
1022 }
1023 }
1024 // Create new operands for a shuffle that includes the constant of the
1025 // original insertelt.
1026 return new ShuffleVectorInst(IEI->getOperand(0),
1027 ConstantVector::get(Values),
1028 ConstantVector::get(Mask));
1029 }
1030 return nullptr;
1031}
1032
1033Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
1034 Value *VecOp = IE.getOperand(0);
1035 Value *ScalarOp = IE.getOperand(1);
1036 Value *IdxOp = IE.getOperand(2);
1037
1038 if (auto *V = SimplifyInsertElementInst(
1039 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1040 return replaceInstUsesWith(IE, V);
1041
1042 // If the vector and scalar are both bitcast from the same element type, do
1043 // the insert in that source type followed by bitcast.
1044 Value *VecSrc, *ScalarSrc;
1045 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1046 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1047 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1048 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1049 VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
1050 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1051 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1052 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1053 return new BitCastInst(NewInsElt, IE.getType());
1054 }
1055
1056 // If the inserted element was extracted from some other vector and both
1057 // indexes are valid constants, try to turn this into a shuffle.
1058 uint64_t InsertedIdx, ExtractedIdx;
1059 Value *ExtVecOp;
1060 if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1061 match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
1062 m_ConstantInt(ExtractedIdx))) &&
1063 ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
1064 // TODO: Looking at the user(s) to determine if this insert is a
1065 // fold-to-shuffle opportunity does not match the usual instcombine
1066 // constraints. We should decide if the transform is worthy based only
1067 // on this instruction and its operands, but that may not work currently.
1068 //
1069 // Here, we are trying to avoid creating shuffles before reaching
1070 // the end of a chain of extract-insert pairs. This is complicated because
1071 // we do not generally form arbitrary shuffle masks in instcombine
1072 // (because those may codegen poorly), but collectShuffleElements() does
1073 // exactly that.
1074 //
1075 // The rules for determining what is an acceptable target-independent
1076 // shuffle mask are fuzzy because they evolve based on the backend's
1077 // capabilities and real-world impact.
1078 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1079 if (!Insert.hasOneUse())
1080 return true;
1081 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1082 if (!InsertUser)
1083 return true;
1084 return false;
1085 };
1086
1087 // Try to form a shuffle from a chain of extract-insert ops.
1088 if (isShuffleRootCandidate(IE)) {
1089 SmallVector<Constant*, 16> Mask;
1090 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1091
1092 // The proposed shuffle may be trivial, in which case we shouldn't
1093 // perform the combine.
1094 if (LR.first != &IE && LR.second != &IE) {
1095 // We now have a shuffle of LHS, RHS, Mask.
1096 if (LR.second == nullptr)
1097 LR.second = UndefValue::get(LR.first->getType());
1098 return new ShuffleVectorInst(LR.first, LR.second,
1099 ConstantVector::get(Mask));
1100 }
1101 }
1102 }
1103
1104 unsigned VWidth = VecOp->getType()->getVectorNumElements();
1105 APInt UndefElts(VWidth, 0);
1106 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1107 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1108 if (V != &IE)
1109 return replaceInstUsesWith(IE, V);
1110 return &IE;
1111 }
1112
1113 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1114 return Shuf;
1115
1116 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1117 return NewInsElt;
1118
1119 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1120 return Broadcast;
1121
1122 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1123 return Splat;
1124
1125 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1126 return IdentityShuf;
1127
1128 return nullptr;
1129}
1130
1131/// Return true if we can evaluate the specified expression tree if the vector
1132/// elements were shuffled in a different order.
1133static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1134 unsigned Depth = 5) {
1135 // We can always reorder the elements of a constant.
1136 if (isa<Constant>(V))
1137 return true;
1138
1139 // We won't reorder vector arguments. No IPO here.
1140 Instruction *I = dyn_cast<Instruction>(V);
1141 if (!I) return false;
1142
1143 // Two users may expect different orders of the elements. Don't try it.
1144 if (!I->hasOneUse())
1145 return false;
1146
1147 if (Depth == 0) return false;
1148
1149 switch (I->getOpcode()) {
1150 case Instruction::UDiv:
1151 case Instruction::SDiv:
1152 case Instruction::URem:
1153 case Instruction::SRem:
1154 // Propagating an undefined shuffle mask element to integer div/rem is not
1155 // allowed because those opcodes can create immediate undefined behavior
1156 // from an undefined element in an operand.
1157 if (llvm::any_of(Mask, [](int M){ return M == -1; }))
1158 return false;
1159    LLVM_FALLTHROUGH;
1160 case Instruction::Add:
1161 case Instruction::FAdd:
1162 case Instruction::Sub:
1163 case Instruction::FSub:
1164 case Instruction::Mul:
1165 case Instruction::FMul:
1166 case Instruction::FDiv:
1167 case Instruction::FRem:
1168 case Instruction::Shl:
1169 case Instruction::LShr:
1170 case Instruction::AShr:
1171 case Instruction::And:
1172 case Instruction::Or:
1173 case Instruction::Xor:
1174 case Instruction::ICmp:
1175 case Instruction::FCmp:
1176 case Instruction::Trunc:
1177 case Instruction::ZExt:
1178 case Instruction::SExt:
1179 case Instruction::FPToUI:
1180 case Instruction::FPToSI:
1181 case Instruction::UIToFP:
1182 case Instruction::SIToFP:
1183 case Instruction::FPTrunc:
1184 case Instruction::FPExt:
1185 case Instruction::GetElementPtr: {
1186 // Bail out if we would create longer vector ops. We could allow creating
1187 // longer vector ops, but that may result in more expensive codegen.
1188 Type *ITy = I->getType();
1189 if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1190 return false;
1191 for (Value *Operand : I->operands()) {
1192 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1193 return false;
1194 }
1195 return true;
1196 }
1197 case Instruction::InsertElement: {
1198 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1199 if (!CI) return false;
1200 int ElementNumber = CI->getLimitedValue();
1201
1202 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1203 // can't put an element into multiple indices.
1204 bool SeenOnce = false;
1205 for (int i = 0, e = Mask.size(); i != e; ++i) {
1206 if (Mask[i] == ElementNumber) {
1207 if (SeenOnce)
1208 return false;
1209 SeenOnce = true;
1210 }
1211 }
1212 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1213 }
1214 }
1215 return false;
1216}
1217
1218/// Rebuild a new instruction just like 'I' but with the new operands given.
1219/// In the event of type mismatch, the type of the operands is correct.
1220static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1221 // We don't want to use the IRBuilder here because we want the replacement
1222 // instructions to appear next to 'I', not the builder's insertion point.
1223 switch (I->getOpcode()) {
1224 case Instruction::Add:
1225 case Instruction::FAdd:
1226 case Instruction::Sub:
1227 case Instruction::FSub:
1228 case Instruction::Mul:
1229 case Instruction::FMul:
1230 case Instruction::UDiv:
1231 case Instruction::SDiv:
1232 case Instruction::FDiv:
1233 case Instruction::URem:
1234 case Instruction::SRem:
1235 case Instruction::FRem:
1236 case Instruction::Shl:
1237 case Instruction::LShr:
1238 case Instruction::AShr:
1239 case Instruction::And:
1240 case Instruction::Or:
1241 case Instruction::Xor: {
1242 BinaryOperator *BO = cast<BinaryOperator>(I);
1243 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1244 BinaryOperator *New =
1245 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1246 NewOps[0], NewOps[1], "", BO);
1247 if (isa<OverflowingBinaryOperator>(BO)) {
1248 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1249 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1250 }
1251 if (isa<PossiblyExactOperator>(BO)) {
1252 New->setIsExact(BO->isExact());
1253 }
1254 if (isa<FPMathOperator>(BO))
1255 New->copyFastMathFlags(I);
1256 return New;
1257 }
1258 case Instruction::ICmp:
1259 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1260 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1261 NewOps[0], NewOps[1]);
1262 case Instruction::FCmp:
1263 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1264 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1265 NewOps[0], NewOps[1]);
1266 case Instruction::Trunc:
1267 case Instruction::ZExt:
1268 case Instruction::SExt:
1269 case Instruction::FPToUI:
1270 case Instruction::FPToSI:
1271 case Instruction::UIToFP:
1272 case Instruction::SIToFP:
1273 case Instruction::FPTrunc:
1274 case Instruction::FPExt: {
1275 // It's possible that the mask has a different number of elements from
1276 // the original cast. We recompute the destination type to match the mask.
1277 Type *DestTy =
1278 VectorType::get(I->getType()->getScalarType(),
1279 NewOps[0]->getType()->getVectorNumElements());
1280 assert(NewOps.size() == 1 && "cast with #ops != 1");
1281 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1282 "", I);
1283 }
1284 case Instruction::GetElementPtr: {
1285 Value *Ptr = NewOps[0];
1286 ArrayRef<Value*> Idx = NewOps.slice(1);
1287 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1288 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1289 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1290 return GEP;
1291 }
1292 }
1293 llvm_unreachable("failed to rebuild vector instructions");
1294}
1295
1296static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1297 // Mask.size() does not need to be equal to the number of vector elements.
1298
1299 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1300 Type *EltTy = V->getType()->getScalarType();
1301 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1302 if (isa<UndefValue>(V))
1303 return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1304
1305 if (isa<ConstantAggregateZero>(V))
1306 return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1307
1308 if (Constant *C = dyn_cast<Constant>(V)) {
1309 SmallVector<Constant *, 16> MaskValues;
1310 for (int i = 0, e = Mask.size(); i != e; ++i) {
1311 if (Mask[i] == -1)
1312 MaskValues.push_back(UndefValue::get(I32Ty));
1313 else
1314 MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1315 }
1316 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1317 ConstantVector::get(MaskValues));
1318 }
1319
1320 Instruction *I = cast<Instruction>(V);
1321 switch (I->getOpcode()) {
1322 case Instruction::Add:
1323 case Instruction::FAdd:
1324 case Instruction::Sub:
1325 case Instruction::FSub:
1326 case Instruction::Mul:
1327 case Instruction::FMul:
1328 case Instruction::UDiv:
1329 case Instruction::SDiv:
1330 case Instruction::FDiv:
1331 case Instruction::URem:
1332 case Instruction::SRem:
1333 case Instruction::FRem:
1334 case Instruction::Shl:
1335 case Instruction::LShr:
1336 case Instruction::AShr:
1337 case Instruction::And:
1338 case Instruction::Or:
1339 case Instruction::Xor:
1340 case Instruction::ICmp:
1341 case Instruction::FCmp:
1342 case Instruction::Trunc:
1343 case Instruction::ZExt:
1344 case Instruction::SExt:
1345 case Instruction::FPToUI:
1346 case Instruction::FPToSI:
1347 case Instruction::UIToFP:
1348 case Instruction::SIToFP:
1349 case Instruction::FPTrunc:
1350 case Instruction::FPExt:
1351 case Instruction::Select:
1352 case Instruction::GetElementPtr: {
1353 SmallVector<Value*, 8> NewOps;
1354 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1355 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1356 Value *V;
1357 // Recursively call evaluateInDifferentElementOrder on vector arguments
1358 // as well. E.g. GetElementPtr may have scalar operands even if the
1359 // return value is a vector, so we need to examine the operand type.
1360 if (I->getOperand(i)->getType()->isVectorTy())
1361 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1362 else
1363 V = I->getOperand(i);
1364 NewOps.push_back(V);
1365 NeedsRebuild |= (V != I->getOperand(i));
1366 }
1367 if (NeedsRebuild) {
1368 return buildNew(I, NewOps);
1369 }
1370 return I;
1371 }
1372 case Instruction::InsertElement: {
1373 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1374
1375 // The insertelement was inserting at Element. Figure out which element
1376 // that becomes after shuffling. The answer is guaranteed to be unique
1377 // by CanEvaluateShuffled.
1378 bool Found = false;
1379 int Index = 0;
1380 for (int e = Mask.size(); Index != e; ++Index) {
1381 if (Mask[Index] == Element) {
1382 Found = true;
1383 break;
1384 }
1385 }
1386
1387 // If element is not in Mask, no need to handle the operand 1 (element to
1388 // be inserted). Just evaluate values in operand 0 according to Mask.
1389 if (!Found)
1390 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1391
1392 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1393 return InsertElementInst::Create(V, I->getOperand(1),
1394 ConstantInt::get(I32Ty, Index), "", I);
1395 }
1396 }
1397 llvm_unreachable("failed to reorder elements of vector instruction!");
1398}
1399
1400static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
1401 bool &isLHSID, bool &isRHSID) {
1402 isLHSID = isRHSID = true;
1403
1404 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1405 if (Mask[i] < 0) continue; // Ignore undef values.
1406 // Is this an identity shuffle of the LHS value?
1407 isLHSID &= (Mask[i] == (int)i);
1408
1409 // Is this an identity shuffle of the RHS value?
1410 isRHSID &= (Mask[i]-e == i);
1411 }
1412}
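A plain-C++ sketch of the same identity test (a hypothetical standalone helper, not the LLVM code): lane i of an LHS identity selects element i, lane i of an RHS identity selects element i + Mask.size(), and undef lanes match either. For a 4-wide shuffle, <0,1,-1,3> is an LHS identity and <4,5,6,7> is an RHS identity.

#include <vector>

static void recognizeIdentitySketch(const std::vector<int> &Mask,
                                    bool &IsLHSID, bool &IsRHSID) {
  const int E = static_cast<int>(Mask.size());
  IsLHSID = IsRHSID = true;
  for (int I = 0; I != E; ++I) {
    if (Mask[I] < 0)
      continue;                    // undef lanes are ignored
    IsLHSID &= (Mask[I] == I);     // selects lane I of the LHS
    IsRHSID &= (Mask[I] == I + E); // selects lane I of the RHS
  }
}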
1413
1414// Returns true if the shuffle is extracting a contiguous range of values from
1415// LHS, for example:
1416// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1417// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1418// Shuffles to: |EE|FF|GG|HH|
1419// +--+--+--+--+
1420static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1421 SmallVector<int, 16> &Mask) {
1422 unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1423 unsigned MaskElems = Mask.size();
1424 unsigned BegIdx = Mask.front();
1425 unsigned EndIdx = Mask.back();
1426 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1427 return false;
1428 for (unsigned I = 0; I != MaskElems; ++I)
1429 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1430 return false;
1431 return true;
1432}
1433
1434/// These are the ingredients in an alternate form binary operator as described
1435/// below.
1436struct BinopElts {
1437 BinaryOperator::BinaryOps Opcode;
1438 Value *Op0;
1439 Value *Op1;
1440 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1441 Value *V0 = nullptr, Value *V1 = nullptr) :
1442 Opcode(Opc), Op0(V0), Op1(V1) {}
1443 operator bool() const { return Opcode != 0; }
1444};
1445
1446/// Binops may be transformed into binops with different opcodes and operands.
1447/// Reverse the usual canonicalization to enable folds with the non-canonical
1448/// form of the binop. If a transform is possible, return the elements of the
1449/// new binop. If not, return invalid elements.
1450static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1451 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1452 Type *Ty = BO->getType();
1453 switch (BO->getOpcode()) {
1454 case Instruction::Shl: {
1455 // shl X, C --> mul X, (1 << C)
1456 Constant *C;
1457 if (match(BO1, m_Constant(C))) {
1458 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1459 return { Instruction::Mul, BO0, ShlOne };
1460 }
1461 break;
1462 }
1463 case Instruction::Or: {
1464 // or X, C --> add X, C (when X and C have no common bits set)
1465 const APInt *C;
1466 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1467 return { Instruction::Add, BO0, BO1 };
1468 break;
1469 }
1470 default:
1471 break;
1472 }
1473 return {};
1474}
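The two rewrites above rest on simple arithmetic identities. A tiny self-contained check in plain C++ (hypothetical values, not the LLVM constant folder):

#include <cassert>
#include <cstdint>

int main() {
  // shl X, C  ==  mul X, (1 << C)
  uint32_t X = 0x5A0, C = 3;
  assert((X << C) == X * (1u << C));

  // or X, C  ==  add X, C, but only when X and C share no set bits
  // (that is what MaskedValueIsZero establishes above).
  uint32_t Y = 0xF0, D = 0x0F;
  assert((Y & D) == 0);
  assert((Y | D) == Y + D);
  return 0;
}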
1475
1476static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1477 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1478
1479 // Are we shuffling together some value and that same value after it has been
1480 // modified by a binop with a constant?
1481 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1482 Constant *C;
1483 bool Op0IsBinop;
1484 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1485 Op0IsBinop = true;
1486 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1487 Op0IsBinop = false;
1488 else
1489 return nullptr;
1490
1491 // The identity constant for a binop leaves a variable operand unchanged. For
1492 // a vector, this is a splat of something like 0, -1, or 1.
1493 // If there's no identity constant for this binop, we're done.
1494 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1495 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1496 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1497 if (!IdC)
1498 return nullptr;
1499
1500 // Shuffle identity constants into the lanes that return the original value.
1501 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1502 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1503 // The existing binop constant vector remains in the same operand position.
1504 Constant *Mask = Shuf.getMask();
1505 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1506 ConstantExpr::getShuffleVector(IdC, C, Mask);
1507
1508 bool MightCreatePoisonOrUB =
1509 Mask->containsUndefElement() &&
1510 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1511 if (MightCreatePoisonOrUB)
1512 NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1513
1514 // shuf (bop X, C), X, M --> bop X, C'
1515 // shuf X, (bop X, C), M --> bop X, C'
1516 Value *X = Op0IsBinop ? Op1 : Op0;
1517 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1518 NewBO->copyIRFlags(BO);
1519
1520 // An undef shuffle mask element may propagate as an undef constant element in
1521 // the new binop. That would produce poison where the original code might not.
1522 // If we already made a safe constant, then there's no danger.
1523 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1524 NewBO->dropPoisonGeneratingFlags();
1525 return NewBO;
1526}
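A plain-C++ sketch (hypothetical lane values, not the LLVM constant expressions) of how NewC is assembled above: lanes that the select-shuffle takes from the binop keep its constant, and lanes taken from the bare X get the opcode's identity element so the binop is a no-op there. Undef mask lanes are assumed absent here, since a select-equivalent shuffle mask has none.

#include <vector>

static std::vector<int> selectBinopConstant(const std::vector<int> &C,
                                            int IdentityElt,
                                            const std::vector<int> &Mask,
                                            bool Op0IsBinop) {
  const int NumElts = static_cast<int>(C.size());
  std::vector<int> NewC(Mask.size());
  for (size_t I = 0; I != Mask.size(); ++I) {
    bool FromOp0 = Mask[I] < NumElts;         // which shuffle operand fed lane I
    bool FromBinop = (FromOp0 == Op0IsBinop); // did that operand hold the binop?
    int Lane = FromOp0 ? Mask[I] : Mask[I] - NumElts;
    NewC[I] = FromBinop ? C[Lane] : IdentityElt;
  }
  return NewC;
}
// Mirrors the first example in the comment above: C = {-1,-2,-3,-4},
// mul identity 1, Mask = {0,5,6,3}, Op0IsBinop = true --> NewC = {-1,1,1,-4}.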
1527
1528/// If we have an insert of a scalar to a non-zero element of an undefined
1529/// vector and then shuffle that value, that's the same as inserting to the zero
1530/// element and shuffling. Splatting from the zero element is recognized as the
1531/// canonical form of splat.
1532static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1533 InstCombiner::BuilderTy &Builder) {
1534 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1535 Constant *Mask = Shuf.getMask();
1536 Value *X;
1537 uint64_t IndexC;
1538
1539 // Match a shuffle that is a splat to a non-zero element.
1540 if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1541 m_ConstantInt(IndexC)))) ||
1542 !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
1543 return nullptr;
1544
1545 // Insert into element 0 of an undef vector.
1546 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1547 Constant *Zero = Builder.getInt32(0);
1548 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1549
1550 // Splat from element 0. Any mask element that is undefined remains undefined.
1551 // For example:
1552 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1553 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1554 unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1555 SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1556 for (unsigned i = 0; i != NumMaskElts; ++i)
1557 if (isa<UndefValue>(Mask->getAggregateElement(i)))
1558 NewMask[i] = Mask->getAggregateElement(i);
1559
1560 return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1561}
1562
1563/// Try to fold shuffles that are the equivalent of a vector select.
1564static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1565 InstCombiner::BuilderTy &Builder,
1566 const DataLayout &DL) {
1567 if (!Shuf.isSelect())
1568 return nullptr;
1569
1570 // Canonicalize to choose from operand 0 first.
1571 unsigned NumElts = Shuf.getType()->getVectorNumElements();
1572 if (Shuf.getMaskValue(0) >= (int)NumElts) {
1573 // TODO: Can we assert that both operands of a shuffle-select are not undef
1574 // (otherwise, it would have been folded by instsimplify)?
1575 Shuf.commute();
1576 return &Shuf;
1577 }
1578
1579 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1580 return I;
1581
1582 BinaryOperator *B0, *B1;
1583 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1584 !match(Shuf.getOperand(1), m_BinOp(B1)))
1585 return nullptr;
1586
1587 Value *X, *Y;
1588 Constant *C0, *C1;
1589 bool ConstantsAreOp1;
1590 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1591 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1592 ConstantsAreOp1 = true;
1593 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1594 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1595 ConstantsAreOp1 = false;
1596 else
1597 return nullptr;
1598
1599 // We need matching binops to fold the lanes together.
1600 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1601 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1602 bool DropNSW = false;
1603 if (ConstantsAreOp1 && Opc0 != Opc1) {
1604 // TODO: We drop "nsw" if shift is converted into multiply because it may
1605 // not be correct when the shift amount is BitWidth - 1. We could examine
1606 // each vector element to determine if it is safe to keep that flag.
1607 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1608 DropNSW = true;
1609 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1610 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1611 Opc0 = AltB0.Opcode;
1612 C0 = cast<Constant>(AltB0.Op1);
1613 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1614 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1615 Opc1 = AltB1.Opcode;
1616 C1 = cast<Constant>(AltB1.Op1);
1617 }
1618 }
1619
1620 if (Opc0 != Opc1)
1621 return nullptr;
1622
1623 // The opcodes must be the same. Use a new name to make that clear.
1624 BinaryOperator::BinaryOps BOpc = Opc0;
1625
1626 // Select the constant elements needed for the single binop.
1627 Constant *Mask = Shuf.getMask();
1628 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1629
1630 // We are moving a binop after a shuffle. When a shuffle has an undefined
1631 // mask element, the result is undefined, but it is not poison or undefined
1632 // behavior. That is not necessarily true for div/rem/shift.
1633 bool MightCreatePoisonOrUB =
1634 Mask->containsUndefElement() &&
1635 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
1636 if (MightCreatePoisonOrUB)
1637 NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1638
1639 Value *V;
1640 if (X == Y) {
1641 // Remove a binop and the shuffle by rearranging the constant:
1642 // shuffle (op V, C0), (op V, C1), M --> op V, C'
1643 // shuffle (op C0, V), (op C1, V), M --> op C', V
1644 V = X;
1645 } else {
1646 // If there are 2 different variable operands, we must create a new shuffle
1647 // (select) first, so check uses to ensure that we don't end up with more
1648 // instructions than we started with.
1649 if (!B0->hasOneUse() && !B1->hasOneUse())
1650 return nullptr;
1651
1652 // If we use the original shuffle mask and op1 is *variable*, we would be
1653 // putting an undef into operand 1 of div/rem/shift. This is either UB or
1654 // poison. We do not have to guard against UB when *constants* are op1
1655 // because safe constants guarantee that we do not overflow sdiv/srem (and
1656 // there's no danger for other opcodes).
1657 // TODO: To allow this case, create a new shuffle mask with no undefs.
1658 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1659 return nullptr;
1660
1661 // Note: In general, we do not create new shuffles in InstCombine because we
1662 // do not know if a target can lower an arbitrary shuffle optimally. In this
1663 // case, the shuffle uses the existing mask, so there is no additional risk.
1664
1665 // Select the variable vectors first, then perform the binop:
1666 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1667 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1668 V = Builder.CreateShuffleVector(X, Y, Mask);
1669 }
1670
1671 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1672 BinaryOperator::Create(BOpc, NewC, V);
1673
1674 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1675 // 1. If we changed an opcode, poison conditions might have changed.
1676 // 2. If the shuffle had undef mask elements, the new binop might have undefs
1677 // where the original code did not. But if we already made a safe constant,
1678 // then there's no danger.
1679 NewBO->copyIRFlags(B0);
1680 NewBO->andIRFlags(B1);
1681 if (DropNSW)
1682 NewBO->setHasNoSignedWrap(false);
1683 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1684 NewBO->dropPoisonGeneratingFlags();
1685 return NewBO;
1686}
1687
1688/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1689/// narrowing (concatenating with undef and extracting back to the original
1690/// length). This allows replacing the wide select with a narrow select.
1691static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1692 InstCombiner::BuilderTy &Builder) {
1693 // This must be a narrowing identity shuffle. It extracts the 1st N elements
1694 // of the 1st vector operand of a shuffle.
1695 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1696 return nullptr;
1697
1698 // The vector being shuffled must be a vector select that we can eliminate.
1699 // TODO: The one-use requirement could be eased if X and/or Y are constants.
1700 Value *Cond, *X, *Y;
1701 if (!match(Shuf.getOperand(0),
1702 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1703 return nullptr;
1704
1705 // We need a narrow condition value. It must be extended with undef elements
1706 // and have the same number of elements as this shuffle.
1707 unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1708 Value *NarrowCond;
1709 if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1710 m_Constant()))) ||
1711 NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1712 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1713 return nullptr;
1714
1715 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1716 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1717 Value *Undef = UndefValue::get(X->getType());
1718 Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1719 Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1720 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1721}
1722
1723/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1724static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1725 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1726 if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1727 return nullptr;
1728
1729 Value *X, *Y;
1730 Constant *Mask;
1731 if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1732 return nullptr;
1733
1734 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1735 // then combining may result in worse codegen.
1736 if (!Op0->hasOneUse())
1737 return nullptr;
1738
1739 // We are extracting a subvector from a shuffle. Remove excess elements from
1740 // the 1st shuffle mask to eliminate the extract.
1741 //
1742 // This transform is conservatively limited to identity extracts because we do
1743 // not allow arbitrary shuffle mask creation as a target-independent transform
1744 // (because we can't guarantee that will lower efficiently).
1745 //
1746 // If the extracting shuffle has an undef mask element, it transfers to the
1747 // new shuffle mask. Otherwise, copy the original mask element. Example:
1748 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1749 // shuf X, Y, <C0, undef, C2, undef>
1750 unsigned NumElts = Shuf.getType()->getVectorNumElements();
1751 SmallVector<Constant *, 16> NewMask(NumElts);
1752 assert(NumElts < Mask->getType()->getVectorNumElements() &&
1753 "Identity with extract must have less elements than its inputs");
1754
1755 for (unsigned i = 0; i != NumElts; ++i) {
1756 Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1757 Constant *MaskElt = Mask->getAggregateElement(i);
1758 NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1759 }
1760 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1761}
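A plain-C++ sketch of the mask narrowing above (standalone, not the LLVM code), assuming as the code does that the outer mask is an identity-with-extract, so a defined lane I always selects inner element I:

#include <vector>

// Keep undef where the extracting mask is undef; otherwise reuse the inner
// shuffle's mask element for that lane.
static std::vector<int> narrowInnerMask(const std::vector<int> &InnerMask,
                                        const std::vector<int> &ExtractMask) {
  std::vector<int> NewMask(ExtractMask.size());
  for (size_t I = 0; I != ExtractMask.size(); ++I)
    NewMask[I] = (ExtractMask[I] < 0) ? -1 : InnerMask[I];
  return NewMask;
}
// With InnerMask = {0,5,2,-1,4} and ExtractMask = {0,-1,2,3} the result is
// {0,-1,2,-1}, matching the <C0, undef, C2, undef> example above.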
1762
1763/// Try to replace a shuffle with an insertelement.
1764static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
1765 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1766 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1767
1768 // The shuffle must not change vector sizes.
1769 // TODO: This restriction could be removed if the insert has only one use
1770 // (because the transform would require a new length-changing shuffle).
1771 int NumElts = Mask.size();
1772 if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1773 return nullptr;
1774
1775 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1776 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1777 // We need an insertelement with a constant index.
1778 if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1779 m_ConstantInt(IndexC))))
1780 return false;
1781
1782 // Test the shuffle mask to see if it splices the inserted scalar into the
1783 // operand 1 vector of the shuffle.
1784 int NewInsIndex = -1;
1785 for (int i = 0; i != NumElts; ++i) {
1786 // Ignore undef mask elements.
1787 if (Mask[i] == -1)
1788 continue;
1789
1790 // The shuffle takes elements of operand 1 without lane changes.
1791 if (Mask[i] == NumElts + i)
1792 continue;
1793
1794 // The shuffle must choose the inserted scalar exactly once.
1795 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1796 return false;
1797
1798 // The shuffle is placing the inserted scalar into element i.
1799 NewInsIndex = i;
1800 }
1801
1802 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1803
1804 // Index is updated to the potentially translated insertion lane.
1805 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1806 return true;
1807 };
1808
1809 // If the shuffle is unnecessary, insert the scalar operand directly into
1810 // operand 1 of the shuffle. Example:
1811 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1812 Value *Scalar;
1813 ConstantInt *IndexC;
1814 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1815 return InsertElementInst::Create(V1, Scalar, IndexC);
1816
1817 // Try again after commuting shuffle. Example:
1818 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1819 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1820 std::swap(V0, V1);
1821 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1822 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1823 return InsertElementInst::Create(V1, Scalar, IndexC);
1824
1825 return nullptr;
1826}
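The commuted retry above rewrites the mask so that lanes which referenced operand 0 now reference operand 1 and vice versa. A plain-C++ sketch of that effect (an illustration of what the mask rewrite does, not the implementation of commuteShuffleMask):

#include <vector>

static void commuteMaskSketch(std::vector<int> &Mask, int NumElts) {
  for (int &M : Mask) {
    if (M < 0)
      continue;                                  // undef lanes stay undef
    M = (M < NumElts) ? M + NumElts : M - NumElts;
  }
}
// e.g. NumElts = 4: {0,1,2,4} becomes {4,5,6,0}, matching the commuted
// example in the comment above.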
1827
1828static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1829 // Match the operands as identity with padding (also known as concatenation
1830 // with undef) shuffles of the same source type. The backend is expected to
1831 // recreate these concatenations from a shuffle of narrow operands.
1832 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1833 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1834 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1835 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1836 return nullptr;
1837
1838 // We limit this transform to power-of-2 types because we expect that the
1839 // backend can convert the simplified IR patterns to identical nodes as the
1840 // original IR.
1841 // TODO: If we can verify the same behavior for arbitrary types, the
1842 // power-of-2 checks can be removed.
1843 Value *X = Shuffle0->getOperand(0);
1844 Value *Y = Shuffle1->getOperand(0);
1845 if (X->getType() != Y->getType() ||
1846 !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
1847 !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1848 !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
1849 isa<UndefValue>(X) || isa<UndefValue>(Y))
1850 return nullptr;
1851 assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1852 isa<UndefValue>(Shuffle1->getOperand(1)) &&
1853 "Unexpected operand for identity shuffle");
1854
1855 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1856 // operands directly by adjusting the shuffle mask to account for the narrower
1857 // types:
1858 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1859 int NarrowElts = X->getType()->getVectorNumElements();
1860 int WideElts = Shuffle0->getType()->getVectorNumElements();
1861 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1862
1863 Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1864 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1865 SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1866 for (int i = 0, e = Mask.size(); i != e; ++i) {
1867 if (Mask[i] == -1)
1868 continue;
1869
1870 // If this shuffle is choosing an undef element from 1 of the sources, that
1871 // element is undef.
1872 if (Mask[i] < WideElts) {
1873 if (Shuffle0->getMaskValue(Mask[i]) == -1)
1874 continue;
1875 } else {
1876 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1877 continue;
1878 }
1879
1880 // If this shuffle is choosing from the 1st narrow op, the mask element is
1881 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1882 // element is offset down to adjust for the narrow vector widths.
1883 if (Mask[i] < WideElts) {
1884 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
1885 NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1886 } else {
1887 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
1888 NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1889 }
1890 }
1891 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1892}
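A worked instance of the offset adjustment above (hypothetical sizes, plain C++): with NarrowElts = 2 and WideElts = 4, outer mask element 5 selects lane 1 of the widened Y, and 5 - (4 - 2) = 3 selects that same lane of the narrow Y in the rebuilt two-input shuffle.

#include <vector>

// Remap a mask written against two widened (padded) operands onto the two
// narrow source operands; lanes that select padding or are undef become -1.
static std::vector<int> remapPaddedMask(const std::vector<int> &Mask,
                                        int WideElts, int NarrowElts) {
  std::vector<int> NewMask(Mask.size(), -1);
  for (size_t I = 0; I != Mask.size(); ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;                                   // undef stays undef
    if (M < WideElts && M < NarrowElts)
      NewMask[I] = M;                             // a lane of narrow X
    else if (M >= WideElts && M - WideElts < NarrowElts)
      NewMask[I] = M - (WideElts - NarrowElts);   // a lane of narrow Y
    // any other lane selected padding and is left undef
  }
  return NewMask;
}
// e.g. WideElts = 4, NarrowElts = 2, Mask = {0,1,4,5} --> {0,1,2,3}.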
1893
1894Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1895 Value *LHS = SVI.getOperand(0);
1896 Value *RHS = SVI.getOperand(1);
1897 if (auto *V = SimplifyShuffleVectorInst(
1898 LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1899 return replaceInstUsesWith(SVI, V);
1900
1901 // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
1902 // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
1903 unsigned VWidth = SVI.getType()->getVectorNumElements();
1904 unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1905 SmallVector<int, 16> Mask = SVI.getShuffleMask();
1906 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1907 if (LHS == RHS || isa<UndefValue>(LHS)) {
1908 // Remap any references to RHS to use LHS.
1909 SmallVector<Constant*, 16> Elts;
1910 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
1911 if (Mask[i] < 0) {
1912 Elts.push_back(UndefValue::get(Int32Ty));
1913 continue;
1914 }
1915
1916 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
1917 (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
1918 Mask[i] = -1; // Turn into undef.
1919 Elts.push_back(UndefValue::get(Int32Ty));
1920 } else {
1921 Mask[i] = Mask[i] % e; // Force to LHS.
1922 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1923 }
1924 }
1925 SVI.setOperand(0, SVI.getOperand(1));
1926 SVI.setOperand(1, UndefValue::get(RHS->getType()));
1927 SVI.setOperand(2, ConstantVector::get(Elts));
1928 return &SVI;
1929 }
1930
1931 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1932 return I;
1933
1934 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1935 return I;
1936
1937 if (Instruction *I = narrowVectorSelect(SVI, Builder))
1938 return I;
1939
1940 APInt UndefElts(VWidth, 0);
1941 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1942 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1943 if (V != &SVI)
1944 return replaceInstUsesWith(SVI, V);
1945 return &SVI;
1946 }
1947
1948 if (Instruction *I = foldIdentityExtractShuffle(SVI))
1949 return I;
1950
1951 // These transforms have the potential to lose undef knowledge, so they are
1952 // intentionally placed after SimplifyDemandedVectorElts().
1953 if (Instruction *I = foldShuffleWithInsert(SVI))
1954 return I;
1955 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1956 return I;
1957
1958 if (VWidth == LHSWidth) {
1959 // Analyze the shuffle: is the LHS or RHS an identity shuffle?
1960 bool isLHSID, isRHSID;
1961 recognizeIdentityMask(Mask, isLHSID, isRHSID);
1962
1963 // Eliminate identity shuffles.
1964 if (isLHSID) return replaceInstUsesWith(SVI, LHS);
1965 if (isRHSID) return replaceInstUsesWith(SVI, RHS);
1966 }
1967
1968 if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
1969 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1970 return replaceInstUsesWith(SVI, V);
1971 }
1972
1973 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1974 // a non-vector type. We can instead bitcast the original vector followed by
1975 // an extract of the desired element:
1976 //
1977 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1978 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1979 // %1 = bitcast <4 x i8> %sroa to i32
1980 // Becomes:
1981 // %bc = bitcast <16 x i8> %in to <4 x i32>
1982 // %ext = extractelement <4 x i32> %bc, i32 0
1983 //
1984 // If the shuffle is extracting a contiguous range of values from the input
1985 // vector then each use which is a bitcast of the extracted size can be
1986 // replaced. This will work if the vector types are compatible, and the begin
1987 // index is aligned to a value in the casted vector type. If the begin index
1988 // isn't aligned then we can shuffle the original vector (keeping the same
1989 // vector type) before extracting.
1990 //
1991 // This code will bail out if the target type is fundamentally incompatible
1992 // with vectors of the source type.
1993 //
1994 // Example of <16 x i8>, target type i32:
1995 // Index range [4,8): v-----------v Will work.
1996 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1997 // <16 x i8>: | | | | | | | | | | | | | | | | |
1998 // <4 x i32>: | | | | |
1999 // +-----------+-----------+-----------+-----------+
2000 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2001 // Target type i40: ^--------------^ Won't work, bail.
2002 bool MadeChange = false;
2003 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2004 Value *V = LHS;
2005 unsigned MaskElems = Mask.size();
2006 VectorType *SrcTy = cast<VectorType>(V->getType());
2007 unsigned VecBitWidth = SrcTy->getBitWidth();
2008 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2009 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2010 unsigned SrcNumElems = SrcTy->getNumElements();
2011 SmallVector<BitCastInst *, 8> BCs;
2012 DenseMap<Type *, Value *> NewBCs;
2013 for (User *U : SVI.users())
2014 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2015 if (!BC->use_empty())
2016 // Only visit bitcasts that weren't previously handled.
2017 BCs.push_back(BC);
2018 for (BitCastInst *BC : BCs) {
2019 unsigned BegIdx = Mask.front();
2020 Type *TgtTy = BC->getDestTy();
2021 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2022 if (!TgtElemBitWidth)
2023 continue;
2024 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2025 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2026 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2027 if (!VecBitWidthsEqual)
2028 continue;
2029 if (!VectorType::isValidElementType(TgtTy))
2030 continue;
2031 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
2032 if (!BegIsAligned) {
2033 // Shuffle the input so [0,NumElements) contains the output, and
2034 // [NumElems,SrcNumElems) is undef.
2035 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
2036 UndefValue::get(Int32Ty));
2037 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2038 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
2039 V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
2040 ConstantVector::get(ShuffleMask),
2041 SVI.getName() + ".extract");
2042 BegIdx = 0;
2043 }
2044 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2045 assert(SrcElemsPerTgtElem);
2046 BegIdx /= SrcElemsPerTgtElem;
2047 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2048 auto *NewBC =
2049 BCAlreadyExists
2050 ? NewBCs[CastSrcTy]
2051 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2052 if (!BCAlreadyExists)
2053 NewBCs[CastSrcTy] = NewBC;
2054 auto *Ext = Builder.CreateExtractElement(
2055 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2056 // The shufflevector isn't being replaced: the bitcast that used it
2057 // is. InstCombine will visit the newly-created instructions.
2058 replaceInstUsesWith(*BC, Ext);
2059 MadeChange = true;
2060 }
2061 }
2062
2063 // If the LHS is a shufflevector itself, see if we can combine it with this
2064 // one without producing an unusual shuffle.
2065 // Cases that might be simplified:
2066 // 1.
2067 // x1=shuffle(v1,v2,mask1)
2068 // x=shuffle(x1,undef,mask)
2069 // ==>
2070 // x=shuffle(v1,undef,newMask)
2071 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2072 // 2.
2073 // x1=shuffle(v1,undef,mask1)
2074 // x=shuffle(x1,x2,mask)
2075 // where v1.size() == mask1.size()
2076 // ==>
2077 // x=shuffle(v1,x2,newMask)
2078 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2079 // 3.
2080 // x2=shuffle(v2,undef,mask2)
2081 // x=shuffle(x1,x2,mask)
2082 // where v2.size() == mask2.size()
2083 // ==>
2084 // x=shuffle(x1,v2,newMask)
2085 // newMask[i] = (mask[i] < x1.size())
2086 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2087 // 4.
2088 // x1=shuffle(v1,undef,mask1)
2089 // x2=shuffle(v2,undef,mask2)
2090 // x=shuffle(x1,x2,mask)
2091 // where v1.size() == v2.size()
2092 // ==>
2093 // x=shuffle(v1,v2,newMask)
2094 // newMask[i] = (mask[i] < x1.size())
2095 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2096 //
2097 // Here we are really conservative:
2098 // we are absolutely afraid of producing a shuffle mask not in the input
2099 // program, because the code gen may not be smart enough to turn a merged
2100 // shuffle into two specific shuffles: it may produce worse code. As such,
2101 // we only merge two shuffles if the result is either a splat or one of the
2102 // input shuffle masks. In this case, merging the shuffles just removes
2103 // one instruction, which we know is safe. This is good for things like
2104 // turning: (splat(splat)) -> splat, or
2105 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2106 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2107 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2108 if (LHSShuffle)
2109 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
2110 LHSShuffle = nullptr;
2111 if (RHSShuffle)
2112 if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
2113 RHSShuffle = nullptr;
2114 if (!LHSShuffle && !RHSShuffle)
2115 return MadeChange ? &SVI : nullptr;
2116
2117 Value* LHSOp0 = nullptr;
2118 Value* LHSOp1 = nullptr;
2119 Value* RHSOp0 = nullptr;
2120 unsigned LHSOp0Width = 0;
2121 unsigned RHSOp0Width = 0;
2122 if (LHSShuffle) {
2123 LHSOp0 = LHSShuffle->getOperand(0);
2124 LHSOp1 = LHSShuffle->getOperand(1);
2125 LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
2126 }
2127 if (RHSShuffle) {
2128 RHSOp0 = RHSShuffle->getOperand(0);
2129 RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
2130 }
2131 Value* newLHS = LHS;
2132 Value* newRHS = RHS;
2133 if (LHSShuffle) {
2134 // case 1
2135 if (isa<UndefValue>(RHS)) {
2136 newLHS = LHSOp0;
2137 newRHS = LHSOp1;
2138 }
2139 // case 2 or 4
2140 else if (LHSOp0Width == LHSWidth) {
2141 newLHS = LHSOp0;
2142 }
2143 }
2144 // case 3 or 4
2145 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2146 newRHS = RHSOp0;
2147 }
2148 // case 4
2149 if (LHSOp0 == RHSOp0) {
2150 newLHS = LHSOp0;
2151 newRHS = nullptr;
2152 }
2153
2154 if (newLHS == LHS && newRHS == RHS)
2155 return MadeChange ? &SVI : nullptr;
2156
2157 SmallVector<int, 16> LHSMask;
2158 SmallVector<int, 16> RHSMask;
2159 if (newLHS != LHS)
2160 LHSMask = LHSShuffle->getShuffleMask();
2161 if (RHSShuffle && newRHS != RHS)
2162 RHSMask = RHSShuffle->getShuffleMask();
2163
2164 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2165 SmallVector<int, 16> newMask;
2166 bool isSplat = true;
2167 int SplatElt = -1;
2168 // Create a new mask for the new ShuffleVectorInst so that the new
2169 // ShuffleVectorInst is equivalent to the original one.
2170 for (unsigned i = 0; i < VWidth; ++i) {
2171 int eltMask;
2172 if (Mask[i] < 0) {
2173 // This element is an undef value.
2174 eltMask = -1;
2175 } else if (Mask[i] < (int)LHSWidth) {
2176 // This element is from left hand side vector operand.
2177 //
2178 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2179 // new mask value for the element.
2180 if (newLHS != LHS) {
2181 eltMask = LHSMask[Mask[i]];
2182 // If the value selected is an undef value, explicitly specify it
2183 // with a -1 mask value.
2184 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2185 eltMask = -1;
2186 } else
2187 eltMask = Mask[i];
2188 } else {
2189 // This element is from right hand side vector operand
2190 //
2191 // If the value selected is an undef value, explicitly specify it
2192 // with a -1 mask value. (case 1)
2193 if (isa<UndefValue>(RHS))
2194 eltMask = -1;
2195 // If RHS is going to be replaced (case 3 or 4), calculate the
2196 // new mask value for the element.
2197 else if (newRHS != RHS) {
2198 eltMask = RHSMask[Mask[i]-LHSWidth];
2199 // If the value selected is an undef value, explicitly specify it
2200 // with a -1 mask value.
2201 if (eltMask >= (int)RHSOp0Width) {
2202 assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2203 && "should have been checked above");
2204 eltMask = -1;
2205 }
2206 } else
2207 eltMask = Mask[i]-LHSWidth;
2208
2209 // If LHS's width is changed, shift the mask value accordingly.
2210 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2211 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2212 // If newRHS == newLHS, we want to remap any references from newRHS to
2213 // newLHS so that we can properly identify splats that may occur due to
2214 // obfuscation across the two vectors.
2215 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2216 eltMask += newLHSWidth;
2217 }
2218
2219 // Check if this could still be a splat.
2220 if (eltMask >= 0) {
2221 if (SplatElt >= 0 && SplatElt != eltMask)
2222 isSplat = false;
2223 SplatElt = eltMask;
2224 }
2225
2226 newMask.push_back(eltMask);
2227 }
2228
2229 // If the result mask is equal to one of the original shuffle masks,
2230 // or is a splat, do the replacement.
2231 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2232 SmallVector<Constant*, 16> Elts;
2233 for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2234 if (newMask[i] < 0) {
2235 Elts.push_back(UndefValue::get(Int32Ty));
2236 } else {
2237 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2238 }
2239 }
2240 if (!newRHS)
2241 newRHS = UndefValue::get(newLHS->getType());
2242 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2243 }
2244
2245 // If the result mask is an identity, replace uses of this instruction with
2246 // corresponding argument.
2247 bool isLHSID, isRHSID;
2248 recognizeIdentityMask(newMask, isLHSID, isRHSID);
2249 if (isLHSID && VWidth == LHSOp0Width) return replaceInstUsesWith(SVI, newLHS);
2250 if (isRHSID && VWidth == RHSOp0Width) return replaceInstUsesWith(SVI, newRHS);
2251
2252 return MadeChange ? &SVI : nullptr;
2253}
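As a closing illustration of case 1 in the merging logic above (plain C++, hypothetical masks): shuffle(shuffle(v1, v2, mask1), undef, mask) can fold to shuffle(v1, undef, newMask) by looking each defined outer lane up through mask1.

#include <vector>

static std::vector<int> composeCase1(const std::vector<int> &Mask1,
                                     const std::vector<int> &Mask) {
  std::vector<int> NewMask(Mask.size(), -1);
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I] >= 0 && Mask[I] < static_cast<int>(Mask1.size()))
      NewMask[I] = Mask1[Mask[I]]; // look through the inner shuffle
  return NewMask;
}
// e.g. Mask1 = {3,3,3,3} (a splat of v1[3]) and Mask = {0,2} give {3,3}:
// merging removes one shuffle, the splat(splat) --> splat case noted above.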

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/InstrTypes.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Intrinsics.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/IR/Value.h"
42#include "llvm/Support/Casting.h"
43#include <cstdint>
44
45namespace llvm {
46namespace PatternMatch {
47
48template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
49 return const_cast<Pattern &>(P).match(V);
16. Calling 'ThreeOps_match::match'
21. Returning from 'ThreeOps_match::match'
22. Returning the value 1, which participates in a condition later
50}
51
52template <typename SubPattern_t> struct OneUse_match {
53 SubPattern_t SubPattern;
54
55 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
56
57 template <typename OpTy> bool match(OpTy *V) {
58 return V->hasOneUse() && SubPattern.match(V);
59 }
60};
61
62template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
63 return SubPattern;
64}
65
66template <typename Class> struct class_match {
67 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
68};
69
70/// Match an arbitrary value and ignore it.
71inline class_match<Value> m_Value() { return class_match<Value>(); }
72
73/// Match an arbitrary binary operation and ignore it.
74inline class_match<BinaryOperator> m_BinOp() {
75 return class_match<BinaryOperator>();
76}
77
78/// Matches any compare instruction and ignore it.
79inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
80
81/// Match an arbitrary ConstantInt and ignore it.
82inline class_match<ConstantInt> m_ConstantInt() {
83 return class_match<ConstantInt>();
84}
85
86/// Match an arbitrary undef constant.
87inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
88
89/// Match an arbitrary Constant and ignore it.
90inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
91
92/// Match an arbitrary basic block value and ignore it.
93inline class_match<BasicBlock> m_BasicBlock() {
94 return class_match<BasicBlock>();
95}
96
97/// Inverting matcher
98template <typename Ty> struct match_unless {
99 Ty M;
100
101 match_unless(const Ty &Matcher) : M(Matcher) {}
102
103 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
104};
105
106/// Match if the inner matcher does *NOT* match.
107template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
108 return match_unless<Ty>(M);
109}
110
111/// Matching combinators
112template <typename LTy, typename RTy> struct match_combine_or {
113 LTy L;
114 RTy R;
115
116 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
117
118 template <typename ITy> bool match(ITy *V) {
119 if (L.match(V))
120 return true;
121 if (R.match(V))
122 return true;
123 return false;
124 }
125};
126
127template <typename LTy, typename RTy> struct match_combine_and {
128 LTy L;
129 RTy R;
130
131 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
132
133 template <typename ITy> bool match(ITy *V) {
134 if (L.match(V))
135 if (R.match(V))
136 return true;
137 return false;
138 }
139};
140
141/// Combine two pattern matchers matching L || R
142template <typename LTy, typename RTy>
143inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
144 return match_combine_or<LTy, RTy>(L, R);
145}
146
147/// Combine two pattern matchers matching L && R
148template <typename LTy, typename RTy>
149inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
150 return match_combine_and<LTy, RTy>(L, R);
151}
152
153struct apint_match {
154 const APInt *&Res;
155
156 apint_match(const APInt *&R) : Res(R) {}
157
158 template <typename ITy> bool match(ITy *V) {
159 if (auto *CI = dyn_cast<ConstantInt>(V)) {
160 Res = &CI->getValue();
161 return true;
162 }
163 if (V->getType()->isVectorTy())
164 if (const auto *C = dyn_cast<Constant>(V))
165 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
166 Res = &CI->getValue();
167 return true;
168 }
169 return false;
170 }
171};
172// Either constexpr if or renaming ConstantFP::getValueAPF to
173// ConstantFP::getValue is needed to do it via single template
174// function for both apint/apfloat.
175struct apfloat_match {
176 const APFloat *&Res;
177 apfloat_match(const APFloat *&R) : Res(R) {}
178 template <typename ITy> bool match(ITy *V) {
179 if (auto *CI = dyn_cast<ConstantFP>(V)) {
180 Res = &CI->getValueAPF();
181 return true;
182 }
183 if (V->getType()->isVectorTy())
184 if (const auto *C = dyn_cast<Constant>(V))
185 if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) {
186 Res = &CI->getValueAPF();
187 return true;
188 }
189 return false;
190 }
191};
192
193/// Match a ConstantInt or splatted ConstantVector, binding the
194/// specified pointer to the contained APInt.
195inline apint_match m_APInt(const APInt *&Res) { return Res; }
196
197/// Match a ConstantFP or splatted ConstantVector, binding the
198/// specified pointer to the contained APFloat.
199inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; }
200
201template <int64_t Val> struct constantint_match {
202 template <typename ITy> bool match(ITy *V) {
203 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
204 const APInt &CIV = CI->getValue();
205 if (Val >= 0)
206 return CIV == static_cast<uint64_t>(Val);
207 // If Val is negative, and CI is shorter than it, truncate to the right
208 // number of bits. If it is larger, then we have to sign extend. Just
209 // compare their negated values.
210 return -CIV == -Val;
211 }
212 return false;
213 }
214};
215
216/// Match a ConstantInt with a specific value.
217template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
218 return constantint_match<Val>();
219}
220
221/// This helper class is used to match scalar and vector integer constants that
222/// satisfy a specified predicate.
223/// For vector constants, undefined elements are ignored.
224template <typename Predicate> struct cst_pred_ty : public Predicate {
225 template <typename ITy> bool match(ITy *V) {
226 if (const auto *CI = dyn_cast<ConstantInt>(V))
227 return this->isValue(CI->getValue());
228 if (V->getType()->isVectorTy()) {
229 if (const auto *C = dyn_cast<Constant>(V)) {
230 if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
231 return this->isValue(CI->getValue());
232
233 // Non-splat vector constant: check each element for a match.
234 unsigned NumElts = V->getType()->getVectorNumElements();
235 assert(NumElts != 0 && "Constant vector with no elements?");
236 bool HasNonUndefElements = false;
237 for (unsigned i = 0; i != NumElts; ++i) {
238 Constant *Elt = C->getAggregateElement(i);
239 if (!Elt)
240 return false;
241 if (isa<UndefValue>(Elt))
242 continue;
243 auto *CI = dyn_cast<ConstantInt>(Elt);
244 if (!CI || !this->isValue(CI->getValue()))
245 return false;
246 HasNonUndefElements = true;
247 }
248 return HasNonUndefElements;
249 }
250 }
251 return false;
252 }
253};
254
255/// This helper class is used to match scalar and vector constants that
256/// satisfy a specified predicate, and bind them to an APInt.
257template <typename Predicate> struct api_pred_ty : public Predicate {
258 const APInt *&Res;
259
260 api_pred_ty(const APInt *&R) : Res(R) {}
261
262 template <typename ITy> bool match(ITy *V) {
263 if (const auto *CI = dyn_cast<ConstantInt>(V))
264 if (this->isValue(CI->getValue())) {
265 Res = &CI->getValue();
266 return true;
267 }
268 if (V->getType()->isVectorTy())
269 if (const auto *C = dyn_cast<Constant>(V))
270 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
271 if (this->isValue(CI->getValue())) {
272 Res = &CI->getValue();
273 return true;
274 }
275
276 return false;
277 }
278};
279
280/// This helper class is used to match scalar and vector floating-point
281/// constants that satisfy a specified predicate.
282/// For vector constants, undefined elements are ignored.
283template <typename Predicate> struct cstfp_pred_ty : public Predicate {
284 template <typename ITy> bool match(ITy *V) {
285 if (const auto *CF = dyn_cast<ConstantFP>(V))
286 return this->isValue(CF->getValueAPF());
287 if (V->getType()->isVectorTy()) {
288 if (const auto *C = dyn_cast<Constant>(V)) {
289 if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
290 return this->isValue(CF->getValueAPF());
291
292 // Non-splat vector constant: check each element for a match.
293 unsigned NumElts = V->getType()->getVectorNumElements();
294 assert(NumElts != 0 && "Constant vector with no elements?");
295 bool HasNonUndefElements = false;
296 for (unsigned i = 0; i != NumElts; ++i) {
297 Constant *Elt = C->getAggregateElement(i);
298 if (!Elt)
299 return false;
300 if (isa<UndefValue>(Elt))
301 continue;
302 auto *CF = dyn_cast<ConstantFP>(Elt);
303 if (!CF || !this->isValue(CF->getValueAPF()))
304 return false;
305 HasNonUndefElements = true;
306 }
307 return HasNonUndefElements;
308 }
309 }
310 return false;
311 }
312};
313
314///////////////////////////////////////////////////////////////////////////////
315//
316// Encapsulate constant value queries for use in templated predicate matchers.
317// This allows checking if constants match using compound predicates and works
318// with vector constants, possibly with relaxed constraints. For example, ignore
319// undef values.
320//
321///////////////////////////////////////////////////////////////////////////////
322
323struct is_any_apint {
324 bool isValue(const APInt &C) { return true; }
325};
326/// Match an integer or vector with any integral constant.
327/// For vectors, this includes constants with undefined elements.
328inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
329 return cst_pred_ty<is_any_apint>();
330}
331
332struct is_all_ones {
333 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
334};
335/// Match an integer or vector with all bits set.
336/// For vectors, this includes constants with undefined elements.
337inline cst_pred_ty<is_all_ones> m_AllOnes() {
338 return cst_pred_ty<is_all_ones>();
339}
340
341struct is_maxsignedvalue {
342 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
343};
344/// Match an integer or vector with values having all bits except for the high
345/// bit set (0x7f...).
346/// For vectors, this includes constants with undefined elements.
347inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
348 return cst_pred_ty<is_maxsignedvalue>();
349}
350inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
351 return V;
352}
353
354struct is_negative {
355 bool isValue(const APInt &C) { return C.isNegative(); }
356};
357/// Match an integer or vector of negative values.
358/// For vectors, this includes constants with undefined elements.
359inline cst_pred_ty<is_negative> m_Negative() {
360 return cst_pred_ty<is_negative>();
361}
362inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
363 return V;
364}
365
366struct is_nonnegative {
367 bool isValue(const APInt &C) { return C.isNonNegative(); }
368};
369/// Match an integer or vector of nonnegative values.
370/// For vectors, this includes constants with undefined elements.
371inline cst_pred_ty<is_nonnegative> m_NonNegative() {
372 return cst_pred_ty<is_nonnegative>();
373}
374inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
375 return V;
376}
377
378struct is_one {
379 bool isValue(const APInt &C) { return C.isOneValue(); }
380};
381/// Match an integer 1 or a vector with all elements equal to 1.
382/// For vectors, this includes constants with undefined elements.
383inline cst_pred_ty<is_one> m_One() {
384 return cst_pred_ty<is_one>();
385}
386
387struct is_zero_int {
388 bool isValue(const APInt &C) { return C.isNullValue(); }
389};
390/// Match an integer 0 or a vector with all elements equal to 0.
391/// For vectors, this includes constants with undefined elements.
392inline cst_pred_ty<is_zero_int> m_ZeroInt() {
393 return cst_pred_ty<is_zero_int>();
394}
395
396struct is_zero {
397 template <typename ITy> bool match(ITy *V) {
398 auto *C = dyn_cast<Constant>(V);
399 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
400 }
401};
402/// Match any null constant or a vector with all elements equal to 0.
403/// For vectors, this includes constants with undefined elements.
404inline is_zero m_Zero() {
405 return is_zero();
406}
407
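A small client-side sketch (added for illustration; the helper name is hypothetical) showing how these predicate-based matchers tolerate undef lanes in vector constants:

static bool matchNotOfValue(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches 'xor %x, -1' where -1 may be a scalar, a splat, or a constant
  // vector whose defined lanes are all -1 (undef lanes are ignored).
  return match(V, m_Xor(m_Value(X), m_AllOnes()));
}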
408struct is_power2 {
409 bool isValue(const APInt &C) { return C.isPowerOf2(); }
410};
411/// Match an integer or vector power-of-2.
412/// For vectors, this includes constants with undefined elements.
413inline cst_pred_ty<is_power2> m_Power2() {
414 return cst_pred_ty<is_power2>();
415}
416inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
417 return V;
418}
419
420struct is_negated_power2 {
421 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
422};
423/// Match an integer or vector negated power-of-2.
424/// For vectors, this includes constants with undefined elements.
425inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
426 return cst_pred_ty<is_negated_power2>();
427}
428inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
429 return V;
430}
431
432struct is_power2_or_zero {
433 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
434};
435/// Match an integer or vector of 0 or power-of-2 values.
436/// For vectors, this includes constants with undefined elements.
437inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
438 return cst_pred_ty<is_power2_or_zero>();
439}
440inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
441 return V;
442}
443
444struct is_sign_mask {
445 bool isValue(const APInt &C) { return C.isSignMask(); }
446};
447/// Match an integer or vector with only the sign bit(s) set.
448/// For vectors, this includes constants with undefined elements.
449inline cst_pred_ty<is_sign_mask> m_SignMask() {
450 return cst_pred_ty<is_sign_mask>();
451}
452
453struct is_lowbit_mask {
454 bool isValue(const APInt &C) { return C.isMask(); }
455};
456/// Match an integer or vector with only the low bit(s) set.
457/// For vectors, this includes constants with undefined elements.
458inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
459 return cst_pred_ty<is_lowbit_mask>();
460}
461
462struct icmp_pred_with_threshold {
463 ICmpInst::Predicate Pred;
464 const APInt *Thr;
465 bool isValue(const APInt &C) {
466 switch (Pred) {
467 case ICmpInst::Predicate::ICMP_EQ:
468 return C.eq(*Thr);
469 case ICmpInst::Predicate::ICMP_NE:
470 return C.ne(*Thr);
471 case ICmpInst::Predicate::ICMP_UGT:
472 return C.ugt(*Thr);
473 case ICmpInst::Predicate::ICMP_UGE:
474 return C.uge(*Thr);
475 case ICmpInst::Predicate::ICMP_ULT:
476 return C.ult(*Thr);
477 case ICmpInst::Predicate::ICMP_ULE:
478 return C.ule(*Thr);
479 case ICmpInst::Predicate::ICMP_SGT:
480 return C.sgt(*Thr);
481 case ICmpInst::Predicate::ICMP_SGE:
482 return C.sge(*Thr);
483 case ICmpInst::Predicate::ICMP_SLT:
484 return C.slt(*Thr);
485 case ICmpInst::Predicate::ICMP_SLE:
486 return C.sle(*Thr);
487 default:
488 llvm_unreachable("Unhandled ICmp predicate");
489 }
490 }
491};
492/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
493/// to Threshold. For vectors, this includes constants with undefined elements.
494inline cst_pred_ty<icmp_pred_with_threshold>
495m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
496 cst_pred_ty<icmp_pred_with_threshold> P;
497 P.Pred = Predicate;
498 P.Thr = &Threshold;
499 return P;
500}
501
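A hypothetical usage sketch (not from the header): the threshold's bit width must match the matched constant's width, since the APInt comparison helpers assert on mismatched widths.

static bool isConstantGreaterThanOne(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  if (!V->getType()->isIntOrIntVectorTy())
    return false;
  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  // Every defined element must compare sgt with 1 for the match to succeed.
  return match(V, m_SpecificInt_ICMP(llvm::ICmpInst::ICMP_SGT,
                                     llvm::APInt(BitWidth, 1)));
}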
502struct is_nan {
503 bool isValue(const APFloat &C) { return C.isNaN(); }
504};
505/// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
506/// For vectors, this includes constants with undefined elements.
507inline cstfp_pred_ty<is_nan> m_NaN() {
508 return cstfp_pred_ty<is_nan>();
509}
510
511struct is_any_zero_fp {
512 bool isValue(const APFloat &C) { return C.isZero(); }
513};
514/// Match a floating-point negative zero or positive zero.
515/// For vectors, this includes constants with undefined elements.
516inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
517 return cstfp_pred_ty<is_any_zero_fp>();
518}
519
520struct is_pos_zero_fp {
521 bool isValue(const APFloat &C) { return C.isPosZero(); }
522};
523/// Match a floating-point positive zero.
524/// For vectors, this includes constants with undefined elements.
525inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
526 return cstfp_pred_ty<is_pos_zero_fp>();
527}
528
529struct is_neg_zero_fp {
530 bool isValue(const APFloat &C) { return C.isNegZero(); }
531};
532/// Match a floating-point negative zero.
533/// For vectors, this includes constants with undefined elements.
534inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
535 return cstfp_pred_ty<is_neg_zero_fp>();
536}
537
538///////////////////////////////////////////////////////////////////////////////
539
540template <typename Class> struct bind_ty {
541 Class *&VR;
542
543 bind_ty(Class *&V) : VR(V) {}
544
545 template <typename ITy> bool match(ITy *V) {
546 if (auto *CV = dyn_cast<Class>(V)) {
547 VR = CV;
548 return true;
549 }
550 return false;
551 }
552};
553
554/// Match a value, capturing it if we match.
555inline bind_ty<Value> m_Value(Value *&V) { return V; }
556inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
557
558/// Match an instruction, capturing it if we match.
559inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
560/// Match a binary operator, capturing it if we match.
561inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
562/// Match a 'with overflow' intrinsic, capturing it if we match.
563inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
564
565/// Match a ConstantInt, capturing the value if we match.
566inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
567
568/// Match a Constant, capturing the value if we match.
569inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
570
571/// Match a ConstantFP, capturing the value if we match.
572inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
573
574/// Match a basic block value, capturing it if we match.
575inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
576inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
577 return V;
578}
579
580/// Match a specified Value*.
581struct specificval_ty {
582 const Value *Val;
583
584 specificval_ty(const Value *V) : Val(V) {}
585
586 template <typename ITy> bool match(ITy *V) { return V == Val; }
587};
588
589/// Match if we have the specified value.
590inline specificval_ty m_Specific(const Value *V) { return V; }
591
592/// Stores a reference to the Value *, not the Value * itself, and can
593/// therefore be used in commutative matchers.
594template <typename Class> struct deferredval_ty {
595 Class *const &Val;
596
597 deferredval_ty(Class *const &V) : Val(V) {}
598
599 template <typename ITy> bool match(ITy *const V) { return V == Val; }
600};
601
602/// A commutative-friendly version of m_Specific().
603inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
604inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
605 return V;
606}
607
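An illustrative sketch (the helper name is an assumption): m_Deferred() re-reads a binding made earlier in the same pattern, which m_Specific() cannot do because it captures the pointer at pattern-construction time.

static bool matchSelfCompare(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::Value *X;
  // The LHS is matched first, so m_Value(X) binds X before m_Deferred(X)
  // is evaluated; only 'icmp pred %x, %x' matches.
  return match(V, m_ICmp(Pred, m_Value(X), m_Deferred(X)));
}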
608/// Match a specified floating point value or vector of all elements of
609/// that value.
610struct specific_fpval {
611 double Val;
612
613 specific_fpval(double V) : Val(V) {}
614
615 template <typename ITy> bool match(ITy *V) {
616 if (const auto *CFP = dyn_cast<ConstantFP>(V))
617 return CFP->isExactlyValue(Val);
618 if (V->getType()->isVectorTy())
619 if (const auto *C = dyn_cast<Constant>(V))
620 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
621 return CFP->isExactlyValue(Val);
622 return false;
623 }
624};
625
626/// Match a specific floating point value or vector with all elements
627/// equal to the value.
628inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
629
630/// Match a float 1.0 or vector with all elements equal to 1.0.
631inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
632
633struct bind_const_intval_ty {
634 uint64_t &VR;
635
636 bind_const_intval_ty(uint64_t &V) : VR(V) {}
637
638 template <typename ITy> bool match(ITy *V) {
639 if (const auto *CV = dyn_cast<ConstantInt>(V))
640 if (CV->getValue().ule(UINT64_MAX)) {
641 VR = CV->getZExtValue();
642 return true;
643 }
644 return false;
645 }
646};
647
648/// Match a specified integer value or vector of all elements of that
649/// value.
650struct specific_intval {
651 APInt Val;
652
653 specific_intval(APInt V) : Val(std::move(V)) {}
654
655 template <typename ITy> bool match(ITy *V) {
656 const auto *CI = dyn_cast<ConstantInt>(V);
657 if (!CI && V->getType()->isVectorTy())
658 if (const auto *C = dyn_cast<Constant>(V))
659 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
660
661 return CI && APInt::isSameValue(CI->getValue(), Val);
662 }
663};
664
665/// Match a specific integer value or vector with all elements equal to
666/// the value.
667inline specific_intval m_SpecificInt(APInt V) {
668 return specific_intval(std::move(V));
669}
670
671inline specific_intval m_SpecificInt(uint64_t V) {
672 return m_SpecificInt(APInt(64, V));
673}
674
675/// Match a ConstantInt and bind to its value. This does not match
676/// ConstantInts wider than 64-bits.
677inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
678
679/// Match a specified basic block value.
680struct specific_bbval {
681 BasicBlock *Val;
682
683 specific_bbval(BasicBlock *Val) : Val(Val) {}
684
685 template <typename ITy> bool match(ITy *V) {
686 const auto *BB = dyn_cast<BasicBlock>(V);
687 return BB && BB == Val;
688 }
689};
690
691/// Match a specific basic block value.
692inline specific_bbval m_SpecificBB(BasicBlock *BB) {
693 return specific_bbval(BB);
694}
695
696/// A commutative-friendly version of m_Specific().
697inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
698 return BB;
699}
700inline deferredval_ty<const BasicBlock>
701m_Deferred(const BasicBlock *const &BB) {
702 return BB;
703}
704
705//===----------------------------------------------------------------------===//
706// Matcher for any binary operator.
707//
708template <typename LHS_t, typename RHS_t, bool Commutable = false>
709struct AnyBinaryOp_match {
710 LHS_t L;
711 RHS_t R;
712
713 // The evaluation order is always stable, regardless of Commutability.
714 // The LHS is always matched first.
715 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
716
717 template <typename OpTy> bool match(OpTy *V) {
718 if (auto *I = dyn_cast<BinaryOperator>(V))
719 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
720 (Commutable && L.match(I->getOperand(1)) &&
721 R.match(I->getOperand(0)));
722 return false;
723 }
724};
725
726template <typename LHS, typename RHS>
727inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
728 return AnyBinaryOp_match<LHS, RHS>(L, R);
729}
730
731//===----------------------------------------------------------------------===//
732// Matchers for specific binary operators.
733//
734
735template <typename LHS_t, typename RHS_t, unsigned Opcode,
736 bool Commutable = false>
737struct BinaryOp_match {
738 LHS_t L;
739 RHS_t R;
740
741 // The evaluation order is always stable, regardless of Commutability.
742 // The LHS is always matched first.
743 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
744
745 template <typename OpTy> bool match(OpTy *V) {
746 if (V->getValueID() == Value::InstructionVal + Opcode) {
747 auto *I = cast<BinaryOperator>(V);
748 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
749 (Commutable && L.match(I->getOperand(1)) &&
750 R.match(I->getOperand(0)));
751 }
752 if (auto *CE = dyn_cast<ConstantExpr>(V))
753 return CE->getOpcode() == Opcode &&
754 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
755 (Commutable && L.match(CE->getOperand(1)) &&
756 R.match(CE->getOperand(0))));
757 return false;
758 }
759};
760
761template <typename LHS, typename RHS>
762inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
763 const RHS &R) {
764 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
765}
766
767template <typename LHS, typename RHS>
768inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
769 const RHS &R) {
770 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
771}
772
773template <typename LHS, typename RHS>
774inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
775 const RHS &R) {
776 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
777}
778
779template <typename LHS, typename RHS>
780inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
781 const RHS &R) {
782 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
783}
784
785template <typename Op_t> struct FNeg_match {
786 Op_t X;
787
788 FNeg_match(const Op_t &Op) : X(Op) {}
789 template <typename OpTy> bool match(OpTy *V) {
790 auto *FPMO = dyn_cast<FPMathOperator>(V);
791 if (!FPMO) return false;
792
793 if (FPMO->getOpcode() == Instruction::FNeg)
794 return X.match(FPMO->getOperand(0));
795
796 if (FPMO->getOpcode() == Instruction::FSub) {
797 if (FPMO->hasNoSignedZeros()) {
798 // With 'nsz', any zero goes.
799 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
800 return false;
801 } else {
802 // Without 'nsz', we need fsub -0.0, X exactly.
803 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
804 return false;
805 }
806
807 return X.match(FPMO->getOperand(1));
808 }
809
810 return false;
811 }
812};
813
814/// Match 'fneg X' as 'fsub -0.0, X'.
815template <typename OpTy>
816inline FNeg_match<OpTy>
817m_FNeg(const OpTy &X) {
818 return FNeg_match<OpTy>(X);
819}
820
821/// Match 'fneg X' as 'fsub +/-0.0, X'.
822template <typename RHS>
823inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
824m_FNegNSZ(const RHS &X) {
825 return m_FSub(m_AnyZeroFP(), X);
826}
827
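A short client-side sketch (hypothetical helper name) of the two forms m_FNeg recognizes:

static bool matchFNegOfValue(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches the unary 'fneg %x' instruction as well as the legacy
  // 'fsub -0.0, %x' form; with the 'nsz' flag, 'fsub 0.0, %x' also matches.
  return match(V, m_FNeg(m_Value(X)));
}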
828template <typename Op_t> struct Freeze_match {
829 Op_t X;
830
831 Freeze_match(const Op_t &Op) : X(Op) {}
832 template <typename OpTy> bool match(OpTy *V) {
833 auto *I = dyn_cast<UnaryOperator>(V);
834 if (!I) return false;
835
836 if (isa<FreezeOperator>(I))
837 return X.match(I->getOperand(0));
838
839 return false;
840 }
841};
842
843/// Matches freeze.
844template <typename OpTy>
845inline Freeze_match<OpTy>
846m_Freeze(const OpTy &X) {
847 return Freeze_match<OpTy>(X);
848}
849
850template <typename LHS, typename RHS>
851inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
852 const RHS &R) {
853 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
854}
855
856template <typename LHS, typename RHS>
857inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
858 const RHS &R) {
859 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
860}
861
862template <typename LHS, typename RHS>
863inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
864 const RHS &R) {
865 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
866}
867
868template <typename LHS, typename RHS>
869inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
870 const RHS &R) {
871 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
872}
873
874template <typename LHS, typename RHS>
875inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
876 const RHS &R) {
877 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
878}
879
880template <typename LHS, typename RHS>
881inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
882 const RHS &R) {
883 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
884}
885
886template <typename LHS, typename RHS>
887inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
888 const RHS &R) {
889 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
890}
891
892template <typename LHS, typename RHS>
893inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
894 const RHS &R) {
895 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
896}
897
898template <typename LHS, typename RHS>
899inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
900 const RHS &R) {
901 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
902}
903
904template <typename LHS, typename RHS>
905inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
906 const RHS &R) {
907 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
908}
909
910template <typename LHS, typename RHS>
911inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
912 const RHS &R) {
913 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
914}
915
916template <typename LHS, typename RHS>
917inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
918 const RHS &R) {
919 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
920}
921
922template <typename LHS, typename RHS>
923inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
924 const RHS &R) {
925 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
926}
927
928template <typename LHS, typename RHS>
929inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
930 const RHS &R) {
931 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
932}
933
934template <typename LHS_t, typename RHS_t, unsigned Opcode,
935 unsigned WrapFlags = 0>
936struct OverflowingBinaryOp_match {
937 LHS_t L;
938 RHS_t R;
939
940 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
941 : L(LHS), R(RHS) {}
942
943 template <typename OpTy> bool match(OpTy *V) {
944 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
945 if (Op->getOpcode() != Opcode)
946 return false;
947 if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap &&
948 !Op->hasNoUnsignedWrap())
949 return false;
950 if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap &&
951 !Op->hasNoSignedWrap())
952 return false;
953 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
954 }
955 return false;
956 }
957};
958
959template <typename LHS, typename RHS>
960inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
961 OverflowingBinaryOperator::NoSignedWrap>
962m_NSWAdd(const LHS &L, const RHS &R) {
963 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
964 OverflowingBinaryOperator::NoSignedWrap>(
965 L, R);
966}
967template <typename LHS, typename RHS>
968inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
969 OverflowingBinaryOperator::NoSignedWrap>
970m_NSWSub(const LHS &L, const RHS &R) {
971 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
972 OverflowingBinaryOperator::NoSignedWrap>(
973 L, R);
974}
975template <typename LHS, typename RHS>
976inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
977 OverflowingBinaryOperator::NoSignedWrap>
978m_NSWMul(const LHS &L, const RHS &R) {
979 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
980 OverflowingBinaryOperator::NoSignedWrap>(
981 L, R);
982}
983template <typename LHS, typename RHS>
984inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
985 OverflowingBinaryOperator::NoSignedWrap>
986m_NSWShl(const LHS &L, const RHS &R) {
987 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
988 OverflowingBinaryOperator::NoSignedWrap>(
989 L, R);
990}
991
992template <typename LHS, typename RHS>
993inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
994 OverflowingBinaryOperator::NoUnsignedWrap>
995m_NUWAdd(const LHS &L, const RHS &R) {
996 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
997 OverflowingBinaryOperator::NoUnsignedWrap>(
998 L, R);
999}
1000template <typename LHS, typename RHS>
1001inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1002 OverflowingBinaryOperator::NoUnsignedWrap>
1003m_NUWSub(const LHS &L, const RHS &R) {
1004 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1005 OverflowingBinaryOperator::NoUnsignedWrap>(
1006 L, R);
1007}
1008template <typename LHS, typename RHS>
1009inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1010 OverflowingBinaryOperator::NoUnsignedWrap>
1011m_NUWMul(const LHS &L, const RHS &R) {
1012 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1013 OverflowingBinaryOperator::NoUnsignedWrap>(
1014 L, R);
1015}
1016template <typename LHS, typename RHS>
1017inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1018 OverflowingBinaryOperator::NoUnsignedWrap>
1019m_NUWShl(const LHS &L, const RHS &R) {
1020 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1021 OverflowingBinaryOperator::NoUnsignedWrap>(
1022 L, R);
1023}
1024
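A minimal sketch (assumed helper name) showing that the wrap-flag matchers reject instructions without the requested flag:

static bool matchNSWIncrement(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches 'add nsw %x, 1'; a plain 'add %x, 1' without the nsw flag is
  // rejected. This matcher is not commutative, so 'add nsw 1, %x' is not
  // recognized either.
  return match(V, m_NSWAdd(m_Value(X), m_One()));
}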
1025//===----------------------------------------------------------------------===//
1026// Class that matches a group of binary opcodes.
1027//
1028template <typename LHS_t, typename RHS_t, typename Predicate>
1029struct BinOpPred_match : Predicate {
1030 LHS_t L;
1031 RHS_t R;
1032
1033 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1034
1035 template <typename OpTy> bool match(OpTy *V) {
1036 if (auto *I = dyn_cast<Instruction>(V))
1037 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1038 R.match(I->getOperand(1));
1039 if (auto *CE = dyn_cast<ConstantExpr>(V))
1040 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1041 R.match(CE->getOperand(1));
1042 return false;
1043 }
1044};
1045
1046struct is_shift_op {
1047 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1048};
1049
1050struct is_right_shift_op {
1051 bool isOpType(unsigned Opcode) {
1052 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1053 }
1054};
1055
1056struct is_logical_shift_op {
1057 bool isOpType(unsigned Opcode) {
1058 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1059 }
1060};
1061
1062struct is_bitwiselogic_op {
1063 bool isOpType(unsigned Opcode) {
1064 return Instruction::isBitwiseLogicOp(Opcode);
1065 }
1066};
1067
1068struct is_idiv_op {
1069 bool isOpType(unsigned Opcode) {
1070 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1071 }
1072};
1073
1074struct is_irem_op {
1075 bool isOpType(unsigned Opcode) {
1076 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1077 }
1078};
1079
1080/// Matches shift operations.
1081template <typename LHS, typename RHS>
1082inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1083 const RHS &R) {
1084 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1085}
1086
1087/// Matches right shift operations.
1088template <typename LHS, typename RHS>
1089inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1090 const RHS &R) {
1091 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1092}
1093
1094/// Matches logical shift operations.
1095template <typename LHS, typename RHS>
1096inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1097m_LogicalShift(const LHS &L, const RHS &R) {
1098 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1099}
1100
1101/// Matches bitwise logic operations.
1102template <typename LHS, typename RHS>
1103inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1104m_BitwiseLogic(const LHS &L, const RHS &R) {
1105 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1106}
1107
1108/// Matches integer division operations.
1109template <typename LHS, typename RHS>
1110inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1111 const RHS &R) {
1112 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1113}
1114
1115/// Matches integer remainder operations.
1116template <typename LHS, typename RHS>
1117inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1118 const RHS &R) {
1119 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1120}
1121
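An illustrative sketch (hypothetical name) of the opcode-group matchers:

static bool matchShiftByConstant(llvm::Value *V, llvm::Value *&X,
                                 const llvm::APInt *&ShAmt) {
  using namespace llvm::PatternMatch;
  // Accepts shl, lshr, or ashr of %x by a ConstantInt or splat amount.
  return match(V, m_Shift(m_Value(X), m_APInt(ShAmt)));
}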
1122//===----------------------------------------------------------------------===//
1123// Class that matches exact binary ops.
1124//
1125template <typename SubPattern_t> struct Exact_match {
1126 SubPattern_t SubPattern;
1127
1128 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1129
1130 template <typename OpTy> bool match(OpTy *V) {
1131 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1132 return PEO->isExact() && SubPattern.match(V);
1133 return false;
1134 }
1135};
1136
1137template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1138 return SubPattern;
1139}
1140
1141//===----------------------------------------------------------------------===//
1142// Matchers for CmpInst classes
1143//
1144
1145template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1146 bool Commutable = false>
1147struct CmpClass_match {
1148 PredicateTy &Predicate;
1149 LHS_t L;
1150 RHS_t R;
1151
1152 // The evaluation order is always stable, regardless of Commutability.
1153 // The LHS is always matched first.
1154 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1155 : Predicate(Pred), L(LHS), R(RHS) {}
1156
1157 template <typename OpTy> bool match(OpTy *V) {
1158 if (auto *I = dyn_cast<Class>(V))
1159 if ((L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
1160 (Commutable && L.match(I->getOperand(1)) &&
1161 R.match(I->getOperand(0)))) {
1162 Predicate = I->getPredicate();
1163 return true;
1164 }
1165 return false;
1166 }
1167};
1168
1169template <typename LHS, typename RHS>
1170inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1171m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1172 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1173}
1174
1175template <typename LHS, typename RHS>
1176inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1177m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1178 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1179}
1180
1181template <typename LHS, typename RHS>
1182inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1183m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1184 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1185}
1186
1187//===----------------------------------------------------------------------===//
1188// Matchers for instructions with a given opcode and number of operands.
1189//
1190
1191/// Matches instructions with Opcode and one operand.
1192template <typename T0, unsigned Opcode> struct OneOps_match {
1193 T0 Op1;
1194
1195 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1196
1197 template <typename OpTy> bool match(OpTy *V) {
1198 if (V->getValueID() == Value::InstructionVal + Opcode) {
1199 auto *I = cast<Instruction>(V);
1200 return Op1.match(I->getOperand(0));
1201 }
1202 return false;
1203 }
1204};
1205
1206/// Matches instructions with Opcode and two operands.
1207template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1208 T0 Op1;
1209 T1 Op2;
1210
1211 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1212
1213 template <typename OpTy> bool match(OpTy *V) {
1214 if (V->getValueID() == Value::InstructionVal + Opcode) {
1215 auto *I = cast<Instruction>(V);
1216 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1217 }
1218 return false;
1219 }
1220};
1221
1222/// Matches instructions with Opcode and three operands.
1223template <typename T0, typename T1, typename T2, unsigned Opcode>
1224struct ThreeOps_match {
1225 T0 Op1;
1226 T1 Op2;
1227 T2 Op3;
1228
1229 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1230 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1231
1232 template <typename OpTy> bool match(OpTy *V) {
1233 if (V->getValueID() == Value::InstructionVal + Opcode) {
17. Assuming the condition is true
18. Taking true branch
1234 auto *I = cast<Instruction>(V);
19. 'V' is a 'Instruction'
1235 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
20. Returning the value 1, which participates in a condition later
1236 Op3.match(I->getOperand(2));
1237 }
1238 return false;
1239 }
1240};
1241
1242/// Matches SelectInst.
1243template <typename Cond, typename LHS, typename RHS>
1244inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1245m_Select(const Cond &C, const LHS &L, const RHS &R) {
1246 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1247}
1248
1249/// This matches a select of two constants, e.g.:
1250/// m_SelectCst<-1, 0>(m_Value(V))
1251template <int64_t L, int64_t R, typename Cond>
1252inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1253 Instruction::Select>
1254m_SelectCst(const Cond &C) {
1255 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1256}
1257
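A brief usage sketch (assumed helper name) for the select matchers:

static bool matchBoolToSignMask(llvm::Value *V, llvm::Value *&Cond) {
  using namespace llvm::PatternMatch;
  // Matches 'select i1 %cond, iN -1, iN 0', a common sext-of-bool idiom,
  // binding Cond to the i1 condition.
  return match(V, m_SelectCst<-1, 0>(m_Value(Cond)));
}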
1258/// Matches InsertElementInst.
1259template <typename Val_t, typename Elt_t, typename Idx_t>
1260inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1261m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1262 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1263 Val, Elt, Idx);
1264}
1265
1266/// Matches ExtractElementInst.
1267template <typename Val_t, typename Idx_t>
1268inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1269m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
1270 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1271}
1272
1273/// Matches ShuffleVectorInst.
1274template <typename V1_t, typename V2_t, typename Mask_t>
1275inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>
1276m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
1277 return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2,
1278 m);
1279}
1280
1281/// Matches LoadInst.
1282template <typename OpTy>
1283inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1284 return OneOps_match<OpTy, Instruction::Load>(Op);
1285}
1286
1287/// Matches StoreInst.
1288template <typename ValueOpTy, typename PointerOpTy>
1289inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1290m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1291 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1292 PointerOp);
1293}
1294
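An illustrative sketch (hypothetical helper) combining m_Load with m_Specific:

static bool matchReloadOfPointer(llvm::Value *V, const llvm::Value *Ptr) {
  using namespace llvm::PatternMatch;
  // Matches a load whose pointer operand is exactly Ptr.
  return match(V, m_Load(m_Specific(Ptr)));
}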
1295//===----------------------------------------------------------------------===//
1296// Matchers for CastInst classes
1297//
1298
1299template <typename Op_t, unsigned Opcode> struct CastClass_match {
1300 Op_t Op;
1301
1302 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1303
1304 template <typename OpTy> bool match(OpTy *V) {
1305 if (auto *O = dyn_cast<Operator>(V))
1306 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1307 return false;
1308 }
1309};
1310
1311/// Matches BitCast.
1312template <typename OpTy>
1313inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1314 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1315}
1316
1317/// Matches PtrToInt.
1318template <typename OpTy>
1319inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1320 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1321}
1322
1323/// Matches Trunc.
1324template <typename OpTy>
1325inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1326 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1327}
1328
1329template <typename OpTy>
1330inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1331m_TruncOrSelf(const OpTy &Op) {
1332 return m_CombineOr(m_Trunc(Op), Op);
1333}
1334
1335/// Matches SExt.
1336template <typename OpTy>
1337inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1338 return CastClass_match<OpTy, Instruction::SExt>(Op);
1339}
1340
1341/// Matches ZExt.
1342template <typename OpTy>
1343inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1344 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1345}
1346
1347template <typename OpTy>
1348inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1349m_ZExtOrSelf(const OpTy &Op) {
1350 return m_CombineOr(m_ZExt(Op), Op);
1351}
1352
1353template <typename OpTy>
1354inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1355m_SExtOrSelf(const OpTy &Op) {
1356 return m_CombineOr(m_SExt(Op), Op);
1357}
1358
1359template <typename OpTy>
1360inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1361 CastClass_match<OpTy, Instruction::SExt>>
1362m_ZExtOrSExt(const OpTy &Op) {
1363 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1364}
1365
1366template <typename OpTy>
1367inline match_combine_or<
1368 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1369 CastClass_match<OpTy, Instruction::SExt>>,
1370 OpTy>
1371m_ZExtOrSExtOrSelf(const OpTy &Op) {
1372 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1373}
1374
1375/// Matches UIToFP.
1376template <typename OpTy>
1377inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1378 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1379}
1380
1381/// Matches SIToFP.
1382template <typename OpTy>
1383inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1384 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1385}
1386
1387/// Matches FPTrunc
1388template <typename OpTy>
1389inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1390 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1391}
1392
1393/// Matches FPExt
1394template <typename OpTy>
1395inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1396 return CastClass_match<OpTy, Instruction::FPExt>(Op);
1397}
1398
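A minimal sketch (assumed name) of the combined cast matchers built from m_CombineOr:

static bool matchExtendedIndex(llvm::Value *V, llvm::Value *&Idx) {
  using namespace llvm::PatternMatch;
  // Matches either 'zext %idx' or 'sext %idx'; use m_ZExtOrSExtOrSelf to
  // also accept the unextended value itself.
  return match(V, m_ZExtOrSExt(m_Value(Idx)));
}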
1399//===----------------------------------------------------------------------===//
1400// Matchers for control flow.
1401//
1402
1403struct br_match {
1404 BasicBlock *&Succ;
1405
1406 br_match(BasicBlock *&Succ) : Succ(Succ) {}
1407
1408 template <typename OpTy> bool match(OpTy *V) {
1409 if (auto *BI = dyn_cast<BranchInst>(V))
1410 if (BI->isUnconditional()) {
1411 Succ = BI->getSuccessor(0);
1412 return true;
1413 }
1414 return false;
1415 }
1416};
1417
1418inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1419
1420template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1421struct brc_match {
1422 Cond_t Cond;
1423 TrueBlock_t T;
1424 FalseBlock_t F;
1425
1426 brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1427 : Cond(C), T(t), F(f) {}
1428
1429 template <typename OpTy> bool match(OpTy *V) {
1430 if (auto *BI = dyn_cast<BranchInst>(V))
1431 if (BI->isConditional() && Cond.match(BI->getCondition()))
1432 return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1433 return false;
1434 }
1435};
1436
1437template <typename Cond_t>
1438inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1439m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1440 return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1441 C, m_BasicBlock(T), m_BasicBlock(F));
1442}
1443
1444template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1445inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1446m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1447 return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1448}
1449
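A client-side sketch (hypothetical helper) of the control-flow matchers:

static bool matchBranchOnICmp(llvm::Instruction *Term,
                              llvm::BasicBlock *&TrueBB,
                              llvm::BasicBlock *&FalseBB) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::Value *L, *R;
  // Matches 'br i1 (icmp pred %l, %r), %truebb, %falsebb' and binds both
  // successors; unconditional branches are rejected.
  return match(Term, m_Br(m_ICmp(Pred, m_Value(L), m_Value(R)),
                          TrueBB, FalseBB));
}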
1450//===----------------------------------------------------------------------===//
1451// Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y).
1452//
1453
1454template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1455 bool Commutable = false>
1456struct MaxMin_match {
1457 LHS_t L;
1458 RHS_t R;
1459
1460 // The evaluation order is always stable, regardless of Commutability.
1461 // The LHS is always matched first.
1462 MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1463
1464 template <typename OpTy> bool match(OpTy *V) {
1465 // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1466 auto *SI = dyn_cast<SelectInst>(V);
1467 if (!SI)
1468 return false;
1469 auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1470 if (!Cmp)
1471 return false;
1472 // At this point we have a select conditioned on a comparison. Check that
1473 // it is the values returned by the select that are being compared.
1474 Value *TrueVal = SI->getTrueValue();
1475 Value *FalseVal = SI->getFalseValue();
1476 Value *LHS = Cmp->getOperand(0);
1477 Value *RHS = Cmp->getOperand(1);
1478 if ((TrueVal != LHS || FalseVal != RHS) &&
1479 (TrueVal != RHS || FalseVal != LHS))
1480 return false;
1481 typename CmpInst_t::Predicate Pred =
1482 LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1483 // Does "(x pred y) ? x : y" represent the desired max/min operation?
1484 if (!Pred_t::match(Pred))
1485 return false;
1486 // It does! Bind the operands.
1487 return (L.match(LHS) && R.match(RHS)) ||
1488 (Commutable && L.match(RHS) && R.match(LHS));
1489 }
1490};
1491
1492/// Helper class for identifying signed max predicates.
1493struct smax_pred_ty {
1494 static bool match(ICmpInst::Predicate Pred) {
1495 return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1496 }
1497};
1498
1499/// Helper class for identifying signed min predicates.
1500struct smin_pred_ty {
1501 static bool match(ICmpInst::Predicate Pred) {
1502 return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1503 }
1504};
1505
1506/// Helper class for identifying unsigned max predicates.
1507struct umax_pred_ty {
1508 static bool match(ICmpInst::Predicate Pred) {
1509 return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1510 }
1511};
1512
1513/// Helper class for identifying unsigned min predicates.
1514struct umin_pred_ty {
1515 static bool match(ICmpInst::Predicate Pred) {
1516 return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1517 }
1518};
1519
1520/// Helper class for identifying ordered max predicates.
1521struct ofmax_pred_ty {
1522 static bool match(FCmpInst::Predicate Pred) {
1523 return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1524 }
1525};
1526
1527/// Helper class for identifying ordered min predicates.
1528struct ofmin_pred_ty {
1529 static bool match(FCmpInst::Predicate Pred) {
1530 return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1531 }
1532};
1533
1534/// Helper class for identifying unordered max predicates.
1535struct ufmax_pred_ty {
1536 static bool match(FCmpInst::Predicate Pred) {
1537 return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1538 }
1539};
1540
1541/// Helper class for identifying unordered min predicates.
1542struct ufmin_pred_ty {
1543 static bool match(FCmpInst::Predicate Pred) {
1544 return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1545 }
1546};
1547
1548template <typename LHS, typename RHS>
1549inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1550 const RHS &R) {
1551 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1552}
1553
1554template <typename LHS, typename RHS>
1555inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1556 const RHS &R) {
1557 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1558}
1559
1560template <typename LHS, typename RHS>
1561inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1562 const RHS &R) {
1563 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1564}
1565
1566template <typename LHS, typename RHS>
1567inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1568 const RHS &R) {
1569 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1570}
1571
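A short sketch (assumed helper name) of the select/compare idioms recognized by MaxMin_match:

static bool matchClampBelowAtZero(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches 'select (icmp sgt %x, 0), %x, 0' (and the equivalent swapped
  // forms), i.e. smax(%x, 0).
  return match(V, m_SMax(m_Value(X), m_ZeroInt()));
}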
1572/// Match an 'ordered' floating point maximum function.
1573/// Floating point has one special value 'NaN'. Therefore, there is no total
1574/// order. However, if we can ignore the 'NaN' value (for example, because of a
1575/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1576/// semantics. In the presence of 'NaN' we have to preserve the original
1577/// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate.
1578///
1579/// max(L, R) iff L and R are not NaN
1580/// m_OrdFMax(L, R) = R iff L or R are NaN
1581template <typename LHS, typename RHS>
1582inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1583 const RHS &R) {
1584 return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1585}
1586
1587/// Match an 'ordered' floating point minimum function.
1588/// Floating point has one special value 'NaN'. Therefore, there is no total
1589/// order. However, if we can ignore the 'NaN' value (for example, because of a
1590/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1591/// semantics. In the presence of 'NaN' we have to preserve the original
1592/// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate.
1593///
1594/// min(L, R) iff L and R are not NaN
1595/// m_OrdFMin(L, R) = R iff L or R are NaN
1596template <typename LHS, typename RHS>
1597inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1598 const RHS &R) {
1599 return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1600}
1601
1602/// Match an 'unordered' floating point maximum function.
1603/// Floating point has one special value 'NaN'. Therefore, there is no total
1604/// order. However, if we can ignore the 'NaN' value (for example, because of a
1605/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1606/// semantics. In the presence of 'NaN' we have to preserve the original
1607/// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate.
1608///
1609/// max(L, R) iff L and R are not NaN
1610/// m_UnordFMax(L, R) = L iff L or R are NaN
1611template <typename LHS, typename RHS>
1612inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1613m_UnordFMax(const LHS &L, const RHS &R) {
1614 return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1615}
1616
1617/// Match an 'unordered' floating point minimum function.
1618/// Floating point has one special value 'NaN'. Therefore, there is no total
1619/// order. However, if we can ignore the 'NaN' value (for example, because of a
1620/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1621/// semantics. In the presence of 'NaN' we have to preserve the original
1622/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate.
1623///
1624/// min(L, R) iff L and R are not NaN
1625/// m_UnordFMin(L, R) = L iff L or R are NaN
1626template <typename LHS, typename RHS>
1627inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1628m_UnordFMin(const LHS &L, const RHS &R) {
1629 return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1630}
1631
1632//===----------------------------------------------------------------------===//
1633// Matchers for overflow check patterns: e.g. (a + b) u< a
1634//
1635
1636template <typename LHS_t, typename RHS_t, typename Sum_t>
1637struct UAddWithOverflow_match {
1638 LHS_t L;
1639 RHS_t R;
1640 Sum_t S;
1641
1642 UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1643 : L(L), R(R), S(S) {}
1644
1645 template <typename OpTy> bool match(OpTy *V) {
1646 Value *ICmpLHS, *ICmpRHS;
1647 ICmpInst::Predicate Pred;
1648 if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1649 return false;
1650
1651 Value *AddLHS, *AddRHS;
1652 auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1653
1654 // (a + b) u< a, (a + b) u< b
1655 if (Pred == ICmpInst::ICMP_ULT)
1656 if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1657 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1658
1659 // a >u (a + b), b >u (a + b)
1660 if (Pred == ICmpInst::ICMP_UGT)
1661 if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1662 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1663
1664 // Match special-case for increment-by-1.
1665 if (Pred == ICmpInst::ICMP_EQ) {
1666 // (a + 1) == 0
1667 // (1 + a) == 0
1668 if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
1669 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1670 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1671 // 0 == (a + 1)
1672 // 0 == (1 + a)
1673 if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
1674 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1675 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1676 }
1677
1678 return false;
1679 }
1680};
1681
1682/// Match an icmp instruction checking for unsigned overflow on addition.
1683///
1684/// S is matched to the addition whose result is being checked for overflow, and
1685/// L and R are matched to the LHS and RHS of S.
1686template <typename LHS_t, typename RHS_t, typename Sum_t>
1687UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
1688m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
1689 return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
1690}
1691
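An illustrative sketch (hypothetical helper) of the overflow-check matcher:

static bool matchUAddOverflowCheck(llvm::Value *V, llvm::Value *&A,
                                   llvm::Value *&B, llvm::Value *&Sum) {
  using namespace llvm::PatternMatch;
  // Recognizes '(a + b) u< a', 'a u> (a + b)', and the increment-by-one
  // equality forms; A and B bind to the addends, Sum to the add itself.
  return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
}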
1692template <typename Opnd_t> struct Argument_match {
1693 unsigned OpI;
1694 Opnd_t Val;
1695
1696 Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
1697
1698 template <typename OpTy> bool match(OpTy *V) {
1699 // FIXME: Should likely be switched to use `CallBase`.
1700 if (const auto *CI = dyn_cast<CallInst>(V))
1701 return Val.match(CI->getArgOperand(OpI));
1702 return false;
1703 }
1704};
1705
1706/// Match an argument.
1707template <unsigned OpI, typename Opnd_t>
1708inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
1709 return Argument_match<Opnd_t>(OpI, Op);
1710}
1711
1712/// Intrinsic matchers.
1713struct IntrinsicID_match {
1714 unsigned ID;
1715
1716 IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
1717
1718 template <typename OpTy> bool match(OpTy *V) {
1719 if (const auto *CI = dyn_cast<CallInst>(V))
1720 if (const auto *F = CI->getCalledFunction())
1721 return F->getIntrinsicID() == ID;
1722 return false;
1723 }
1724};
1725
1726/// Intrinsic matchers are combinations of ID matchers and argument
1727/// matchers. Higher-arity matchers are defined recursively in terms of
1728/// and-ing them with lower-arity matchers. Here are some convenient typedefs
1729/// for up to several arguments; more can be added as needed.
1730template <typename T0 = void, typename T1 = void, typename T2 = void,
1731 typename T3 = void, typename T4 = void, typename T5 = void,
1732 typename T6 = void, typename T7 = void, typename T8 = void,
1733 typename T9 = void, typename T10 = void>
1734struct m_Intrinsic_Ty;
1735template <typename T0> struct m_Intrinsic_Ty<T0> {
1736 using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
1737};
1738template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
1739 using Ty =
1740 match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
1741};
1742template <typename T0, typename T1, typename T2>
1743struct m_Intrinsic_Ty<T0, T1, T2> {
1744 using Ty =
1745 match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
1746 Argument_match<T2>>;
1747};
1748template <typename T0, typename T1, typename T2, typename T3>
1749struct m_Intrinsic_Ty<T0, T1, T2, T3> {
1750 using Ty =
1751 match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
1752 Argument_match<T3>>;
1753};
1754
1755/// Match intrinsic calls like this:
1756/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
1757template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
1758 return IntrinsicID_match(IntrID);
1759}
1760
1761template <Intrinsic::ID IntrID, typename T0>
1762inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
1763 return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
1764}
1765
1766template <Intrinsic::ID IntrID, typename T0, typename T1>
1767inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
1768 const T1 &Op1) {
1769 return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
1770}
1771
1772template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
1773inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
1774m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
1775 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
1776}
1777
1778template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
1779 typename T3>
1780inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
1781m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
1782 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
1783}
1784
1785// Helper intrinsic matching specializations.
1786template <typename Opnd0>
1787inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
1788 return m_Intrinsic<Intrinsic::bitreverse>(Op0);
1789}
1790
1791template <typename Opnd0>
1792inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
1793 return m_Intrinsic<Intrinsic::bswap>(Op0);
1794}
1795
1796template <typename Opnd0>
1797inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
1798 return m_Intrinsic<Intrinsic::fabs>(Op0);
1799}
1800
1801template <typename Opnd0>
1802inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
1803 return m_Intrinsic<Intrinsic::canonicalize>(Op0);
1804}
1805
1806template <typename Opnd0, typename Opnd1>
1807inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
1808 const Opnd1 &Op1) {
1809 return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
1810}
1811
1812template <typename Opnd0, typename Opnd1>
1813inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
1814 const Opnd1 &Op1) {
1815 return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
1816}
1817
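// Editor's usage sketch (hypothetical helper, not part of the original
// header): the named specializations above read like any other matcher and
// nest freely, e.g. recognizing fabs(fabs(X)).
inline bool matchNestedFAbsExample(Value *V, Value *&X) {
  return match(V, m_FAbs(m_FAbs(m_Value(X))));
}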
1818//===----------------------------------------------------------------------===//
1819// Matchers for two-operands operators with the operators in either order
1820//
1821
1822/// Matches a BinaryOperator with LHS and RHS in either order.
1823template <typename LHS, typename RHS>
1824inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
1825 return AnyBinaryOp_match<LHS, RHS, true>(L, R);
1826}
1827
1828/// Matches an ICmp with a predicate over LHS and RHS in either order.
1829/// Does not swap the predicate.
1830template <typename LHS, typename RHS>
1831inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
1832m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1833 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
1834 R);
1835}
1836
1837/// Matches an Add with LHS and RHS in either order.
1838template <typename LHS, typename RHS>
1839inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
1840 const RHS &R) {
1841 return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
1842}
1843
1844/// Matches a Mul with LHS and RHS in either order.
1845template <typename LHS, typename RHS>
1846inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
1847 const RHS &R) {
1848 return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
1849}
1850
1851/// Matches an And with LHS and RHS in either order.
1852template <typename LHS, typename RHS>
1853inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
1854 const RHS &R) {
1855 return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
1856}
1857
1858/// Matches an Or with LHS and RHS in either order.
1859template <typename LHS, typename RHS>
1860inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
1861 const RHS &R) {
1862 return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
1863}
1864
1865/// Matches an Xor with LHS and RHS in either order.
1866template <typename LHS, typename RHS>
1867inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
1868 const RHS &R) {
1869 return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
1870}
1871
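// Editor's usage sketch (hypothetical helper, not part of the original
// header): a commutative matcher covers both operand orders with a single
// pattern, e.g. "add %x, C" as well as "add C, %x".
inline bool matchAddOfConstantExample(Value *V, Value *&X, const APInt *&C) {
  return match(V, m_c_Add(m_Value(X), m_APInt(C)));
}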
1872/// Matches a 'Neg' as 'sub 0, V'.
1873template <typename ValTy>
1874inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
1875m_Neg(const ValTy &V) {
1876 return m_Sub(m_ZeroInt(), V);
1877}
1878
1879/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
1880template <typename ValTy>
1881inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
1882m_Not(const ValTy &V) {
1883 return m_c_Xor(V, m_AllOnes());
1884}
1885
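// Editor's usage sketch (hypothetical helper, not part of the original
// header): m_Neg and m_Not are shorthands for the sub/xor forms documented
// above, so a negated bitwise-not can be matched directly.
inline bool matchNegOfNotExample(Value *V, Value *&X) {
  return match(V, m_Neg(m_Not(m_Value(X))));
}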
1886/// Matches an SMin with LHS and RHS in either order.
1887template <typename LHS, typename RHS>
1888inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
1889m_c_SMin(const LHS &L, const RHS &R) {
1890 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
1891}
1892/// Matches an SMax with LHS and RHS in either order.
1893template <typename LHS, typename RHS>
1894inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
1895m_c_SMax(const LHS &L, const RHS &R) {
1896 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
1897}
1898/// Matches a UMin with LHS and RHS in either order.
1899template <typename LHS, typename RHS>
1900inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
1901m_c_UMin(const LHS &L, const RHS &R) {
1902 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
1903}
1904/// Matches a UMax with LHS and RHS in either order.
1905template <typename LHS, typename RHS>
1906inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
1907m_c_UMax(const LHS &L, const RHS &R) {
1908 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
1909}
1910
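// Editor's usage sketch (hypothetical helper, not part of the original
// header): commutativity matters most when one operand is pinned to a
// specific pattern, e.g. matching smax(X, 0) regardless of which side the
// zero appears on.
inline bool matchSMaxWithZeroExample(Value *V, Value *&X) {
  return match(V, m_c_SMax(m_Value(X), m_ZeroInt()));
}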
1911/// Matches FAdd with LHS and RHS in either order.
1912template <typename LHS, typename RHS>
1913inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
1914m_c_FAdd(const LHS &L, const RHS &R) {
1915 return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
1916}
1917
1918/// Matches FMul with LHS and RHS in either order.
1919template <typename LHS, typename RHS>
1920inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
1921m_c_FMul(const LHS &L, const RHS &R) {
1922 return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
1923}
1924
1925template <typename Opnd_t> struct Signum_match {
1926 Opnd_t Val;
1927 Signum_match(const Opnd_t &V) : Val(V) {}
1928
1929 template <typename OpTy> bool match(OpTy *V) {
1930 unsigned TypeSize = V->getType()->getScalarSizeInBits();
1931 if (TypeSize == 0)
1932 return false;
1933
1934 unsigned ShiftWidth = TypeSize - 1;
1935 Value *OpL = nullptr, *OpR = nullptr;
1936
1937 // This is the representation of signum we match:
1938 //
1939 // signum(x) == (x >> 63) | (-x >>u 63)
1940 //
1941 // An i1 value is its own signum, so it's correct to match
1942 //
1943 // signum(x) == (x >> 0) | (-x >>u 0)
1944 //
1945 // for i1 values.
1946
1947 auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
1948 auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
1949 auto Signum = m_Or(LHS, RHS);
1950
1951 return Signum.match(V) && OpL == OpR && Val.match(OpL);
1952 }
1953};
1954
1955/// Matches a signum pattern.
1956///
1957/// signum(x) =
1958/// x > 0 -> 1
1959/// x == 0 -> 0
1960/// x < 0 -> -1
1961template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
1962 return Signum_match<Val_t>(V);
1963}
1964
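// Editor's usage sketch (hypothetical helper, not part of the original
// header): m_Signum recognizes the ashr/lshr/or expansion described above
// and binds X to the value whose signum is computed.
inline bool matchSignumExample(Value *V, Value *&X) {
  return match(V, m_Signum(m_Value(X)));
}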
1965template <int Ind, typename Opnd_t> struct ExtractValue_match {
1966 Opnd_t Val;
1967 ExtractValue_match(const Opnd_t &V) : Val(V) {}
1968
1969 template <typename OpTy> bool match(OpTy *V) {
1970 if (auto *I = dyn_cast<ExtractValueInst>(V))
1971 return I->getNumIndices() == 1 && I->getIndices()[0] == Ind &&
1972 Val.match(I->getAggregateOperand());
1973 return false;
1974 }
1975};
1976
1977/// Match a single-index ExtractValue instruction.
1978/// For example, m_ExtractValue<1>(...).
1979template <int Ind, typename Val_t>
1980inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
1981 return ExtractValue_match<Ind, Val_t>(V);
1982}
1983
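// Editor's usage sketch (hypothetical helper, not part of the original
// header): m_ExtractValue is useful for picking one member out of an
// aggregate result, e.g. "extractvalue %agg, 1" with Agg bound to the
// aggregate operand.
inline bool matchSecondMemberExample(Value *V, Value *&Agg) {
  return match(V, m_ExtractValue<1>(m_Value(Agg)));
}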
1984} // end namespace PatternMatch
1985} // end namespace llvm
1986
1987#endif // LLVM_IR_PATTERNMATCH_H