Bug Summary

File: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Warning: line 196, column 39
Division by zero
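
The flagged expression is the division at line 196 of the annotated source below, NarrowingRatio = NumElts / NumSrcElts in foldBitcastExtElt(). On the reported path the analyzer only establishes that NumSrcElts is not equal to NumElts (step 11) and that NumSrcElts is less than NumElts (step 13), so it treats NumSrcElts == 0 as feasible, which would make the division undefined. The following is a minimal sketch, assuming a zero element count can actually reach this point; it compresses the surrounding lines of the function and the added check is illustrative only, not part of the analyzed source:

    // Hypothetical guard (illustration only, not in the analyzed file):
    // bail out before dividing when the bitcast source vector reports
    // zero elements.
    unsigned NumSrcElts = SrcTy->getVectorNumElements();
    unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
    if (NumSrcElts == 0)
      return nullptr;
    unsigned NarrowingRatio = NumElts / NumSrcElts; // divisor known non-zero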

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name InstCombineVectorOps.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Transforms/InstCombine -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstructionSimplify.h"
21#include "llvm/Analysis/VectorUtils.h"
22#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/Constant.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Operator.h"
30#include "llvm/IR/PatternMatch.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/User.h"
33#include "llvm/IR/Value.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40#include <utility>
41
42using namespace llvm;
43using namespace PatternMatch;
44
45#define DEBUG_TYPE "instcombine"
46
47/// Return true if the value is cheaper to scalarize than it is to leave as a
48/// vector operation. IsConstantExtractIndex indicates whether we are extracting
49/// one known element from a vector constant.
50///
51/// FIXME: It's possible to create more instructions than previously existed.
52static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53 // If we can pick a scalar constant value out of a vector, that is free.
54 if (auto *C = dyn_cast<Constant>(V))
55 return IsConstantExtractIndex || C->getSplatValue();
56
57 // An insertelement to the same constant index as our extract will simplify
58 // to the scalar inserted element. An insertelement to a different constant
59 // index is irrelevant to our extract.
60 if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61 return IsConstantExtractIndex;
62
63 if (match(V, m_OneUse(m_Load(m_Value()))))
64 return true;
65
66 Value *V0, *V1;
67 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69 cheapToScalarize(V1, IsConstantExtractIndex))
70 return true;
71
72 CmpInst::Predicate UnusedPred;
73 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75 cheapToScalarize(V1, IsConstantExtractIndex))
76 return true;
77
78 return false;
79}
80
81// If we have a PHI node with a vector type that is only used to feed
82// itself and be an operand of extractelement at a constant location,
83// try to replace the PHI of the vector type with a PHI of a scalar type.
84Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
85 SmallVector<Instruction *, 2> Extracts;
86 // The users we want the PHI to have are:
87 // 1) The EI ExtractElement (we already know this)
88 // 2) Possibly more ExtractElements with the same index.
89 // 3) Another operand, which will feed back into the PHI.
90 Instruction *PHIUser = nullptr;
91 for (auto U : PN->users()) {
92 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93 if (EI.getIndexOperand() == EU->getIndexOperand())
94 Extracts.push_back(EU);
95 else
96 return nullptr;
97 } else if (!PHIUser) {
98 PHIUser = cast<Instruction>(U);
99 } else {
100 return nullptr;
101 }
102 }
103
104 if (!PHIUser)
105 return nullptr;
106
107 // Verify that this PHI user has one use, which is the PHI itself,
108 // and that it is a binary operation which is cheap to scalarize.
109 // otherwise return nullptr.
110 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
111 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
112 return nullptr;
113
114 // Create a scalar PHI node that will replace the vector PHI node
115 // just before the current PHI node.
116 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118 // Scalarize each PHI operand.
119 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
120 Value *PHIInVal = PN->getIncomingValue(i);
121 BasicBlock *inBB = PN->getIncomingBlock(i);
122 Value *Elt = EI.getIndexOperand();
123 // If the operand is the PHI induction variable:
124 if (PHIInVal == PHIUser) {
125 // Scalarize the binary operation. Its first operand is the
126 // scalar PHI, and the second operand is extracted from the other
127 // vector operand.
128 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
130 Value *Op = InsertNewInstWith(
131 ExtractElementInst::Create(B0->getOperand(opId), Elt,
132 B0->getOperand(opId)->getName() + ".Elt"),
133 *B0);
134 Value *newPHIUser = InsertNewInstWith(
135 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
136 scalarPHI, Op, B0), *B0);
137 scalarPHI->addIncoming(newPHIUser, inBB);
138 } else {
139 // Scalarize PHI input:
140 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141 // Insert the new instruction into the predecessor basic block.
142 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143 BasicBlock::iterator InsertPos;
144 if (pos && !isa<PHINode>(pos)) {
145 InsertPos = ++pos->getIterator();
146 } else {
147 InsertPos = inBB->getFirstInsertionPt();
148 }
149
150 InsertNewInstWith(newEI, *InsertPos);
151
152 scalarPHI->addIncoming(newEI, inBB);
153 }
154 }
155
156 for (auto E : Extracts)
157 replaceInstUsesWith(*E, scalarPHI);
158
159 return &EI;
160}
161
162static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
163 InstCombiner::BuilderTy &Builder,
164 bool IsBigEndian) {
165 Value *X;
166 uint64_t ExtIndexC;
167 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
9
Taking false branch
168 !X->getType()->isVectorTy() ||
169 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
170 return nullptr;
171
172 // If this extractelement is using a bitcast from a vector of the same number
173 // of elements, see if we can find the source element from the source vector:
174 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175 Type *SrcTy = X->getType();
176 Type *DestTy = Ext.getType();
177 unsigned NumSrcElts = SrcTy->getVectorNumElements();
10
'NumSrcElts' initialized here
178 unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179 if (NumSrcElts == NumElts)
11
Assuming 'NumSrcElts' is not equal to 'NumElts'
12
Taking false branch
180 if (Value *Elt = findScalarElement(X, ExtIndexC))
181 return new BitCastInst(Elt, DestTy);
182
183 // If the source elements are wider than the destination, try to shift and
184 // truncate a subset of scalar bits of an insert op.
185 if (NumSrcElts < NumElts) {
13
Assuming 'NumSrcElts' is < 'NumElts'
14
Taking true branch
186 Value *Scalar;
187 uint64_t InsIndexC;
188 if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
15
Calling 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 62>>'
23
Returning from 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 62>>'
24
Taking false branch
189 m_ConstantInt(InsIndexC))))
190 return nullptr;
191
192 // The extract must be from the subset of vector elements that we inserted
193 // into. Example: if we inserted element 1 of a <2 x i64> and we are
194 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195 // of elements 4-7 of the bitcasted vector.
196 unsigned NarrowingRatio = NumElts / NumSrcElts;
25
Division by zero
197 if (ExtIndexC / NarrowingRatio != InsIndexC)
198 return nullptr;
199
200 // We are extracting part of the original scalar. How that scalar is
201 // inserted into the vector depends on the endian-ness. Example:
202 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
203 // +--+--+--+--+--+--+--+--+
204 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
205 // extelt <4 x i16> V', 3: | |S2|S3|
206 // +--+--+--+--+--+--+--+--+
207 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209 // In this example, we must right-shift little-endian. Big-endian is just a
210 // truncate.
211 unsigned Chunk = ExtIndexC % NarrowingRatio;
212 if (IsBigEndian)
213 Chunk = NarrowingRatio - 1 - Chunk;
214
215 // Bail out if this is an FP vector to FP vector sequence. That would take
216 // more instructions than we started with unless there is no shift, and it
217 // may not be handled as well in the backend.
218 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219 bool NeedDestBitcast = DestTy->isFloatingPointTy();
220 if (NeedSrcBitcast && NeedDestBitcast)
221 return nullptr;
222
223 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225 unsigned ShAmt = Chunk * DestWidth;
226
227 // TODO: This limitation is more strict than necessary. We could sum the
228 // number of new instructions and subtract the number eliminated to know if
229 // we can proceed.
230 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
231 if (NeedSrcBitcast || NeedDestBitcast)
232 return nullptr;
233
234 if (NeedSrcBitcast) {
235 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237 }
238
239 if (ShAmt) {
240 // Bail out if we could end with more instructions than we started with.
241 if (!Ext.getVectorOperand()->hasOneUse())
242 return nullptr;
243 Scalar = Builder.CreateLShr(Scalar, ShAmt);
244 }
245
246 if (NeedDestBitcast) {
247 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249 }
250 return new TruncInst(Scalar, DestTy);
251 }
252
253 return nullptr;
254}
255
256/// Find elements of V demanded by UserInstr.
257static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
258 unsigned VWidth = V->getType()->getVectorNumElements();
259
260 // Conservatively assume that all elements are needed.
261 APInt UsedElts(APInt::getAllOnesValue(VWidth));
262
263 switch (UserInstr->getOpcode()) {
264 case Instruction::ExtractElement: {
265 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
266 assert(EEI->getVectorOperand() == V);
267 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
268 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
269 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
270 }
271 break;
272 }
273 case Instruction::ShuffleVector: {
274 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
275 unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements();
276
277 UsedElts = APInt(VWidth, 0);
278 for (unsigned i = 0; i < MaskNumElts; i++) {
279 unsigned MaskVal = Shuffle->getMaskValue(i);
280 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
281 continue;
282 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
283 UsedElts.setBit(MaskVal);
284 if (Shuffle->getOperand(1) == V &&
285 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
286 UsedElts.setBit(MaskVal - VWidth);
287 }
288 break;
289 }
290 default:
291 break;
292 }
293 return UsedElts;
294}
295
296/// Find union of elements of V demanded by all its users.
297/// If it is known by querying findDemandedEltsBySingleUser that
298/// no user demands an element of V, then the corresponding bit
299/// remains unset in the returned value.
300static APInt findDemandedEltsByAllUsers(Value *V) {
301 unsigned VWidth = V->getType()->getVectorNumElements();
302
303 APInt UnionUsedElts(VWidth, 0);
304 for (const Use &U : V->uses()) {
305 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
306 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
307 } else {
308 UnionUsedElts = APInt::getAllOnesValue(VWidth);
309 break;
310 }
311
312 if (UnionUsedElts.isAllOnesValue())
313 break;
314 }
315
316 return UnionUsedElts;
317}
318
319Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
320 Value *SrcVec = EI.getVectorOperand();
321 Value *Index = EI.getIndexOperand();
322 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
1
Assuming 'V' is null
2
Taking false branch
323 SQ.getWithInstruction(&EI)))
324 return replaceInstUsesWith(EI, V);
325
326 // If extracting a specified index from the vector, see if we can recursively
327 // find a previously computed scalar that was inserted into the vector.
328 auto *IndexC = dyn_cast<ConstantInt>(Index);
3
Assuming 'Index' is a 'ConstantInt'
329 if (IndexC
3.1
'IndexC' is non-null
) {
4
Taking true branch
330 unsigned NumElts = EI.getVectorOperandType()->getNumElements();
331
332 // InstSimplify should handle cases where the index is invalid.
333 if (!IndexC->getValue().ule(NumElts))
5
Taking false branch
334 return nullptr;
335
336 // This instruction only demands the single element from the input vector.
337 if (NumElts != 1) {
6
Assuming 'NumElts' is equal to 1
7
Taking false branch
338 // If the input vector has a single use, simplify it based on this use
339 // property.
340 if (SrcVec->hasOneUse()) {
341 APInt UndefElts(NumElts, 0);
342 APInt DemandedElts(NumElts, 0);
343 DemandedElts.setBit(IndexC->getZExtValue());
344 if (Value *V =
345 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) {
346 EI.setOperand(0, V);
347 return &EI;
348 }
349 } else {
350 // If the input vector has multiple uses, simplify it based on a union
351 // of all elements used.
352 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
353 if (!DemandedElts.isAllOnesValue()) {
354 APInt UndefElts(NumElts, 0);
355 if (Value *V = SimplifyDemandedVectorElts(
356 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
357 true /* AllowMultipleUsers */)) {
358 if (V != SrcVec) {
359 SrcVec->replaceAllUsesWith(V);
360 return &EI;
361 }
362 }
363 }
364 }
365 }
366 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
8
Calling 'foldBitcastExtElt'
367 return I;
368
369 // If there's a vector PHI feeding a scalar use through this extractelement
370 // instruction, try to scalarize the PHI.
371 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
372 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
373 return ScalarPHI;
374 }
375
376 BinaryOperator *BO;
377 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
378 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
379 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
380 Value *E0 = Builder.CreateExtractElement(X, Index);
381 Value *E1 = Builder.CreateExtractElement(Y, Index);
382 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
383 }
384
385 Value *X, *Y;
386 CmpInst::Predicate Pred;
387 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
388 cheapToScalarize(SrcVec, IndexC)) {
389 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
390 Value *E0 = Builder.CreateExtractElement(X, Index);
391 Value *E1 = Builder.CreateExtractElement(Y, Index);
392 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
393 }
394
395 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
396 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
397 // Extracting the inserted element?
398 if (IE->getOperand(2) == Index)
399 return replaceInstUsesWith(EI, IE->getOperand(1));
400 // If the inserted and extracted elements are constants, they must not
401 // be the same value, extract from the pre-inserted value instead.
402 if (isa<Constant>(IE->getOperand(2)) && IndexC)
403 return replaceOperand(EI, 0, IE->getOperand(0));
404 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
405 // If this is extracting an element from a shufflevector, figure out where
406 // it came from and extract from the appropriate input element instead.
407 if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
408 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
409 Value *Src;
410 unsigned LHSWidth =
411 SVI->getOperand(0)->getType()->getVectorNumElements();
412
413 if (SrcIdx < 0)
414 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
415 if (SrcIdx < (int)LHSWidth)
416 Src = SVI->getOperand(0);
417 else {
418 SrcIdx -= LHSWidth;
419 Src = SVI->getOperand(1);
420 }
421 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
422 return ExtractElementInst::Create(Src,
423 ConstantInt::get(Int32Ty,
424 SrcIdx, false));
425 }
426 } else if (auto *CI = dyn_cast<CastInst>(I)) {
427 // Canonicalize extractelement(cast) -> cast(extractelement).
428 // Bitcasts can change the number of vector elements, and they cost
429 // nothing.
430 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
431 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
432 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
433 }
434 }
435 }
436 return nullptr;
437}
438
439/// If V is a shuffle of values that ONLY returns elements from either LHS or
440/// RHS, return the shuffle mask and true. Otherwise, return false.
441static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
442 SmallVectorImpl<Constant*> &Mask) {
443 assert(LHS->getType() == RHS->getType() &&
444 "Invalid CollectSingleShuffleElements");
445 unsigned NumElts = V->getType()->getVectorNumElements();
446
447 if (isa<UndefValue>(V)) {
448 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
449 return true;
450 }
451
452 if (V == LHS) {
453 for (unsigned i = 0; i != NumElts; ++i)
454 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
455 return true;
456 }
457
458 if (V == RHS) {
459 for (unsigned i = 0; i != NumElts; ++i)
460 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
461 i+NumElts));
462 return true;
463 }
464
465 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
466 // If this is an insert of an extract from some other vector, include it.
467 Value *VecOp = IEI->getOperand(0);
468 Value *ScalarOp = IEI->getOperand(1);
469 Value *IdxOp = IEI->getOperand(2);
470
471 if (!isa<ConstantInt>(IdxOp))
472 return false;
473 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
474
475 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
476 // We can handle this if the vector we are inserting into is
477 // transitively ok.
478 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
479 // If so, update the mask to reflect the inserted undef.
480 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
481 return true;
482 }
483 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
484 if (isa<ConstantInt>(EI->getOperand(1))) {
485 unsigned ExtractedIdx =
486 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
487 unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
488
489 // This must be extracting from either LHS or RHS.
490 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
491 // We can handle this if the vector we are inserting into is
492 // transitively ok.
493 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
494 // If so, update the mask to reflect the inserted value.
495 if (EI->getOperand(0) == LHS) {
496 Mask[InsertedIdx % NumElts] =
497 ConstantInt::get(Type::getInt32Ty(V->getContext()),
498 ExtractedIdx);
499 } else {
500 assert(EI->getOperand(0) == RHS);
501 Mask[InsertedIdx % NumElts] =
502 ConstantInt::get(Type::getInt32Ty(V->getContext()),
503 ExtractedIdx + NumLHSElts);
504 }
505 return true;
506 }
507 }
508 }
509 }
510 }
511
512 return false;
513}
514
515/// If we have insertion into a vector that is wider than the vector that we
516/// are extracting from, try to widen the source vector to allow a single
517/// shufflevector to replace one or more insert/extract pairs.
518static void replaceExtractElements(InsertElementInst *InsElt,
519 ExtractElementInst *ExtElt,
520 InstCombiner &IC) {
521 VectorType *InsVecType = InsElt->getType();
522 VectorType *ExtVecType = ExtElt->getVectorOperandType();
523 unsigned NumInsElts = InsVecType->getVectorNumElements();
524 unsigned NumExtElts = ExtVecType->getVectorNumElements();
525
526 // The inserted-to vector must be wider than the extracted-from vector.
527 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
528 NumExtElts >= NumInsElts)
529 return;
530
531 // Create a shuffle mask to widen the extended-from vector using undefined
532 // values. The mask selects all of the values of the original vector followed
533 // by as many undefined values as needed to create a vector of the same length
534 // as the inserted-to vector.
535 SmallVector<Constant *, 16> ExtendMask;
536 IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
537 for (unsigned i = 0; i < NumExtElts; ++i)
538 ExtendMask.push_back(ConstantInt::get(IntType, i));
539 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
540 ExtendMask.push_back(UndefValue::get(IntType));
541
542 Value *ExtVecOp = ExtElt->getVectorOperand();
543 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
544 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
545 ? ExtVecOpInst->getParent()
546 : ExtElt->getParent();
547
548 // TODO: This restriction matches the basic block check below when creating
549 // new extractelement instructions. If that limitation is removed, this one
550 // could also be removed. But for now, we just bail out to ensure that we
551 // will replace the extractelement instruction that is feeding our
552 // insertelement instruction. This allows the insertelement to then be
553 // replaced by a shufflevector. If the insertelement is not replaced, we can
554 // induce infinite looping because there's an optimization for extractelement
555 // that will delete our widening shuffle. This would trigger another attempt
556 // here to create that shuffle, and we spin forever.
557 if (InsertionBlock != InsElt->getParent())
558 return;
559
560 // TODO: This restriction matches the check in visitInsertElementInst() and
561 // prevents an infinite loop caused by not turning the extract/insert pair
562 // into a shuffle. We really should not need either check, but we're lacking
563 // folds for shufflevectors because we're afraid to generate shuffle masks
564 // that the backend can't handle.
565 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
566 return;
567
568 auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
569 ConstantVector::get(ExtendMask));
570
571 // Insert the new shuffle after the vector operand of the extract is defined
572 // (as long as it's not a PHI) or at the start of the basic block of the
573 // extract, so any subsequent extracts in the same basic block can use it.
574 // TODO: Insert before the earliest ExtractElementInst that is replaced.
575 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
576 WideVec->insertAfter(ExtVecOpInst);
577 else
578 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
579
580 // Replace extracts from the original narrow vector with extracts from the new
581 // wide vector.
582 for (User *U : ExtVecOp->users()) {
583 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
584 if (!OldExt || OldExt->getParent() != WideVec->getParent())
585 continue;
586 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
587 NewExt->insertAfter(OldExt);
588 IC.replaceInstUsesWith(*OldExt, NewExt);
589 }
590}
591
592/// We are building a shuffle to create V, which is a sequence of insertelement,
593/// extractelement pairs. If PermittedRHS is set, then we must either use it or
594/// not rely on the second vector source. Return a std::pair containing the
595/// left and right vectors of the proposed shuffle (or 0), and set the Mask
596/// parameter as required.
597///
598/// Note: we intentionally don't try to fold earlier shuffles since they have
599/// often been chosen carefully to be efficiently implementable on the target.
600using ShuffleOps = std::pair<Value *, Value *>;
601
602static ShuffleOps collectShuffleElements(Value *V,
603 SmallVectorImpl<Constant *> &Mask,
604 Value *PermittedRHS,
605 InstCombiner &IC) {
606 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
607 unsigned NumElts = V->getType()->getVectorNumElements();
608
609 if (isa<UndefValue>(V)) {
610 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
611 return std::make_pair(
612 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
613 }
614
615 if (isa<ConstantAggregateZero>(V)) {
616 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
617 return std::make_pair(V, nullptr);
618 }
619
620 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
621 // If this is an insert of an extract from some other vector, include it.
622 Value *VecOp = IEI->getOperand(0);
623 Value *ScalarOp = IEI->getOperand(1);
624 Value *IdxOp = IEI->getOperand(2);
625
626 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
627 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
628 unsigned ExtractedIdx =
629 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
630 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
631
632 // Either the extracted from or inserted into vector must be RHSVec,
633 // otherwise we'd end up with a shuffle of three inputs.
634 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
635 Value *RHS = EI->getOperand(0);
636 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
637 assert(LR.second == nullptr || LR.second == RHS);
638
639 if (LR.first->getType() != RHS->getType()) {
640 // Although we are giving up for now, see if we can create extracts
641 // that match the inserts for another round of combining.
642 replaceExtractElements(IEI, EI, IC);
643
644 // We tried our best, but we can't find anything compatible with RHS
645 // further up the chain. Return a trivial shuffle.
646 for (unsigned i = 0; i < NumElts; ++i)
647 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
648 return std::make_pair(V, nullptr);
649 }
650
651 unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
652 Mask[InsertedIdx % NumElts] =
653 ConstantInt::get(Type::getInt32Ty(V->getContext()),
654 NumLHSElts+ExtractedIdx);
655 return std::make_pair(LR.first, RHS);
656 }
657
658 if (VecOp == PermittedRHS) {
659 // We've gone as far as we can: anything on the other side of the
660 // extractelement will already have been converted into a shuffle.
661 unsigned NumLHSElts =
662 EI->getOperand(0)->getType()->getVectorNumElements();
663 for (unsigned i = 0; i != NumElts; ++i)
664 Mask.push_back(ConstantInt::get(
665 Type::getInt32Ty(V->getContext()),
666 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
667 return std::make_pair(EI->getOperand(0), PermittedRHS);
668 }
669
670 // If this insertelement is a chain that comes from exactly these two
671 // vectors, return the vector and the effective shuffle.
672 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
673 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
674 Mask))
675 return std::make_pair(EI->getOperand(0), PermittedRHS);
676 }
677 }
678 }
679
680 // Otherwise, we can't do anything fancy. Return an identity vector.
681 for (unsigned i = 0; i != NumElts; ++i)
682 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
683 return std::make_pair(V, nullptr);
684}
685
686/// Try to find redundant insertvalue instructions, like the following ones:
687/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
688/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
689/// Here the second instruction inserts values at the same indices, as the
690/// first one, making the first one redundant.
691/// It should be transformed to:
692/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
693Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
694 bool IsRedundant = false;
695 ArrayRef<unsigned int> FirstIndices = I.getIndices();
696
697 // If there is a chain of insertvalue instructions (each of them except the
698 // last one has only one use and it's another insertvalue insn from this
699 // chain), check if any of the 'children' uses the same indices as the first
700 // instruction. In this case, the first one is redundant.
701 Value *V = &I;
702 unsigned Depth = 0;
703 while (V->hasOneUse() && Depth < 10) {
704 User *U = V->user_back();
705 auto UserInsInst = dyn_cast<InsertValueInst>(U);
706 if (!UserInsInst || U->getOperand(0) != V)
707 break;
708 if (UserInsInst->getIndices() == FirstIndices) {
709 IsRedundant = true;
710 break;
711 }
712 V = UserInsInst;
713 Depth++;
714 }
715
716 if (IsRedundant)
717 return replaceInstUsesWith(I, I.getOperand(0));
718 return nullptr;
719}
720
721static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
722 int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
723 int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
724
725 // A vector select does not change the size of the operands.
726 if (MaskSize != VecSize)
727 return false;
728
729 // Each mask element must be undefined or choose a vector element from one of
730 // the source operands without crossing vector lanes.
731 for (int i = 0; i != MaskSize; ++i) {
732 int Elt = Shuf.getMaskValue(i);
733 if (Elt != -1 && Elt != i && Elt != i + VecSize)
734 return false;
735 }
736
737 return true;
738}
739
740/// Turn a chain of inserts that splats a value into an insert + shuffle:
741/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
742/// shufflevector(insertelt(X, %k, 0), undef, zero)
743static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
744 // We are interested in the last insert in a chain. So if this insert has a
745 // single user and that user is an insert, bail.
746 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
747 return nullptr;
748
749 auto *VecTy = cast<VectorType>(InsElt.getType());
750 unsigned NumElements = VecTy->getNumElements();
751
752 // Do not try to do this for a one-element vector, since that's a nop,
753 // and will cause an inf-loop.
754 if (NumElements == 1)
755 return nullptr;
756
757 Value *SplatVal = InsElt.getOperand(1);
758 InsertElementInst *CurrIE = &InsElt;
759 SmallVector<bool, 16> ElementPresent(NumElements, false);
760 InsertElementInst *FirstIE = nullptr;
761
762 // Walk the chain backwards, keeping track of which indices we inserted into,
763 // until we hit something that isn't an insert of the splatted value.
764 while (CurrIE) {
765 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
766 if (!Idx || CurrIE->getOperand(1) != SplatVal)
767 return nullptr;
768
769 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
770 // Check none of the intermediate steps have any additional uses, except
771 // for the root insertelement instruction, which can be re-used, if it
772 // inserts at position 0.
773 if (CurrIE != &InsElt &&
774 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
775 return nullptr;
776
777 ElementPresent[Idx->getZExtValue()] = true;
778 FirstIE = CurrIE;
779 CurrIE = NextIE;
780 }
781
782 // If this is just a single insertelement (not a sequence), we are done.
783 if (FirstIE == &InsElt)
784 return nullptr;
785
786 // If we are not inserting into an undef vector, make sure we've seen an
787 // insert into every element.
788 // TODO: If the base vector is not undef, it might be better to create a splat
789 // and then a select-shuffle (blend) with the base vector.
790 if (!isa<UndefValue>(FirstIE->getOperand(0)))
791 if (any_of(ElementPresent, [](bool Present) { return !Present; }))
792 return nullptr;
793
794 // Create the insert + shuffle.
795 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
796 UndefValue *UndefVec = UndefValue::get(VecTy);
797 Constant *Zero = ConstantInt::get(Int32Ty, 0);
798 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
799 FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
800
801 // Splat from element 0, but replace absent elements with undef in the mask.
802 SmallVector<Constant *, 16> Mask(NumElements, Zero);
803 for (unsigned i = 0; i != NumElements; ++i)
804 if (!ElementPresent[i])
805 Mask[i] = UndefValue::get(Int32Ty);
806
807 return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
808}
809
810/// Try to fold an insert element into an existing splat shuffle by changing
811/// the shuffle's mask to include the index of this insert element.
812static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
813 // Check if the vector operand of this insert is a canonical splat shuffle.
814 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
815 if (!Shuf || !Shuf->isZeroEltSplat())
816 return nullptr;
817
818 // Check for a constant insertion index.
819 uint64_t IdxC;
820 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
821 return nullptr;
822
823 // Check if the splat shuffle's input is the same as this insert's scalar op.
824 Value *X = InsElt.getOperand(1);
825 Value *Op0 = Shuf->getOperand(0);
826 if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
827 return nullptr;
828
829 // Replace the shuffle mask element at the index of this insert with a zero.
830 // For example:
831 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
832 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
833 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
834 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
835 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
836 Constant *Zero = ConstantInt::getNullValue(I32Ty);
837 for (unsigned i = 0; i != NumMaskElts; ++i)
838 NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
839
840 Constant *NewMask = ConstantVector::get(NewMaskVec);
841 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
842}
843
844/// Try to fold an extract+insert element into an existing identity shuffle by
845/// changing the shuffle's mask to include the index of this insert element.
846static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
847 // Check if the vector operand of this insert is an identity shuffle.
848 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
849 if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
850 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
851 return nullptr;
852
853 // Check for a constant insertion index.
854 uint64_t IdxC;
855 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
856 return nullptr;
857
858 // Check if this insert's scalar op is extracted from the identity shuffle's
859 // input vector.
860 Value *Scalar = InsElt.getOperand(1);
861 Value *X = Shuf->getOperand(0);
862 if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
863 return nullptr;
864
865 // Replace the shuffle mask element at the index of this extract+insert with
866 // that same index value.
867 // For example:
868 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
869 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
870 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
871 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
872 Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC);
873 Constant *OldMask = Shuf->getMask();
874 for (unsigned i = 0; i != NumMaskElts; ++i) {
875 if (i != IdxC) {
876 // All mask elements besides the inserted element remain the same.
877 NewMaskVec[i] = OldMask->getAggregateElement(i);
878 } else if (OldMask->getAggregateElement(i) == NewMaskEltC) {
879 // If the mask element was already set, there's nothing to do
880 // (demanded elements analysis may unset it later).
881 return nullptr;
882 } else {
883 assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&
884 "Unexpected shuffle mask element for identity shuffle");
885 NewMaskVec[i] = NewMaskEltC;
886 }
887 }
888
889 Constant *NewMask = ConstantVector::get(NewMaskVec);
890 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
891}
892
893/// If we have an insertelement instruction feeding into another insertelement
894/// and the 2nd is inserting a constant into the vector, canonicalize that
895/// constant insertion before the insertion of a variable:
896///
897/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
898/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
899///
900/// This has the potential of eliminating the 2nd insertelement instruction
901/// via constant folding of the scalar constant into a vector constant.
902static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
903 InstCombiner::BuilderTy &Builder) {
904 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
905 if (!InsElt1 || !InsElt1->hasOneUse())
906 return nullptr;
907
908 Value *X, *Y;
909 Constant *ScalarC;
910 ConstantInt *IdxC1, *IdxC2;
911 if (match(InsElt1->getOperand(0), m_Value(X)) &&
912 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
913 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
914 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
915 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
916 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
917 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
918 }
919
920 return nullptr;
921}
922
923/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
924/// --> shufflevector X, CVec', Mask'
925static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
926 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
927 // Bail out if the parent has more than one use. In that case, we'd be
928 // replacing the insertelt with a shuffle, and that's not a clear win.
929 if (!Inst || !Inst->hasOneUse())
930 return nullptr;
931 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
932 // The shuffle must have a constant vector operand. The insertelt must have
933 // a constant scalar being inserted at a constant position in the vector.
934 Constant *ShufConstVec, *InsEltScalar;
935 uint64_t InsEltIndex;
936 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
937 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
938 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
939 return nullptr;
940
941 // Adding an element to an arbitrary shuffle could be expensive, but a
942 // shuffle that selects elements from vectors without crossing lanes is
943 // assumed cheap.
944 // If we're just adding a constant into that shuffle, it will still be
945 // cheap.
946 if (!isShuffleEquivalentToSelect(*Shuf))
947 return nullptr;
948
949 // From the above 'select' check, we know that the mask has the same number
950 // of elements as the vector input operands. We also know that each constant
951 // input element is used in its lane and can not be used more than once by
952 // the shuffle. Therefore, replace the constant in the shuffle's constant
953 // vector with the insertelt constant. Replace the constant in the shuffle's
954 // mask vector with the insertelt index plus the length of the vector
955 // (because the constant vector operand of a shuffle is always the 2nd
956 // operand).
957 Constant *Mask = Shuf->getMask();
958 unsigned NumElts = Mask->getType()->getVectorNumElements();
959 SmallVector<Constant *, 16> NewShufElts(NumElts);
960 SmallVector<Constant *, 16> NewMaskElts(NumElts);
961 for (unsigned I = 0; I != NumElts; ++I) {
962 if (I == InsEltIndex) {
963 NewShufElts[I] = InsEltScalar;
964 Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
965 NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
966 } else {
967 // Copy over the existing values.
968 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
969 NewMaskElts[I] = Mask->getAggregateElement(I);
970 }
971 }
972
973 // Create new operands for a shuffle that includes the constant of the
974 // original insertelt. The old shuffle will be dead now.
975 return new ShuffleVectorInst(Shuf->getOperand(0),
976 ConstantVector::get(NewShufElts),
977 ConstantVector::get(NewMaskElts));
978 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
979 // Transform sequences of insertelements ops with constant data/indexes into
980 // a single shuffle op.
981 unsigned NumElts = InsElt.getType()->getNumElements();
982
983 uint64_t InsertIdx[2];
984 Constant *Val[2];
985 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
986 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
987 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
988 !match(IEI->getOperand(1), m_Constant(Val[1])))
989 return nullptr;
990 SmallVector<Constant *, 16> Values(NumElts);
991 SmallVector<Constant *, 16> Mask(NumElts);
992 auto ValI = std::begin(Val);
993 // Generate new constant vector and mask.
994 // We have 2 values/masks from the insertelements instructions. Insert them
995 // into new value/mask vectors.
996 for (uint64_t I : InsertIdx) {
997 if (!Values[I]) {
998 assert(!Mask[I]);
999 Values[I] = *ValI;
1000 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
1001 NumElts + I);
1002 }
1003 ++ValI;
1004 }
1005 // Remaining values are filled with 'undef' values.
1006 for (unsigned I = 0; I < NumElts; ++I) {
1007 if (!Values[I]) {
1008 assert(!Mask[I]);
1009 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1010 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
1011 }
1012 }
1013 // Create new operands for a shuffle that includes the constant of the
1014 // original insertelt.
1015 return new ShuffleVectorInst(IEI->getOperand(0),
1016 ConstantVector::get(Values),
1017 ConstantVector::get(Mask));
1018 }
1019 return nullptr;
1020}
1021
1022Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
1023 Value *VecOp = IE.getOperand(0);
1024 Value *ScalarOp = IE.getOperand(1);
1025 Value *IdxOp = IE.getOperand(2);
1026
1027 if (auto *V = SimplifyInsertElementInst(
1028 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1029 return replaceInstUsesWith(IE, V);
1030
1031 // If the vector and scalar are both bitcast from the same element type, do
1032 // the insert in that source type followed by bitcast.
1033 Value *VecSrc, *ScalarSrc;
1034 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1035 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1036 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1037 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1038 VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
1039 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1040 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1041 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1042 return new BitCastInst(NewInsElt, IE.getType());
1043 }
1044
1045 // If the inserted element was extracted from some other vector and both
1046 // indexes are valid constants, try to turn this into a shuffle.
1047 uint64_t InsertedIdx, ExtractedIdx;
1048 Value *ExtVecOp;
1049 if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1050 match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
1051 m_ConstantInt(ExtractedIdx))) &&
1052 ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
1053 // TODO: Looking at the user(s) to determine if this insert is a
1054 // fold-to-shuffle opportunity does not match the usual instcombine
1055 // constraints. We should decide if the transform is worthy based only
1056 // on this instruction and its operands, but that may not work currently.
1057 //
1058 // Here, we are trying to avoid creating shuffles before reaching
1059 // the end of a chain of extract-insert pairs. This is complicated because
1060 // we do not generally form arbitrary shuffle masks in instcombine
1061 // (because those may codegen poorly), but collectShuffleElements() does
1062 // exactly that.
1063 //
1064 // The rules for determining what is an acceptable target-independent
1065 // shuffle mask are fuzzy because they evolve based on the backend's
1066 // capabilities and real-world impact.
1067 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1068 if (!Insert.hasOneUse())
1069 return true;
1070 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1071 if (!InsertUser)
1072 return true;
1073 return false;
1074 };
1075
1076 // Try to form a shuffle from a chain of extract-insert ops.
1077 if (isShuffleRootCandidate(IE)) {
1078 SmallVector<Constant*, 16> Mask;
1079 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1080
1081 // The proposed shuffle may be trivial, in which case we shouldn't
1082 // perform the combine.
1083 if (LR.first != &IE && LR.second != &IE) {
1084 // We now have a shuffle of LHS, RHS, Mask.
1085 if (LR.second == nullptr)
1086 LR.second = UndefValue::get(LR.first->getType());
1087 return new ShuffleVectorInst(LR.first, LR.second,
1088 ConstantVector::get(Mask));
1089 }
1090 }
1091 }
1092
1093 unsigned VWidth = VecOp->getType()->getVectorNumElements();
1094 APInt UndefElts(VWidth, 0);
1095 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1096 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1097 if (V != &IE)
1098 return replaceInstUsesWith(IE, V);
1099 return &IE;
1100 }
1101
1102 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1103 return Shuf;
1104
1105 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1106 return NewInsElt;
1107
1108 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1109 return Broadcast;
1110
1111 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1112 return Splat;
1113
1114 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1115 return IdentityShuf;
1116
1117 return nullptr;
1118}
1119
1120/// Return true if we can evaluate the specified expression tree if the vector
1121/// elements were shuffled in a different order.
1122static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1123 unsigned Depth = 5) {
1124 // We can always reorder the elements of a constant.
1125 if (isa<Constant>(V))
1126 return true;
1127
1128 // We won't reorder vector arguments. No IPO here.
1129 Instruction *I = dyn_cast<Instruction>(V);
1130 if (!I) return false;
1131
1132 // Two users may expect different orders of the elements. Don't try it.
1133 if (!I->hasOneUse())
1134 return false;
1135
1136 if (Depth == 0) return false;
1137
1138 switch (I->getOpcode()) {
1139 case Instruction::UDiv:
1140 case Instruction::SDiv:
1141 case Instruction::URem:
1142 case Instruction::SRem:
1143 // Propagating an undefined shuffle mask element to integer div/rem is not
1144 // allowed because those opcodes can create immediate undefined behavior
1145 // from an undefined element in an operand.
1146 if (llvm::any_of(Mask, [](int M){ return M == -1; }))
1147 return false;
1148 LLVM_FALLTHROUGH;
1149 case Instruction::Add:
1150 case Instruction::FAdd:
1151 case Instruction::Sub:
1152 case Instruction::FSub:
1153 case Instruction::Mul:
1154 case Instruction::FMul:
1155 case Instruction::FDiv:
1156 case Instruction::FRem:
1157 case Instruction::Shl:
1158 case Instruction::LShr:
1159 case Instruction::AShr:
1160 case Instruction::And:
1161 case Instruction::Or:
1162 case Instruction::Xor:
1163 case Instruction::ICmp:
1164 case Instruction::FCmp:
1165 case Instruction::Trunc:
1166 case Instruction::ZExt:
1167 case Instruction::SExt:
1168 case Instruction::FPToUI:
1169 case Instruction::FPToSI:
1170 case Instruction::UIToFP:
1171 case Instruction::SIToFP:
1172 case Instruction::FPTrunc:
1173 case Instruction::FPExt:
1174 case Instruction::GetElementPtr: {
1175 // Bail out if we would create longer vector ops. We could allow creating
1176 // longer vector ops, but that may result in more expensive codegen.
1177 Type *ITy = I->getType();
1178 if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1179 return false;
1180 for (Value *Operand : I->operands()) {
1181 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1182 return false;
1183 }
1184 return true;
1185 }
1186 case Instruction::InsertElement: {
1187 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1188 if (!CI) return false;
1189 int ElementNumber = CI->getLimitedValue();
1190
1191 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1192 // can't put an element into multiple indices.
1193 bool SeenOnce = false;
1194 for (int i = 0, e = Mask.size(); i != e; ++i) {
1195 if (Mask[i] == ElementNumber) {
1196 if (SeenOnce)
1197 return false;
1198 SeenOnce = true;
1199 }
1200 }
1201 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1202 }
1203 }
1204 return false;
1205}
1206
1207/// Rebuild a new instruction just like 'I' but with the new operands given.
1208/// In the event of type mismatch, the type of the operands is correct.
1209static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1210 // We don't want to use the IRBuilder here because we want the replacement
1211 // instructions to appear next to 'I', not the builder's insertion point.
1212 switch (I->getOpcode()) {
1213 case Instruction::Add:
1214 case Instruction::FAdd:
1215 case Instruction::Sub:
1216 case Instruction::FSub:
1217 case Instruction::Mul:
1218 case Instruction::FMul:
1219 case Instruction::UDiv:
1220 case Instruction::SDiv:
1221 case Instruction::FDiv:
1222 case Instruction::URem:
1223 case Instruction::SRem:
1224 case Instruction::FRem:
1225 case Instruction::Shl:
1226 case Instruction::LShr:
1227 case Instruction::AShr:
1228 case Instruction::And:
1229 case Instruction::Or:
1230 case Instruction::Xor: {
1231 BinaryOperator *BO = cast<BinaryOperator>(I);
1232 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1233 BinaryOperator *New =
1234 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1235 NewOps[0], NewOps[1], "", BO);
1236 if (isa<OverflowingBinaryOperator>(BO)) {
1237 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1238 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1239 }
1240 if (isa<PossiblyExactOperator>(BO)) {
1241 New->setIsExact(BO->isExact());
1242 }
1243 if (isa<FPMathOperator>(BO))
1244 New->copyFastMathFlags(I);
1245 return New;
1246 }
1247 case Instruction::ICmp:
1248 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1249 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1250 NewOps[0], NewOps[1]);
1251 case Instruction::FCmp:
1252 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1253 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1254 NewOps[0], NewOps[1]);
1255 case Instruction::Trunc:
1256 case Instruction::ZExt:
1257 case Instruction::SExt:
1258 case Instruction::FPToUI:
1259 case Instruction::FPToSI:
1260 case Instruction::UIToFP:
1261 case Instruction::SIToFP:
1262 case Instruction::FPTrunc:
1263 case Instruction::FPExt: {
1264 // It's possible that the mask has a different number of elements from
1265 // the original cast. We recompute the destination type to match the mask.
1266 Type *DestTy =
1267 VectorType::get(I->getType()->getScalarType(),
1268 NewOps[0]->getType()->getVectorNumElements());
1269 assert(NewOps.size() == 1 && "cast with #ops != 1");
1270 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1271 "", I);
1272 }
1273 case Instruction::GetElementPtr: {
1274 Value *Ptr = NewOps[0];
1275 ArrayRef<Value*> Idx = NewOps.slice(1);
1276 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1277 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1278 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1279 return GEP;
1280 }
1281 }
1282 llvm_unreachable("failed to rebuild vector instructions");
1283}
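// Small worked example for the cast case above (names are illustrative): if
// the original instruction was "trunc <4 x i32> %a to <4 x i16>" and the new,
// already-shuffled operand is only <2 x i32>, DestTy is recomputed from
// NewOps[0], so the rebuilt instruction is "trunc <2 x i32> ... to <2 x i16>".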
1284
1285static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1286 // Mask.size() does not need to be equal to the number of vector elements.
1287
1288 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1289 Type *EltTy = V->getType()->getScalarType();
1290 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1291 if (isa<UndefValue>(V))
1292 return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1293
1294 if (isa<ConstantAggregateZero>(V))
1295 return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1296
1297 if (Constant *C = dyn_cast<Constant>(V)) {
1298 SmallVector<Constant *, 16> MaskValues;
1299 for (int i = 0, e = Mask.size(); i != e; ++i) {
1300 if (Mask[i] == -1)
1301 MaskValues.push_back(UndefValue::get(I32Ty));
1302 else
1303 MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1304 }
1305 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1306 ConstantVector::get(MaskValues));
1307 }
1308
1309 Instruction *I = cast<Instruction>(V);
1310 switch (I->getOpcode()) {
1311 case Instruction::Add:
1312 case Instruction::FAdd:
1313 case Instruction::Sub:
1314 case Instruction::FSub:
1315 case Instruction::Mul:
1316 case Instruction::FMul:
1317 case Instruction::UDiv:
1318 case Instruction::SDiv:
1319 case Instruction::FDiv:
1320 case Instruction::URem:
1321 case Instruction::SRem:
1322 case Instruction::FRem:
1323 case Instruction::Shl:
1324 case Instruction::LShr:
1325 case Instruction::AShr:
1326 case Instruction::And:
1327 case Instruction::Or:
1328 case Instruction::Xor:
1329 case Instruction::ICmp:
1330 case Instruction::FCmp:
1331 case Instruction::Trunc:
1332 case Instruction::ZExt:
1333 case Instruction::SExt:
1334 case Instruction::FPToUI:
1335 case Instruction::FPToSI:
1336 case Instruction::UIToFP:
1337 case Instruction::SIToFP:
1338 case Instruction::FPTrunc:
1339 case Instruction::FPExt:
1340 case Instruction::Select:
1341 case Instruction::GetElementPtr: {
1342 SmallVector<Value*, 8> NewOps;
1343 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1344 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1345 Value *V;
1346 // Recursively call evaluateInDifferentElementOrder on vector arguments
1347 // as well. E.g. GetElementPtr may have scalar operands even if the
1348 // return value is a vector, so we need to examine the operand type.
1349 if (I->getOperand(i)->getType()->isVectorTy())
1350 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1351 else
1352 V = I->getOperand(i);
1353 NewOps.push_back(V);
1354 NeedsRebuild |= (V != I->getOperand(i));
1355 }
1356 if (NeedsRebuild) {
1357 return buildNew(I, NewOps);
1358 }
1359 return I;
1360 }
1361 case Instruction::InsertElement: {
1362 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1363
1364 // The insertelement was inserting at Element. Figure out which element
1365 // that becomes after shuffling. The answer is guaranteed to be unique
1366 // by canEvaluateShuffled.
1367 bool Found = false;
1368 int Index = 0;
1369 for (int e = Mask.size(); Index != e; ++Index) {
1370 if (Mask[Index] == Element) {
1371 Found = true;
1372 break;
1373 }
1374 }
1375
1376 // If the element is not in Mask, there is no need to handle operand 1 (the
1377 // element to be inserted). Just evaluate the values in operand 0 according to Mask.
1378 if (!Found)
1379 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1380
1381 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1382 return InsertElementInst::Create(V, I->getOperand(1),
1383 ConstantInt::get(I32Ty, Index), "", I);
1384 }
1385 }
1386 llvm_unreachable("failed to reorder elements of vector instruction!");
1387}
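// Rough illustration of the constant path above: evaluating the constant
// <4 x i32> <i32 10, i32 20, i32 30, i32 40> with Mask = {3, 0, -1} yields
// <3 x i32> <i32 40, i32 10, i32 undef>, each -1 mask element becoming an
// undef lane of the shuffled constant.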
1388
1389// Returns true if the shuffle is extracting a contiguous range of values from
1390// LHS, for example:
1391// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1392// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1393// Shuffles to: |EE|FF|GG|HH|
1394// +--+--+--+--+
1395static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1396 SmallVector<int, 16> &Mask) {
1397 unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1398 unsigned MaskElems = Mask.size();
1399 unsigned BegIdx = Mask.front();
1400 unsigned EndIdx = Mask.back();
1401 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1402 return false;
1403 for (unsigned I = 0; I != MaskElems; ++I)
1404 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1405 return false;
1406 return true;
1407}
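// Concrete instances of the check above (assuming the 16-element LHS from the
// diagram): Mask = <4,5,6,7> is a contiguous extract and returns true, while
// Mask = <4,6> (a gap) or Mask = <14,15,16,17> (runs past the LHS) returns
// false.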
1408
1409/// These are the ingredients in an alternate form binary operator as described
1410/// below.
1411struct BinopElts {
1412 BinaryOperator::BinaryOps Opcode;
1413 Value *Op0;
1414 Value *Op1;
1415 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1416 Value *V0 = nullptr, Value *V1 = nullptr) :
1417 Opcode(Opc), Op0(V0), Op1(V1) {}
1418 operator bool() const { return Opcode != 0; }
1419};
1420
1421/// Binops may be transformed into binops with different opcodes and operands.
1422/// Reverse the usual canonicalization to enable folds with the non-canonical
1423/// form of the binop. If a transform is possible, return the elements of the
1424/// new binop. If not, return invalid elements.
1425static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1426 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1427 Type *Ty = BO->getType();
1428 switch (BO->getOpcode()) {
1429 case Instruction::Shl: {
1430 // shl X, C --> mul X, (1 << C)
1431 Constant *C;
1432 if (match(BO1, m_Constant(C))) {
1433 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1434 return { Instruction::Mul, BO0, ShlOne };
1435 }
1436 break;
1437 }
1438 case Instruction::Or: {
1439 // or X, C --> add X, C (when X and C have no common bits set)
1440 const APInt *C;
1441 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1442 return { Instruction::Add, BO0, BO1 };
1443 break;
1444 }
1445 default:
1446 break;
1447 }
1448 return {};
1449}
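// Concrete examples of the alternate forms above (operand names are
// illustrative): "shl %x, 3" can be treated as "mul %x, 8" (1 << 3), and
// "or %x, 1" can be treated as "add %x, 1" when MaskedValueIsZero proves the
// low bit of %x is zero.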
1450
1451static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1452 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1453
1454 // Are we shuffling together some value and that same value after it has been
1455 // modified by a binop with a constant?
1456 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1457 Constant *C;
1458 bool Op0IsBinop;
1459 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1460 Op0IsBinop = true;
1461 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1462 Op0IsBinop = false;
1463 else
1464 return nullptr;
1465
1466 // The identity constant for a binop leaves a variable operand unchanged. For
1467 // a vector, this is a splat of something like 0, -1, or 1.
1468 // If there's no identity constant for this binop, we're done.
1469 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1470 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1471 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1472 if (!IdC)
1473 return nullptr;
1474
1475 // Shuffle identity constants into the lanes that return the original value.
1476 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1477 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1478 // The existing binop constant vector remains in the same operand position.
1479 Constant *Mask = Shuf.getMask();
1480 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1481 ConstantExpr::getShuffleVector(IdC, C, Mask);
1482
1483 bool MightCreatePoisonOrUB =
1484 Mask->containsUndefElement() &&
1485 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1486 if (MightCreatePoisonOrUB)
1487 NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1488
1489 // shuf (bop X, C), X, M --> bop X, C'
1490 // shuf X, (bop X, C), M --> bop X, C'
1491 Value *X = Op0IsBinop ? Op1 : Op0;
1492 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1493 NewBO->copyIRFlags(BO);
1494
1495 // An undef shuffle mask element may propagate as an undef constant element in
1496 // the new binop. That would produce poison where the original code might not.
1497 // If we already made a safe constant, then there's no danger.
1498 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1499 NewBO->dropPoisonGeneratingFlags();
1500 return NewBO;
1501}
1502
1503/// If we have an insert of a scalar to a non-zero element of an undefined
1504/// vector and then shuffle that value, that's the same as inserting to the zero
1505/// element and shuffling. Splatting from the zero element is recognized as the
1506/// canonical form of splat.
1507static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1508 InstCombiner::BuilderTy &Builder) {
1509 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1510 Constant *Mask = Shuf.getMask();
1511 Value *X;
1512 uint64_t IndexC;
1513
1514 // Match a shuffle that is a splat to a non-zero element.
1515 if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1516 m_ConstantInt(IndexC)))) ||
1517 !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
1518 return nullptr;
1519
1520 // Insert into element 0 of an undef vector.
1521 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1522 Constant *Zero = Builder.getInt32(0);
1523 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1524
1525 // Splat from element 0. Any mask element that is undefined remains undefined.
1526 // For example:
1527 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1528 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1529 unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1530 SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1531 for (unsigned i = 0; i != NumMaskElts; ++i)
1532 if (isa<UndefValue>(Mask->getAggregateElement(i)))
1533 NewMask[i] = Mask->getAggregateElement(i);
1534
1535 return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1536}
1537
1538/// Try to fold shuffles that are the equivalent of a vector select.
1539static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1540 InstCombiner::BuilderTy &Builder,
1541 const DataLayout &DL) {
1542 if (!Shuf.isSelect())
1543 return nullptr;
1544
1545 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1546 // Commuting undef to operand 0 conflicts with another canonicalization.
1547 unsigned NumElts = Shuf.getType()->getVectorNumElements();
1548 if (!isa<UndefValue>(Shuf.getOperand(1)) &&
1549 Shuf.getMaskValue(0) >= (int)NumElts) {
1550 // TODO: Can we assert that both operands of a shuffle-select are not undef
1551 // (otherwise, it would have been folded by instsimplify)?
1552 Shuf.commute();
1553 return &Shuf;
1554 }
1555
1556 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1557 return I;
1558
1559 BinaryOperator *B0, *B1;
1560 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1561 !match(Shuf.getOperand(1), m_BinOp(B1)))
1562 return nullptr;
1563
1564 Value *X, *Y;
1565 Constant *C0, *C1;
1566 bool ConstantsAreOp1;
1567 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1568 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1569 ConstantsAreOp1 = true;
1570 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1571 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1572 ConstantsAreOp1 = false;
1573 else
1574 return nullptr;
1575
1576 // We need matching binops to fold the lanes together.
1577 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1578 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1579 bool DropNSW = false;
1580 if (ConstantsAreOp1 && Opc0 != Opc1) {
1581 // TODO: We drop "nsw" if shift is converted into multiply because it may
1582 // not be correct when the shift amount is BitWidth - 1. We could examine
1583 // each vector element to determine if it is safe to keep that flag.
1584 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1585 DropNSW = true;
1586 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1587 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1588 Opc0 = AltB0.Opcode;
1589 C0 = cast<Constant>(AltB0.Op1);
1590 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1591 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1592 Opc1 = AltB1.Opcode;
1593 C1 = cast<Constant>(AltB1.Op1);
1594 }
1595 }
1596
1597 if (Opc0 != Opc1)
1598 return nullptr;
1599
1600 // The opcodes must be the same. Use a new name to make that clear.
1601 BinaryOperator::BinaryOps BOpc = Opc0;
1602
1603 // Select the constant elements needed for the single binop.
1604 Constant *Mask = Shuf.getMask();
1605 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1606
1607 // We are moving a binop after a shuffle. When a shuffle has an undefined
1608 // mask element, the result is undefined, but it is not poison or undefined
1609 // behavior. That is not necessarily true for div/rem/shift.
1610 bool MightCreatePoisonOrUB =
1611 Mask->containsUndefElement() &&
1612 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
1613 if (MightCreatePoisonOrUB)
1614 NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1615
1616 Value *V;
1617 if (X == Y) {
1618 // Remove a binop and the shuffle by rearranging the constant:
1619 // shuffle (op V, C0), (op V, C1), M --> op V, C'
1620 // shuffle (op C0, V), (op C1, V), M --> op C', V
1621 V = X;
1622 } else {
1623 // If there are 2 different variable operands, we must create a new shuffle
1624 // (select) first, so check uses to ensure that we don't end up with more
1625 // instructions than we started with.
1626 if (!B0->hasOneUse() && !B1->hasOneUse())
1627 return nullptr;
1628
1629 // If we use the original shuffle mask and op1 is *variable*, we would be
1630 // putting an undef into operand 1 of div/rem/shift. This is either UB or
1631 // poison. We do not have to guard against UB when *constants* are op1
1632 // because safe constants guarantee that we do not overflow sdiv/srem (and
1633 // there's no danger for other opcodes).
1634 // TODO: To allow this case, create a new shuffle mask with no undefs.
1635 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1636 return nullptr;
1637
1638 // Note: In general, we do not create new shuffles in InstCombine because we
1639 // do not know if a target can lower an arbitrary shuffle optimally. In this
1640 // case, the shuffle uses the existing mask, so there is no additional risk.
1641
1642 // Select the variable vectors first, then perform the binop:
1643 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1644 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1645 V = Builder.CreateShuffleVector(X, Y, Mask);
1646 }
1647
1648 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1649 BinaryOperator::Create(BOpc, NewC, V);
1650
1651 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1652 // 1. If we changed an opcode, poison conditions might have changed.
1653 // 2. If the shuffle had undef mask elements, the new binop might have undefs
1654 // where the original code did not. But if we already made a safe constant,
1655 // then there's no danger.
1656 NewBO->copyIRFlags(B0);
1657 NewBO->andIRFlags(B1);
1658 if (DropNSW)
1659 NewBO->setHasNoSignedWrap(false);
1660 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
1661 NewBO->dropPoisonGeneratingFlags();
1662 return NewBO;
1663}
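// Worked example for the X == Y case above (values are illustrative):
//   shuffle (add <4 x i32> %x, <1,1,1,1>), (add %x, <2,2,2,2>), <0,5,2,7>
// takes lanes 0 and 2 from the first add and lanes 1 and 3 from the second,
// so the constants are blended with the same mask and the whole pattern
// reduces to a single "add %x, <1,2,1,2>".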
1664
1665/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1666/// narrowing (concatenating with undef and extracting back to the original
1667/// length). This allows replacing the wide select with a narrow select.
1668static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1669 InstCombiner::BuilderTy &Builder) {
1670 // This must be a narrowing identity shuffle. It extracts the 1st N elements
1671 // of the 1st vector operand of a shuffle.
1672 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1673 return nullptr;
1674
1675 // The vector being shuffled must be a vector select that we can eliminate.
1676 // TODO: The one-use requirement could be eased if X and/or Y are constants.
1677 Value *Cond, *X, *Y;
1678 if (!match(Shuf.getOperand(0),
1679 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1680 return nullptr;
1681
1682 // We need a narrow condition value. It must be extended with undef elements
1683 // and have the same number of elements as this shuffle.
1684 unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1685 Value *NarrowCond;
1686 if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1687 m_Constant()))) ||
1688 NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1689 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1690 return nullptr;
1691
1692 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1693 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1694 Value *Undef = UndefValue::get(X->getType());
1695 Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1696 Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1697 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1698}
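// Sketch of the pattern above with illustrative types: a <4 x i1> condition
// widened to <8 x i1> with undef padding feeds an <8 x float> select, and the
// outer shuffle extracts the first 4 lanes again. The fold replaces this with
// a select on the original <4 x i1> condition over 4-lane shuffles of X and Y,
// eliminating the wide select.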
1699
1700/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1701static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1702 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1703 if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1704 return nullptr;
1705
1706 Value *X, *Y;
1707 Constant *Mask;
1708 if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1709 return nullptr;
1710
1711 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1712 // then combining may result in worse codegen.
1713 if (!Op0->hasOneUse())
1714 return nullptr;
1715
1716 // We are extracting a subvector from a shuffle. Remove excess elements from
1717 // the 1st shuffle mask to eliminate the extract.
1718 //
1719 // This transform is conservatively limited to identity extracts because we do
1720 // not allow arbitrary shuffle mask creation as a target-independent transform
1721 // (because we can't guarantee that will lower efficiently).
1722 //
1723 // If the extracting shuffle has an undef mask element, it transfers to the
1724 // new shuffle mask. Otherwise, copy the original mask element. Example:
1725 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1726 // shuf X, Y, <C0, undef, C2, undef>
1727 unsigned NumElts = Shuf.getType()->getVectorNumElements();
1728 SmallVector<Constant *, 16> NewMask(NumElts);
1729 assert(NumElts < Mask->getType()->getVectorNumElements() &&
1730 "Identity with extract must have less elements than its inputs");
1731
1732 for (unsigned i = 0; i != NumElts; ++i) {
1733 Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1734 Constant *MaskElt = Mask->getAggregateElement(i);
1735 NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1736 }
1737 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1738}
1739
1740/// Try to replace a shuffle with an insertelement or try to replace a shuffle
1741/// operand with the operand of an insertelement.
1742static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
1743 InstCombiner &IC) {
1744 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1745 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1746
1747 // The shuffle must not change vector sizes.
1748 // TODO: This restriction could be removed if the insert has only one use
1749 // (because the transform would require a new length-changing shuffle).
1750 int NumElts = Mask.size();
1751 if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1752 return nullptr;
1753
1754 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
1755 // not be able to handle it there if the insertelement has >1 use.
1756 // If the shuffle has an insertelement operand but does not choose the
1757 // inserted scalar element from that value, then we can replace that shuffle
1758 // operand with the source vector of the insertelement.
1759 Value *X;
1760 uint64_t IdxC;
1761 if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1762 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
1763 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1764 return IC.replaceOperand(Shuf, 0, X);
1765 }
1766 if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1767 // Offset the index constant by the vector width because we are checking for
1768 // accesses to the 2nd vector input of the shuffle.
1769 IdxC += NumElts;
1770 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
1771 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1772 return IC.replaceOperand(Shuf, 1, X);
1773 }
1774
1775 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1776 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1777 // We need an insertelement with a constant index.
1778 if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1779 m_ConstantInt(IndexC))))
1780 return false;
1781
1782 // Test the shuffle mask to see if it splices the inserted scalar into the
1783 // operand 1 vector of the shuffle.
1784 int NewInsIndex = -1;
1785 for (int i = 0; i != NumElts; ++i) {
1786 // Ignore undef mask elements.
1787 if (Mask[i] == -1)
1788 continue;
1789
1790 // The shuffle takes elements of operand 1 without lane changes.
1791 if (Mask[i] == NumElts + i)
1792 continue;
1793
1794 // The shuffle must choose the inserted scalar exactly once.
1795 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1796 return false;
1797
1798 // The shuffle is placing the inserted scalar into element i.
1799 NewInsIndex = i;
1800 }
1801
1802 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1803
1804 // Index is updated to the potentially translated insertion lane.
1805 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1806 return true;
1807 };
1808
1809 // If the shuffle is unnecessary, insert the scalar operand directly into
1810 // operand 1 of the shuffle. Example:
1811 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1812 Value *Scalar;
1813 ConstantInt *IndexC;
1814 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1815 return InsertElementInst::Create(V1, Scalar, IndexC);
1816
1817 // Try again after commuting shuffle. Example:
1818 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1819 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1820 std::swap(V0, V1);
1821 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1822 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1823 return InsertElementInst::Create(V1, Scalar, IndexC);
1824
1825 return nullptr;
1826}
1827
1828static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1829 // Match the operands as identity with padding (also known as concatenation
1830 // with undef) shuffles of the same source type. The backend is expected to
1831 // recreate these concatenations from a shuffle of narrow operands.
1832 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1833 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1834 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1835 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1836 return nullptr;
1837
1838 // We limit this transform to power-of-2 types because we expect that the
1839 // backend can convert the simplified IR patterns to identical nodes as the
1840 // original IR.
1841 // TODO: If we can verify the same behavior for arbitrary types, the
1842 // power-of-2 checks can be removed.
1843 Value *X = Shuffle0->getOperand(0);
1844 Value *Y = Shuffle1->getOperand(0);
1845 if (X->getType() != Y->getType() ||
1846 !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
1847 !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1848 !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
1849 isa<UndefValue>(X) || isa<UndefValue>(Y))
1850 return nullptr;
1851 assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1852 isa<UndefValue>(Shuffle1->getOperand(1)) &&
1853 "Unexpected operand for identity shuffle");
1854
1855 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1856 // operands directly by adjusting the shuffle mask to account for the narrower
1857 // types:
1858 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1859 int NarrowElts = X->getType()->getVectorNumElements();
1860 int WideElts = Shuffle0->getType()->getVectorNumElements();
1861 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1862
1863 Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1864 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1865 SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1866 for (int i = 0, e = Mask.size(); i != e; ++i) {
1867 if (Mask[i] == -1)
1868 continue;
1869
1870 // If this shuffle is choosing an undef element from 1 of the sources, that
1871 // element is undef.
1872 if (Mask[i] < WideElts) {
1873 if (Shuffle0->getMaskValue(Mask[i]) == -1)
1874 continue;
1875 } else {
1876 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1877 continue;
1878 }
1879
1880 // If this shuffle is choosing from the 1st narrow op, the mask element is
1881 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1882 // element is offset down to adjust for the narrow vector widths.
1883 if (Mask[i] < WideElts) {
1884 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
1885 NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1886 } else {
1887 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
1888 NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1889 }
1890 }
1891 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1892}
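// Numeric example of the mask adjustment above: with NarrowElts = 4 and
// WideElts = 8, a mask element of 2 (first widened operand) stays 2, while a
// mask element of 9 (lane 1 of the second widened operand) becomes
// 9 - (8 - 4) = 5, i.e. lane 1 of Y in the narrow two-operand shuffle.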
1893
1894Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1895 Value *LHS = SVI.getOperand(0);
1896 Value *RHS = SVI.getOperand(1);
1897 if (auto *V = SimplifyShuffleVectorInst(
1898 LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1899 return replaceInstUsesWith(SVI, V);
1900
1901 // shuffle x, x, mask --> shuffle x, undef, mask'
1902 unsigned VWidth = SVI.getType()->getVectorNumElements();
1903 unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1904 SmallVector<int, 16> Mask = SVI.getShuffleMask();
1905 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1906 if (LHS == RHS) {
1907 assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
1908 // Remap any references to RHS to use LHS.
1909 SmallVector<Constant*, 16> Elts;
1910 for (unsigned i = 0; i != VWidth; ++i) {
1911 // Propagate undef elements or force mask to LHS.
1912 if (Mask[i] < 0)
1913 Elts.push_back(UndefValue::get(Int32Ty));
1914 else
1915 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i] % LHSWidth));
1916 }
1917 return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()),
1918 ConstantVector::get(Elts));
1919 }
1920
1921 // shuffle undef, x, mask --> shuffle x, undef, mask'
1922 if (isa<UndefValue>(LHS)) {
1923 SVI.commute();
1924 return &SVI;
1925 }
1926
1927 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1928 return I;
1929
1930 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1931 return I;
1932
1933 if (Instruction *I = narrowVectorSelect(SVI, Builder))
1934 return I;
1935
1936 APInt UndefElts(VWidth, 0);
1937 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1938 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1939 if (V != &SVI)
1940 return replaceInstUsesWith(SVI, V);
1941 return &SVI;
1942 }
1943
1944 if (Instruction *I = foldIdentityExtractShuffle(SVI))
1945 return I;
1946
1947 // These transforms have the potential to lose undef knowledge, so they are
1948 // intentionally placed after SimplifyDemandedVectorElts().
1949 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
1950 return I;
1951 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1952 return I;
1953
1954 if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
1955 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1956 return replaceInstUsesWith(SVI, V);
1957 }
1958
1959 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1960 // a non-vector type. We can instead bitcast the original vector followed by
1961 // an extract of the desired element:
1962 //
1963 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1964 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1965 // %1 = bitcast <4 x i8> %sroa to i32
1966 // Becomes:
1967 // %bc = bitcast <16 x i8> %in to <4 x i32>
1968 // %ext = extractelement <4 x i32> %bc, i32 0
1969 //
1970 // If the shuffle is extracting a contiguous range of values from the input
1971 // vector then each use which is a bitcast of the extracted size can be
1972 // replaced. This will work if the vector types are compatible, and the begin
1973 // index is aligned to a value in the casted vector type. If the begin index
1974 // isn't aligned then we can shuffle the original vector (keeping the same
1975 // vector type) before extracting.
1976 //
1977 // This code will bail out if the target type is fundamentally incompatible
1978 // with vectors of the source type.
1979 //
1980 // Example of <16 x i8>, target type i32:
1981 // Index range [4,8): v-----------v Will work.
1982 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1983 // <16 x i8>: | | | | | | | | | | | | | | | | |
1984 // <4 x i32>: | | | | |
1985 // +-----------+-----------+-----------+-----------+
1986 // Index range [6,10): ^-----------^ Needs an extra shuffle.
1987 // Target type i40: ^--------------^ Won't work, bail.
1988 bool MadeChange = false;
1989 if (isShuffleExtractingFromLHS(SVI, Mask)) {
1990 Value *V = LHS;
1991 unsigned MaskElems = Mask.size();
1992 VectorType *SrcTy = cast<VectorType>(V->getType());
1993 unsigned VecBitWidth = SrcTy->getBitWidth();
1994 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1995 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1996 unsigned SrcNumElems = SrcTy->getNumElements();
1997 SmallVector<BitCastInst *, 8> BCs;
1998 DenseMap<Type *, Value *> NewBCs;
1999 for (User *U : SVI.users())
2000 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2001 if (!BC->use_empty())
2002 // Only visit bitcasts that weren't previously handled.
2003 BCs.push_back(BC);
2004 for (BitCastInst *BC : BCs) {
2005 unsigned BegIdx = Mask.front();
2006 Type *TgtTy = BC->getDestTy();
2007 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2008 if (!TgtElemBitWidth)
2009 continue;
2010 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2011 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2012 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2013 if (!VecBitWidthsEqual)
2014 continue;
2015 if (!VectorType::isValidElementType(TgtTy))
2016 continue;
2017 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
2018 if (!BegIsAligned) {
2019 // Shuffle the input so [0,MaskElems) contains the output, and
2020 // [MaskElems,SrcNumElems) is undef.
2021 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
2022 UndefValue::get(Int32Ty));
2023 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2024 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
2025 V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
2026 ConstantVector::get(ShuffleMask),
2027 SVI.getName() + ".extract");
2028 BegIdx = 0;
2029 }
2030 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2031 assert(SrcElemsPerTgtElem);
2032 BegIdx /= SrcElemsPerTgtElem;
2033 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2034 auto *NewBC =
2035 BCAlreadyExists
2036 ? NewBCs[CastSrcTy]
2037 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2038 if (!BCAlreadyExists)
2039 NewBCs[CastSrcTy] = NewBC;
2040 auto *Ext = Builder.CreateExtractElement(
2041 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2042 // The shufflevector isn't being replaced: the bitcast that used it
2043 // is. InstCombine will visit the newly-created instructions.
2044 replaceInstUsesWith(*BC, Ext);
2045 MadeChange = true;
2046 }
2047 }
2048
2049 // If the LHS is a shufflevector itself, see if we can combine it with this
2050 // one without producing an unusual shuffle.
2051 // Cases that might be simplified:
2052 // 1.
2053 // x1=shuffle(v1,v2,mask1)
2054 // x=shuffle(x1,undef,mask)
2055 // ==>
2056 // x=shuffle(v1,undef,newMask)
2057 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2058 // 2.
2059 // x1=shuffle(v1,undef,mask1)
2060 // x=shuffle(x1,x2,mask)
2061 // where v1.size() == mask1.size()
2062 // ==>
2063 // x=shuffle(v1,x2,newMask)
2064 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2065 // 3.
2066 // x2=shuffle(v2,undef,mask2)
2067 // x=shuffle(x1,x2,mask)
2068 // where v2.size() == mask2.size()
2069 // ==>
2070 // x=shuffle(x1,v2,newMask)
2071 // newMask[i] = (mask[i] < x1.size())
2072 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2073 // 4.
2074 // x1=shuffle(v1,undef,mask1)
2075 // x2=shuffle(v2,undef,mask2)
2076 // x=shuffle(x1,x2,mask)
2077 // where v1.size() == v2.size()
2078 // ==>
2079 // x=shuffle(v1,v2,newMask)
2080 // newMask[i] = (mask[i] < x1.size())
2081 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2082 //
2083 // Here we are really conservative:
2084 // we are absolutely afraid of producing a shuffle mask not in the input
2085 // program, because the code gen may not be smart enough to turn a merged
2086 // shuffle into two specific shuffles: it may produce worse code. As such,
2087 // we only merge two shuffles if the result is either a splat or one of the
2088 // input shuffle masks. In this case, merging the shuffles just removes
2089 // one instruction, which we know is safe. This is good for things like
2090 // turning: (splat(splat)) -> splat, or
2091 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
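// Worked instance of case 1 above (4-wide vectors, values illustrative):
// x1 = shuffle(v1, v2, <0,5,2,7>) followed by x = shuffle(x1, undef, <2,2,2,2>)
// gives newMask[i] = mask1[mask[i]] = mask1[2] = 2 for every lane, so the
// merged shuffle reads only lane 2 of v1 -- a splat, which is one of the
// allowed replacements under the conservative policy described above.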
2092 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2093 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2094 if (LHSShuffle)
2095 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
2096 LHSShuffle = nullptr;
2097 if (RHSShuffle)
2098 if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
2099 RHSShuffle = nullptr;
2100 if (!LHSShuffle && !RHSShuffle)
2101 return MadeChange ? &SVI : nullptr;
2102
2103 Value* LHSOp0 = nullptr;
2104 Value* LHSOp1 = nullptr;
2105 Value* RHSOp0 = nullptr;
2106 unsigned LHSOp0Width = 0;
2107 unsigned RHSOp0Width = 0;
2108 if (LHSShuffle) {
2109 LHSOp0 = LHSShuffle->getOperand(0);
2110 LHSOp1 = LHSShuffle->getOperand(1);
2111 LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
2112 }
2113 if (RHSShuffle) {
2114 RHSOp0 = RHSShuffle->getOperand(0);
2115 RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
2116 }
2117 Value* newLHS = LHS;
2118 Value* newRHS = RHS;
2119 if (LHSShuffle) {
2120 // case 1
2121 if (isa<UndefValue>(RHS)) {
2122 newLHS = LHSOp0;
2123 newRHS = LHSOp1;
2124 }
2125 // case 2 or 4
2126 else if (LHSOp0Width == LHSWidth) {
2127 newLHS = LHSOp0;
2128 }
2129 }
2130 // case 3 or 4
2131 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2132 newRHS = RHSOp0;
2133 }
2134 // case 4
2135 if (LHSOp0 == RHSOp0) {
2136 newLHS = LHSOp0;
2137 newRHS = nullptr;
2138 }
2139
2140 if (newLHS == LHS && newRHS == RHS)
2141 return MadeChange ? &SVI : nullptr;
2142
2143 SmallVector<int, 16> LHSMask;
2144 SmallVector<int, 16> RHSMask;
2145 if (newLHS != LHS)
2146 LHSMask = LHSShuffle->getShuffleMask();
2147 if (RHSShuffle && newRHS != RHS)
2148 RHSMask = RHSShuffle->getShuffleMask();
2149
2150 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2151 SmallVector<int, 16> newMask;
2152 bool isSplat = true;
2153 int SplatElt = -1;
2154 // Create a new mask for the new ShuffleVectorInst so that the new
2155 // ShuffleVectorInst is equivalent to the original one.
2156 for (unsigned i = 0; i < VWidth; ++i) {
2157 int eltMask;
2158 if (Mask[i] < 0) {
2159 // This element is an undef value.
2160 eltMask = -1;
2161 } else if (Mask[i] < (int)LHSWidth) {
2162 // This element is from left hand side vector operand.
2163 //
2164 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2165 // new mask value for the element.
2166 if (newLHS != LHS) {
2167 eltMask = LHSMask[Mask[i]];
2168 // If the value selected is an undef value, explicitly specify it
2169 // with a -1 mask value.
2170 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2171 eltMask = -1;
2172 } else
2173 eltMask = Mask[i];
2174 } else {
2175 // This element is from right hand side vector operand
2176 //
2177 // If the value selected is an undef value, explicitly specify it
2178 // with a -1 mask value. (case 1)
2179 if (isa<UndefValue>(RHS))
2180 eltMask = -1;
2181 // If RHS is going to be replaced (case 3 or 4), calculate the
2182 // new mask value for the element.
2183 else if (newRHS != RHS) {
2184 eltMask = RHSMask[Mask[i]-LHSWidth];
2185 // If the value selected is an undef value, explicitly specify it
2186 // with a -1 mask value.
2187 if (eltMask >= (int)RHSOp0Width) {
2188 assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2189 && "should have been check above");
2190 eltMask = -1;
2191 }
2192 } else
2193 eltMask = Mask[i]-LHSWidth;
2194
2195 // If LHS's width is changed, shift the mask value accordingly.
2196 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2197 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2198 // If newRHS == newLHS, we want to remap any references from newRHS to
2199 // newLHS so that we can properly identify splats that may occur due to
2200 // obfuscation across the two vectors.
2201 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2202 eltMask += newLHSWidth;
2203 }
2204
2205 // Check if this could still be a splat.
2206 if (eltMask >= 0) {
2207 if (SplatElt >= 0 && SplatElt != eltMask)
2208 isSplat = false;
2209 SplatElt = eltMask;
2210 }
2211
2212 newMask.push_back(eltMask);
2213 }
2214
2215 // If the result mask is equal to one of the original shuffle masks,
2216 // or is a splat, do the replacement.
2217 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2218 SmallVector<Constant*, 16> Elts;
2219 for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2220 if (newMask[i] < 0) {
2221 Elts.push_back(UndefValue::get(Int32Ty));
2222 } else {
2223 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2224 }
2225 }
2226 if (!newRHS)
2227 newRHS = UndefValue::get(newLHS->getType());
2228 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2229 }
2230
2231 return MadeChange ? &SVI : nullptr;
2232}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45
46namespace llvm {
47namespace PatternMatch {
48
49template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
50 return const_cast<Pattern &>(P).match(V);
16. Calling 'ThreeOps_match::match'
21. Returning from 'ThreeOps_match::match'
22. Returning the value 1, which participates in a condition later
51}
52
53template <typename SubPattern_t> struct OneUse_match {
54 SubPattern_t SubPattern;
55
56 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
57
58 template <typename OpTy> bool match(OpTy *V) {
59 return V->hasOneUse() && SubPattern.match(V);
60 }
61};
62
63template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
64 return SubPattern;
65}
66
67template <typename Class> struct class_match {
68 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
69};
70
71/// Match an arbitrary value and ignore it.
72inline class_match<Value> m_Value() { return class_match<Value>(); }
73
74/// Match an arbitrary binary operation and ignore it.
75inline class_match<BinaryOperator> m_BinOp() {
76 return class_match<BinaryOperator>();
77}
78
79/// Matches any compare instruction and ignore it.
80inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
81
82/// Match an arbitrary ConstantInt and ignore it.
83inline class_match<ConstantInt> m_ConstantInt() {
84 return class_match<ConstantInt>();
85}
86
87/// Match an arbitrary undef constant.
88inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
89
90/// Match an arbitrary Constant and ignore it.
91inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
92
93/// Match an arbitrary basic block value and ignore it.
94inline class_match<BasicBlock> m_BasicBlock() {
95 return class_match<BasicBlock>();
96}
97
98/// Inverting matcher
99template <typename Ty> struct match_unless {
100 Ty M;
101
102 match_unless(const Ty &Matcher) : M(Matcher) {}
103
104 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
105};
106
107/// Match if the inner matcher does *NOT* match.
108template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
109 return match_unless<Ty>(M);
110}
111
112/// Matching combinators
113template <typename LTy, typename RTy> struct match_combine_or {
114 LTy L;
115 RTy R;
116
117 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
118
119 template <typename ITy> bool match(ITy *V) {
120 if (L.match(V))
121 return true;
122 if (R.match(V))
123 return true;
124 return false;
125 }
126};
127
128template <typename LTy, typename RTy> struct match_combine_and {
129 LTy L;
130 RTy R;
131
132 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
133
134 template <typename ITy> bool match(ITy *V) {
135 if (L.match(V))
136 if (R.match(V))
137 return true;
138 return false;
139 }
140};
141
142/// Combine two pattern matchers matching L || R
143template <typename LTy, typename RTy>
144inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
145 return match_combine_or<LTy, RTy>(L, R);
146}
147
148/// Combine two pattern matchers matching L && R
149template <typename LTy, typename RTy>
150inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
151 return match_combine_and<LTy, RTy>(L, R);
152}
153
154struct apint_match {
155 const APInt *&Res;
156 bool AllowUndef;
157
158 apint_match(const APInt *&Res, bool AllowUndef)
159 : Res(Res), AllowUndef(AllowUndef) {}
160
161 template <typename ITy> bool match(ITy *V) {
162 if (auto *CI = dyn_cast<ConstantInt>(V)) {
163 Res = &CI->getValue();
164 return true;
165 }
166 if (V->getType()->isVectorTy())
167 if (const auto *C = dyn_cast<Constant>(V))
168 if (auto *CI = dyn_cast_or_null<ConstantInt>(
169 C->getSplatValue(AllowUndef))) {
170 Res = &CI->getValue();
171 return true;
172 }
173 return false;
174 }
175};
176// Either constexpr if or renaming ConstantFP::getValueAPF to
177// ConstantFP::getValue would be needed to handle both apint/apfloat
178// with a single template function.
179struct apfloat_match {
180 const APFloat *&Res;
181 bool AllowUndef;
182
183 apfloat_match(const APFloat *&Res, bool AllowUndef)
184 : Res(Res), AllowUndef(AllowUndef) {}
185
186 template <typename ITy> bool match(ITy *V) {
187 if (auto *CI = dyn_cast<ConstantFP>(V)) {
188 Res = &CI->getValueAPF();
189 return true;
190 }
191 if (V->getType()->isVectorTy())
192 if (const auto *C = dyn_cast<Constant>(V))
193 if (auto *CI = dyn_cast_or_null<ConstantFP>(
194 C->getSplatValue(AllowUndef))) {
195 Res = &CI->getValueAPF();
196 return true;
197 }
198 return false;
199 }
200};
201
202/// Match a ConstantInt or splatted ConstantVector, binding the
203/// specified pointer to the contained APInt.
204inline apint_match m_APInt(const APInt *&Res) {
205 // Forbid undefs by default to maintain previous behavior.
206 return apint_match(Res, /* AllowUndef */ false);
207}
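// Minimal usage sketch (a Value *V is assumed to come from surrounding code):
//   const APInt *C;
//   if (match(V, m_APInt(C))) {
//     // V is a ConstantInt or a splatted vector constant; *C is its value.
//   }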
208
209/// Match APInt while allowing undefs in splat vector constants.
210inline apint_match m_APIntAllowUndef(const APInt *&Res) {
211 return apint_match(Res, /* AllowUndef */ true);
212}
213
214/// Match APInt while forbidding undefs in splat vector constants.
215inline apint_match m_APIntForbidUndef(const APInt *&Res) {
216 return apint_match(Res, /* AllowUndef */ false);
217}
218
219/// Match a ConstantFP or splatted ConstantVector, binding the
220/// specified pointer to the contained APFloat.
221inline apfloat_match m_APFloat(const APFloat *&Res) {
222 // Forbid undefs by default to maintain previous behavior.
223 return apfloat_match(Res, /* AllowUndef */ false);
224}
225
226/// Match APFloat while allowing undefs in splat vector constants.
227inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) {
228 return apfloat_match(Res, /* AllowUndef */ true);
229}
230
231/// Match APFloat while forbidding undefs in splat vector constants.
232inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) {
233 return apfloat_match(Res, /* AllowUndef */ false);
234}
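
A brief example of how the binding matchers above are typically consumed (illustrative only; the helper name is made up, the same include/using setup as the earlier sketch applies, and m_Add/m_Value are defined later in this header):

    // Matches "add X, C" where C is a ConstantInt or a splat vector constant,
    // binding X and the constant's APInt.
    static bool matchAddOfConstant(Value *V, Value *&X, const APInt *&C) {
      return match(V, m_Add(m_Value(X), m_APInt(C)));
    }
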
235
236template <int64_t Val> struct constantint_match {
237 template <typename ITy> bool match(ITy *V) {
238 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
239 const APInt &CIV = CI->getValue();
240 if (Val >= 0)
241 return CIV == static_cast<uint64_t>(Val);
242 // If Val is negative, and CI is shorter than it, truncate to the right
243 // number of bits. If it is larger, then we have to sign extend. Just
244 // compare their negated values.
245 return -CIV == -Val;
246 }
247 return false;
248 }
249};
250
251/// Match a ConstantInt with a specific value.
252template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
253 return constantint_match<Val>();
254}
255
256/// This helper class is used to match scalar and vector integer constants that
257/// satisfy a specified predicate.
258/// For vector constants, undefined elements are ignored.
259template <typename Predicate> struct cst_pred_ty : public Predicate {
260 template <typename ITy> bool match(ITy *V) {
261 if (const auto *CI = dyn_cast<ConstantInt>(V))
262 return this->isValue(CI->getValue());
263 if (V->getType()->isVectorTy()) {
264 if (const auto *C = dyn_cast<Constant>(V)) {
265 if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
266 return this->isValue(CI->getValue());
267
268 // Non-splat vector constant: check each element for a match.
269 unsigned NumElts = V->getType()->getVectorNumElements();
270 assert(NumElts != 0 && "Constant vector with no elements?");
271 bool HasNonUndefElements = false;
272 for (unsigned i = 0; i != NumElts; ++i) {
273 Constant *Elt = C->getAggregateElement(i);
274 if (!Elt)
275 return false;
276 if (isa<UndefValue>(Elt))
277 continue;
278 auto *CI = dyn_cast<ConstantInt>(Elt);
279 if (!CI || !this->isValue(CI->getValue()))
280 return false;
281 HasNonUndefElements = true;
282 }
283 return HasNonUndefElements;
284 }
285 }
286 return false;
287 }
288};
289
290/// This helper class is used to match scalar and vector constants that
291/// satisfy a specified predicate, and bind them to an APInt.
292template <typename Predicate> struct api_pred_ty : public Predicate {
293 const APInt *&Res;
294
295 api_pred_ty(const APInt *&R) : Res(R) {}
296
297 template <typename ITy> bool match(ITy *V) {
298 if (const auto *CI = dyn_cast<ConstantInt>(V))
299 if (this->isValue(CI->getValue())) {
300 Res = &CI->getValue();
301 return true;
302 }
303 if (V->getType()->isVectorTy())
304 if (const auto *C = dyn_cast<Constant>(V))
305 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
306 if (this->isValue(CI->getValue())) {
307 Res = &CI->getValue();
308 return true;
309 }
310
311 return false;
312 }
313};
314
315/// This helper class is used to match scalar and vector floating-point
316/// constants that satisfy a specified predicate.
317/// For vector constants, undefined elements are ignored.
318template <typename Predicate> struct cstfp_pred_ty : public Predicate {
319 template <typename ITy> bool match(ITy *V) {
320 if (const auto *CF = dyn_cast<ConstantFP>(V))
321 return this->isValue(CF->getValueAPF());
322 if (V->getType()->isVectorTy()) {
323 if (const auto *C = dyn_cast<Constant>(V)) {
324 if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
325 return this->isValue(CF->getValueAPF());
326
327 // Non-splat vector constant: check each element for a match.
328 unsigned NumElts = V->getType()->getVectorNumElements();
329 assert(NumElts != 0 && "Constant vector with no elements?");
330 bool HasNonUndefElements = false;
331 for (unsigned i = 0; i != NumElts; ++i) {
332 Constant *Elt = C->getAggregateElement(i);
333 if (!Elt)
334 return false;
335 if (isa<UndefValue>(Elt))
336 continue;
337 auto *CF = dyn_cast<ConstantFP>(Elt);
338 if (!CF || !this->isValue(CF->getValueAPF()))
339 return false;
340 HasNonUndefElements = true;
341 }
342 return HasNonUndefElements;
343 }
344 }
345 return false;
346 }
347};
348
349///////////////////////////////////////////////////////////////////////////////
350//
351// Encapsulate constant value queries for use in templated predicate matchers.
352// This allows checking if constants match using compound predicates and works
353// with vector constants, possibly with relaxed constraints (for example,
354// ignoring undef values).
355//
356///////////////////////////////////////////////////////////////////////////////
357
358struct is_any_apint {
359 bool isValue(const APInt &C) { return true; }
360};
361/// Match an integer or vector with any integral constant.
362/// For vectors, this includes constants with undefined elements.
363inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
364 return cst_pred_ty<is_any_apint>();
365}
366
367struct is_all_ones {
368 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
369};
370/// Match an integer or vector with all bits set.
371/// For vectors, this includes constants with undefined elements.
372inline cst_pred_ty<is_all_ones> m_AllOnes() {
373 return cst_pred_ty<is_all_ones>();
374}
375
376struct is_maxsignedvalue {
377 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
378};
379/// Match an integer or vector with values having all bits except for the high
380/// bit set (0x7f...).
381/// For vectors, this includes constants with undefined elements.
382inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
383 return cst_pred_ty<is_maxsignedvalue>();
384}
385inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
386 return V;
387}
388
389struct is_negative {
390 bool isValue(const APInt &C) { return C.isNegative(); }
391};
392/// Match an integer or vector of negative values.
393/// For vectors, this includes constants with undefined elements.
394inline cst_pred_ty<is_negative> m_Negative() {
395 return cst_pred_ty<is_negative>();
396}
397inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
398 return V;
399}
400
401struct is_nonnegative {
402 bool isValue(const APInt &C) { return C.isNonNegative(); }
403};
404/// Match an integer or vector of non-negative values.
405/// For vectors, this includes constants with undefined elements.
406inline cst_pred_ty<is_nonnegative> m_NonNegative() {
407 return cst_pred_ty<is_nonnegative>();
408}
409inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
410 return V;
411}
412
413struct is_strictlypositive {
414 bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
415};
416/// Match an integer or vector of strictly positive values.
417/// For vectors, this includes constants with undefined elements.
418inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
419 return cst_pred_ty<is_strictlypositive>();
420}
421inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
422 return V;
423}
424
425struct is_nonpositive {
426 bool isValue(const APInt &C) { return C.isNonPositive(); }
427};
428/// Match an integer or vector of non-positive values.
429/// For vectors, this includes constants with undefined elements.
430inline cst_pred_ty<is_nonpositive> m_NonPositive() {
431 return cst_pred_ty<is_nonpositive>();
432}
433inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
434
435struct is_one {
436 bool isValue(const APInt &C) { return C.isOneValue(); }
437};
438/// Match an integer 1 or a vector with all elements equal to 1.
439/// For vectors, this includes constants with undefined elements.
440inline cst_pred_ty<is_one> m_One() {
441 return cst_pred_ty<is_one>();
442}
443
444struct is_zero_int {
445 bool isValue(const APInt &C) { return C.isNullValue(); }
446};
447/// Match an integer 0 or a vector with all elements equal to 0.
448/// For vectors, this includes constants with undefined elements.
449inline cst_pred_ty<is_zero_int> m_ZeroInt() {
450 return cst_pred_ty<is_zero_int>();
451}
452
453struct is_zero {
454 template <typename ITy> bool match(ITy *V) {
455 auto *C = dyn_cast<Constant>(V);
456 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
457 }
458};
459/// Match any null constant or a vector with all elements equal to 0.
460/// For vectors, this includes constants with undefined elements.
461inline is_zero m_Zero() {
462 return is_zero();
463}
464
465struct is_power2 {
466 bool isValue(const APInt &C) { return C.isPowerOf2(); }
467};
468/// Match an integer or vector power-of-2.
469/// For vectors, this includes constants with undefined elements.
470inline cst_pred_ty<is_power2> m_Power2() {
471 return cst_pred_ty<is_power2>();
472}
473inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
474 return V;
475}
476
477struct is_negated_power2 {
478 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
479};
480/// Match an integer or vector negated power-of-2.
481/// For vectors, this includes constants with undefined elements.
482inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
483 return cst_pred_ty<is_negated_power2>();
484}
485inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
486 return V;
487}
488
489struct is_power2_or_zero {
490 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
491};
492/// Match an integer or vector of 0 or power-of-2 values.
493/// For vectors, this includes constants with undefined elements.
494inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
495 return cst_pred_ty<is_power2_or_zero>();
496}
497inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
498 return V;
499}
500
501struct is_sign_mask {
502 bool isValue(const APInt &C) { return C.isSignMask(); }
503};
504/// Match an integer or vector with only the sign bit(s) set.
505/// For vectors, this includes constants with undefined elements.
506inline cst_pred_ty<is_sign_mask> m_SignMask() {
507 return cst_pred_ty<is_sign_mask>();
508}
509
510struct is_lowbit_mask {
511 bool isValue(const APInt &C) { return C.isMask(); }
512};
513/// Match an integer or vector with only the low bit(s) set.
514/// For vectors, this includes constants with undefined elements.
515inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
516 return cst_pred_ty<is_lowbit_mask>();
517}
518
519struct icmp_pred_with_threshold {
520 ICmpInst::Predicate Pred;
521 const APInt *Thr;
522 bool isValue(const APInt &C) {
523 switch (Pred) {
524 case ICmpInst::Predicate::ICMP_EQ:
525 return C.eq(*Thr);
526 case ICmpInst::Predicate::ICMP_NE:
527 return C.ne(*Thr);
528 case ICmpInst::Predicate::ICMP_UGT:
529 return C.ugt(*Thr);
530 case ICmpInst::Predicate::ICMP_UGE:
531 return C.uge(*Thr);
532 case ICmpInst::Predicate::ICMP_ULT:
533 return C.ult(*Thr);
534 case ICmpInst::Predicate::ICMP_ULE:
535 return C.ule(*Thr);
536 case ICmpInst::Predicate::ICMP_SGT:
537 return C.sgt(*Thr);
538 case ICmpInst::Predicate::ICMP_SGE:
539 return C.sge(*Thr);
540 case ICmpInst::Predicate::ICMP_SLT:
541 return C.slt(*Thr);
542 case ICmpInst::Predicate::ICMP_SLE:
543 return C.sle(*Thr);
544 default:
545 llvm_unreachable("Unhandled ICmp predicate");
546 }
547 }
548};
549/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
550/// to Threshold. For vectors, this includes constants with undefined elements.
551inline cst_pred_ty<icmp_pred_with_threshold>
552m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
553 cst_pred_ty<icmp_pred_with_threshold> P;
554 P.Pred = Predicate;
555 P.Thr = &Threshold;
556 return P;
557}
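
A hedged usage example for m_SpecificInt_ICMP (illustrative only; the helper name is made up, same PatternMatch setup as the earlier sketches). The matcher stores a pointer to the threshold, and APInt comparisons require matching bit widths, so the threshold is built from the value's own scalar width:

    // True if V is an integer constant (or splat) that is unsigned-less-than Limit.
    static bool matchConstantBelow(Value *V, uint64_t Limit) {
      unsigned BW = V->getType()->getScalarSizeInBits();
      if (BW == 0)
        return false; // Give up on types without a fixed scalar bit width.
      APInt Threshold(BW, Limit);
      return match(V, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
    }
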
558
559struct is_nan {
560 bool isValue(const APFloat &C) { return C.isNaN(); }
561};
562/// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
563/// For vectors, this includes constants with undefined elements.
564inline cstfp_pred_ty<is_nan> m_NaN() {
565 return cstfp_pred_ty<is_nan>();
566}
567
568struct is_any_zero_fp {
569 bool isValue(const APFloat &C) { return C.isZero(); }
570};
571/// Match a floating-point negative zero or positive zero.
572/// For vectors, this includes constants with undefined elements.
573inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
574 return cstfp_pred_ty<is_any_zero_fp>();
575}
576
577struct is_pos_zero_fp {
578 bool isValue(const APFloat &C) { return C.isPosZero(); }
579};
580/// Match a floating-point positive zero.
581/// For vectors, this includes constants with undefined elements.
582inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
583 return cstfp_pred_ty<is_pos_zero_fp>();
584}
585
586struct is_neg_zero_fp {
587 bool isValue(const APFloat &C) { return C.isNegZero(); }
588};
589/// Match a floating-point negative zero.
590/// For vectors, this includes constants with undefined elements.
591inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
592 return cstfp_pred_ty<is_neg_zero_fp>();
593}
594
595///////////////////////////////////////////////////////////////////////////////
596
597template <typename Class> struct bind_ty {
598 Class *&VR;
599
600 bind_ty(Class *&V) : VR(V) {}
601
602 template <typename ITy> bool match(ITy *V) {
603 if (auto *CV = dyn_cast<Class>(V)) {
604 VR = CV;
605 return true;
606 }
607 return false;
608 }
609};
610
611/// Match a value, capturing it if we match.
612inline bind_ty<Value> m_Value(Value *&V) { return V; }
613inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
614
615/// Match an instruction, capturing it if we match.
616inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
617/// Match a binary operator, capturing it if we match.
618inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
619/// Match a 'with overflow' intrinsic, capturing it if we match.
620inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
621
622/// Match a ConstantInt, capturing the value if we match.
623inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
624
625/// Match a Constant, capturing the value if we match.
626inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
627
628/// Match a ConstantFP, capturing the value if we match.
629inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
630
631/// Match a basic block value, capturing it if we match.
632inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
633inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
634 return V;
635}
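
A small example of the capturing matchers (illustrative only; helper name assumed, same PatternMatch setup as the earlier sketches; it assumes canonical IR with the constant on the right-hand side of the 'and'):

    // Matches "and X, Mask" with a scalar ConstantInt mask, binding both.
    static bool matchMask(Value *V, Value *&X, ConstantInt *&Mask) {
      return match(V, m_And(m_Value(X), m_ConstantInt(Mask)));
    }
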
636
637/// Match a specified Value*.
638struct specificval_ty {
639 const Value *Val;
640
641 specificval_ty(const Value *V) : Val(V) {}
642
643 template <typename ITy> bool match(ITy *V) { return V == Val; }
644};
645
646/// Match if we have a specific specified value.
647inline specificval_ty m_Specific(const Value *V) { return V; }
648
649/// Stores a reference to the Value *, not the Value * itself,
650/// thus can be used in commutative matchers.
651template <typename Class> struct deferredval_ty {
652 Class *const &Val;
653
654 deferredval_ty(Class *const &V) : Val(V) {}
655
656 template <typename ITy> bool match(ITy *const V) { return V == Val; }
657};
658
659/// A commutative-friendly version of m_Specific().
660inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
661inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
662 return V;
663}
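
Two short examples of m_Specific and m_Deferred (illustrative only; helper names are made up). m_Deferred reads the captured pointer at match time, so it can require a later operand to equal one bound earlier in the same pattern:

    // Matches "add X, X", binding X. m_Value(X) binds the first operand and
    // m_Deferred(X) then requires the second operand to be that same value.
    static bool matchDoubled(Value *V, Value *&X) {
      return match(V, m_Add(m_Value(X), m_Deferred(X)));
    }

    // Matches "add Known, Other" for an already-known left operand.
    static bool matchAddOf(Value *V, Value *Known, Value *&Other) {
      return match(V, m_Add(m_Specific(Known), m_Value(Other)));
    }
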
664
665/// Match a specified floating point value, or a vector whose elements all
666/// equal that value.
667struct specific_fpval {
668 double Val;
669
670 specific_fpval(double V) : Val(V) {}
671
672 template <typename ITy> bool match(ITy *V) {
673 if (const auto *CFP = dyn_cast<ConstantFP>(V))
674 return CFP->isExactlyValue(Val);
675 if (V->getType()->isVectorTy())
676 if (const auto *C = dyn_cast<Constant>(V))
677 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
678 return CFP->isExactlyValue(Val);
679 return false;
680 }
681};
682
683/// Match a specific floating point value or vector with all elements
684/// equal to the value.
685inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
686
687/// Match a float 1.0 or vector with all elements equal to 1.0.
688inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
689
690struct bind_const_intval_ty {
691 uint64_t &VR;
692
693 bind_const_intval_ty(uint64_t &V) : VR(V) {}
694
695 template <typename ITy> bool match(ITy *V) {
696 if (const auto *CV = dyn_cast<ConstantInt>(V))
697 if (CV->getValue().ule(UINT64_MAX)) {
698 VR = CV->getZExtValue();
699 return true;
700 }
701 return false;
702 }
703};
704
705/// Match a specified integer value, or a vector whose elements all equal
706/// that value.
707struct specific_intval {
708 APInt Val;
709
710 specific_intval(APInt V) : Val(std::move(V)) {}
711
712 template <typename ITy> bool match(ITy *V) {
713 const auto *CI = dyn_cast<ConstantInt>(V);
714 if (!CI && V->getType()->isVectorTy())
715 if (const auto *C = dyn_cast<Constant>(V))
716 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue());
717
718 return CI && APInt::isSameValue(CI->getValue(), Val);
719 }
720};
721
722/// Match a specific integer value or vector with all elements equal to
723/// the value.
724inline specific_intval m_SpecificInt(APInt V) {
725 return specific_intval(std::move(V));
726}
727
728inline specific_intval m_SpecificInt(uint64_t V) {
729 return m_SpecificInt(APInt(64, V));
730}
731
732/// Match a ConstantInt and bind to its value. This does not match
733/// ConstantInts wider than 64-bits.
734inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
735
736/// Match a specified basic block value.
737struct specific_bbval {
738 BasicBlock *Val;
739
740 specific_bbval(BasicBlock *Val) : Val(Val) {}
741
742 template <typename ITy> bool match(ITy *V) {
743 const auto *BB = dyn_cast<BasicBlock>(V);
744 return BB && BB == Val;
745 }
746};
747
748/// Match a specific basic block value.
749inline specific_bbval m_SpecificBB(BasicBlock *BB) {
750 return specific_bbval(BB);
751}
752
753/// A commutative-friendly version of m_Specific().
754inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
755 return BB;
756}
757inline deferredval_ty<const BasicBlock>
758m_Deferred(const BasicBlock *const &BB) {
759 return BB;
760}
761
762//===----------------------------------------------------------------------===//
763// Matcher for any binary operator.
764//
765template <typename LHS_t, typename RHS_t, bool Commutable = false>
766struct AnyBinaryOp_match {
767 LHS_t L;
768 RHS_t R;
769
770 // The evaluation order is always stable, regardless of Commutability.
771 // The LHS is always matched first.
772 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
773
774 template <typename OpTy> bool match(OpTy *V) {
775 if (auto *I = dyn_cast<BinaryOperator>(V))
776 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
777 (Commutable && L.match(I->getOperand(1)) &&
778 R.match(I->getOperand(0)));
779 return false;
780 }
781};
782
783template <typename LHS, typename RHS>
784inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
785 return AnyBinaryOp_match<LHS, RHS>(L, R);
786}
787
788//===----------------------------------------------------------------------===//
789// Matchers for specific binary operators.
790//
791
792template <typename LHS_t, typename RHS_t, unsigned Opcode,
793 bool Commutable = false>
794struct BinaryOp_match {
795 LHS_t L;
796 RHS_t R;
797
798 // The evaluation order is always stable, regardless of Commutability.
799 // The LHS is always matched first.
800 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
801
802 template <typename OpTy> bool match(OpTy *V) {
803 if (V->getValueID() == Value::InstructionVal + Opcode) {
804 auto *I = cast<BinaryOperator>(V);
805 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
806 (Commutable && L.match(I->getOperand(1)) &&
807 R.match(I->getOperand(0)));
808 }
809 if (auto *CE = dyn_cast<ConstantExpr>(V))
810 return CE->getOpcode() == Opcode &&
811 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
812 (Commutable && L.match(CE->getOperand(1)) &&
813 R.match(CE->getOperand(0))));
814 return false;
815 }
816};
817
818template <typename LHS, typename RHS>
819inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
820 const RHS &R) {
821 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
822}
823
824template <typename LHS, typename RHS>
825inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
826 const RHS &R) {
827 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
828}
829
830template <typename LHS, typename RHS>
831inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
832 const RHS &R) {
833 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
834}
835
836template <typename LHS, typename RHS>
837inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
838 const RHS &R) {
839 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
840}
841
842template <typename Op_t> struct FNeg_match {
843 Op_t X;
844
845 FNeg_match(const Op_t &Op) : X(Op) {}
846 template <typename OpTy> bool match(OpTy *V) {
847 auto *FPMO = dyn_cast<FPMathOperator>(V);
848 if (!FPMO) return false;
849
850 if (FPMO->getOpcode() == Instruction::FNeg)
851 return X.match(FPMO->getOperand(0));
852
853 if (FPMO->getOpcode() == Instruction::FSub) {
854 if (FPMO->hasNoSignedZeros()) {
855 // With 'nsz', any zero goes.
856 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
857 return false;
858 } else {
859 // Without 'nsz', we need fsub -0.0, X exactly.
860 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
861 return false;
862 }
863
864 return X.match(FPMO->getOperand(1));
865 }
866
867 return false;
868 }
869};
870
871/// Match 'fneg X' as 'fsub -0.0, X'.
872template <typename OpTy>
873inline FNeg_match<OpTy>
874m_FNeg(const OpTy &X) {
875 return FNeg_match<OpTy>(X);
876}
877
878/// Match 'fneg X' as 'fsub +-0.0, X'.
879template <typename RHS>
880inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
881m_FNegNSZ(const RHS &X) {
882 return m_FSub(m_AnyZeroFP(), X);
883}
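
A usage example for the fneg matchers (illustrative only; helper name assumed, same PatternMatch setup as the earlier sketches):

    // Matches the fneg instruction or an equivalent "fsub -0.0, X"
    // (also "fsub 0.0, X" when the nsz flag is present), binding X.
    static bool matchNegation(Value *V, Value *&X) {
      return match(V, m_FNeg(m_Value(X)));
    }
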
884
885template <typename LHS, typename RHS>
886inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
887 const RHS &R) {
888 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
889}
890
891template <typename LHS, typename RHS>
892inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
893 const RHS &R) {
894 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
895}
896
897template <typename LHS, typename RHS>
898inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
899 const RHS &R) {
900 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
901}
902
903template <typename LHS, typename RHS>
904inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
905 const RHS &R) {
906 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
907}
908
909template <typename LHS, typename RHS>
910inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
911 const RHS &R) {
912 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
913}
914
915template <typename LHS, typename RHS>
916inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
917 const RHS &R) {
918 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
919}
920
921template <typename LHS, typename RHS>
922inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
923 const RHS &R) {
924 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
925}
926
927template <typename LHS, typename RHS>
928inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
929 const RHS &R) {
930 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
931}
932
933template <typename LHS, typename RHS>
934inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
935 const RHS &R) {
936 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
937}
938
939template <typename LHS, typename RHS>
940inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
941 const RHS &R) {
942 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
943}
944
945template <typename LHS, typename RHS>
946inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
947 const RHS &R) {
948 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
949}
950
951template <typename LHS, typename RHS>
952inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
953 const RHS &R) {
954 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
955}
956
957template <typename LHS, typename RHS>
958inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
959 const RHS &R) {
960 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
961}
962
963template <typename LHS, typename RHS>
964inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
965 const RHS &R) {
966 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
967}
968
969template <typename LHS_t, typename RHS_t, unsigned Opcode,
970 unsigned WrapFlags = 0>
971struct OverflowingBinaryOp_match {
972 LHS_t L;
973 RHS_t R;
974
975 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
976 : L(LHS), R(RHS) {}
977
978 template <typename OpTy> bool match(OpTy *V) {
979 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
980 if (Op->getOpcode() != Opcode)
981 return false;
982 if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap &&
983 !Op->hasNoUnsignedWrap())
984 return false;
985 if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap &&
986 !Op->hasNoSignedWrap())
987 return false;
988 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
989 }
990 return false;
991 }
992};
993
994template <typename LHS, typename RHS>
995inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
996 OverflowingBinaryOperator::NoSignedWrap>
997m_NSWAdd(const LHS &L, const RHS &R) {
998 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
999 OverflowingBinaryOperator::NoSignedWrap>(
1000 L, R);
1001}
1002template <typename LHS, typename RHS>
1003inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1004 OverflowingBinaryOperator::NoSignedWrap>
1005m_NSWSub(const LHS &L, const RHS &R) {
1006 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1007 OverflowingBinaryOperator::NoSignedWrap>(
1008 L, R);
1009}
1010template <typename LHS, typename RHS>
1011inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1012 OverflowingBinaryOperator::NoSignedWrap>
1013m_NSWMul(const LHS &L, const RHS &R) {
1014 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1015 OverflowingBinaryOperator::NoSignedWrap>(
1016 L, R);
1017}
1018template <typename LHS, typename RHS>
1019inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1020 OverflowingBinaryOperator::NoSignedWrap>
1021m_NSWShl(const LHS &L, const RHS &R) {
1022 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1023 OverflowingBinaryOperator::NoSignedWrap>(
1024 L, R);
1025}
1026
1027template <typename LHS, typename RHS>
1028inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1029 OverflowingBinaryOperator::NoUnsignedWrap>
1030m_NUWAdd(const LHS &L, const RHS &R) {
1031 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1032 OverflowingBinaryOperator::NoUnsignedWrap>(
1033 L, R);
1034}
1035template <typename LHS, typename RHS>
1036inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1037 OverflowingBinaryOperator::NoUnsignedWrap>
1038m_NUWSub(const LHS &L, const RHS &R) {
1039 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1040 OverflowingBinaryOperator::NoUnsignedWrap>(
1041 L, R);
1042}
1043template <typename LHS, typename RHS>
1044inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1045 OverflowingBinaryOperator::NoUnsignedWrap>
1046m_NUWMul(const LHS &L, const RHS &R) {
1047 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1048 OverflowingBinaryOperator::NoUnsignedWrap>(
1049 L, R);
1050}
1051template <typename LHS, typename RHS>
1052inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1053 OverflowingBinaryOperator::NoUnsignedWrap>
1054m_NUWShl(const LHS &L, const RHS &R) {
1055 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1056 OverflowingBinaryOperator::NoUnsignedWrap>(
1057 L, R);
1058}
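
An example of the wrap-flag-aware matchers (illustrative only; helper name assumed, constant on the RHS as in canonical IR):

    // Matches "add nsw X, 1", binding X.
    static bool matchNSWIncrement(Value *V, Value *&X) {
      return match(V, m_NSWAdd(m_Value(X), m_One()));
    }
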
1059
1060//===----------------------------------------------------------------------===//
1061// Class that matches a group of binary opcodes.
1062//
1063template <typename LHS_t, typename RHS_t, typename Predicate>
1064struct BinOpPred_match : Predicate {
1065 LHS_t L;
1066 RHS_t R;
1067
1068 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1069
1070 template <typename OpTy> bool match(OpTy *V) {
1071 if (auto *I = dyn_cast<Instruction>(V))
1072 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1073 R.match(I->getOperand(1));
1074 if (auto *CE = dyn_cast<ConstantExpr>(V))
1075 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1076 R.match(CE->getOperand(1));
1077 return false;
1078 }
1079};
1080
1081struct is_shift_op {
1082 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1083};
1084
1085struct is_right_shift_op {
1086 bool isOpType(unsigned Opcode) {
1087 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1088 }
1089};
1090
1091struct is_logical_shift_op {
1092 bool isOpType(unsigned Opcode) {
1093 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1094 }
1095};
1096
1097struct is_bitwiselogic_op {
1098 bool isOpType(unsigned Opcode) {
1099 return Instruction::isBitwiseLogicOp(Opcode);
1100 }
1101};
1102
1103struct is_idiv_op {
1104 bool isOpType(unsigned Opcode) {
1105 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1106 }
1107};
1108
1109struct is_irem_op {
1110 bool isOpType(unsigned Opcode) {
1111 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1112 }
1113};
1114
1115/// Matches shift operations.
1116template <typename LHS, typename RHS>
1117inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1118 const RHS &R) {
1119 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1120}
1121
1122/// Matches right shift operations (lshr and ashr).
1123template <typename LHS, typename RHS>
1124inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1125 const RHS &R) {
1126 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1127}
1128
1129/// Matches logical shift operations.
1130template <typename LHS, typename RHS>
1131inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1132m_LogicalShift(const LHS &L, const RHS &R) {
1133 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1134}
1135
1136/// Matches bitwise logic operations.
1137template <typename LHS, typename RHS>
1138inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1139m_BitwiseLogic(const LHS &L, const RHS &R) {
1140 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1141}
1142
1143/// Matches integer division operations.
1144template <typename LHS, typename RHS>
1145inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1146 const RHS &R) {
1147 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1148}
1149
1150/// Matches integer remainder operations.
1151template <typename LHS, typename RHS>
1152inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1153 const RHS &R) {
1154 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1155}
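
An example of the opcode-group matchers (illustrative only; helper name assumed):

    // Matches shl/lshr/ashr of X by a constant (or splat constant) amount.
    static bool matchShiftByConstant(Value *V, Value *&X, const APInt *&Amt) {
      return match(V, m_Shift(m_Value(X), m_APInt(Amt)));
    }
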
1156
1157//===----------------------------------------------------------------------===//
1158// Class that matches exact binary ops.
1159//
1160template <typename SubPattern_t> struct Exact_match {
1161 SubPattern_t SubPattern;
1162
1163 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1164
1165 template <typename OpTy> bool match(OpTy *V) {
1166 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1167 return PEO->isExact() && SubPattern.match(V);
1168 return false;
1169 }
1170};
1171
1172template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1173 return SubPattern;
1174}
1175
1176//===----------------------------------------------------------------------===//
1177// Matchers for CmpInst classes
1178//
1179
1180template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1181 bool Commutable = false>
1182struct CmpClass_match {
1183 PredicateTy &Predicate;
1184 LHS_t L;
1185 RHS_t R;
1186
1187 // The evaluation order is always stable, regardless of Commutability.
1188 // The LHS is always matched first.
1189 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1190 : Predicate(Pred), L(LHS), R(RHS) {}
1191
1192 template <typename OpTy> bool match(OpTy *V) {
1193 if (auto *I = dyn_cast<Class>(V)) {
1194 if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
1195 Predicate = I->getPredicate();
1196 return true;
1197 } else if (Commutable && L.match(I->getOperand(1)) &&
1198 R.match(I->getOperand(0))) {
1199 Predicate = I->getSwappedPredicate();
1200 return true;
1201 }
1202 }
1203 return false;
1204 }
1205};
1206
1207template <typename LHS, typename RHS>
1208inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1209m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1210 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1211}
1212
1213template <typename LHS, typename RHS>
1214inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1215m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1216 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1217}
1218
1219template <typename LHS, typename RHS>
1220inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1221m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1222 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1223}
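
An example of the compare matchers (illustrative only; helper name assumed). The predicate is an output parameter, so callers typically constrain it after a successful match:

    // Matches "icmp <pred> X, 0" and reports the predicate.
    static bool matchCmpWithZero(Value *V, ICmpInst::Predicate &Pred, Value *&X) {
      return match(V, m_ICmp(Pred, m_Value(X), m_ZeroInt()));
    }
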
1224
1225//===----------------------------------------------------------------------===//
1226// Matchers for instructions with a given opcode and number of operands.
1227//
1228
1229/// Matches instructions with Opcode and one operand.
1230template <typename T0, unsigned Opcode> struct OneOps_match {
1231 T0 Op1;
1232
1233 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1234
1235 template <typename OpTy> bool match(OpTy *V) {
1236 if (V->getValueID() == Value::InstructionVal + Opcode) {
1237 auto *I = cast<Instruction>(V);
1238 return Op1.match(I->getOperand(0));
1239 }
1240 return false;
1241 }
1242};
1243
1244/// Matches instructions with Opcode and two operands.
1245template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1246 T0 Op1;
1247 T1 Op2;
1248
1249 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1250
1251 template <typename OpTy> bool match(OpTy *V) {
1252 if (V->getValueID() == Value::InstructionVal + Opcode) {
1253 auto *I = cast<Instruction>(V);
1254 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1255 }
1256 return false;
1257 }
1258};
1259
1260/// Matches instructions with Opcode and three operands.
1261template <typename T0, typename T1, typename T2, unsigned Opcode>
1262struct ThreeOps_match {
1263 T0 Op1;
1264 T1 Op2;
1265 T2 Op3;
1266
1267 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1268 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1269
1270 template <typename OpTy> bool match(OpTy *V) {
1271 if (V->getValueID() == Value::InstructionVal + Opcode) {
17. Assuming the condition is true
18. Taking true branch
1272 auto *I = cast<Instruction>(V);
19. 'V' is an 'Instruction'
1273 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
20. Returning the value 1, which participates in a condition later
1274 Op3.match(I->getOperand(2));
1275 }
1276 return false;
1277 }
1278};
1279
1280/// Matches SelectInst.
1281template <typename Cond, typename LHS, typename RHS>
1282inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1283m_Select(const Cond &C, const LHS &L, const RHS &R) {
1284 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1285}
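
An example of m_Select (illustrative only; helper name assumed):

    // Matches "select Cond, C1, C2" where both arms are constants.
    static bool matchSelectOfConstants(Value *V, Value *&Cond, Constant *&TV,
                                       Constant *&FV) {
      return match(V, m_Select(m_Value(Cond), m_Constant(TV), m_Constant(FV)));
    }
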
1286
1287/// This matches a select of two constants, e.g.:
1288/// m_SelectCst<-1, 0>(m_Value(V))
1289template <int64_t L, int64_t R, typename Cond>
1290inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1291 Instruction::Select>
1292m_SelectCst(const Cond &C) {
1293 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1294}
1295
1296/// Matches FreezeInst.
1297template <typename OpTy>
1298inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1299 return OneOps_match<OpTy, Instruction::Freeze>(Op);
1300}
1301
1302/// Matches InsertElementInst.
1303template <typename Val_t, typename Elt_t, typename Idx_t>
1304inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1305m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1306 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1307 Val, Elt, Idx);
1308}
1309
1310/// Matches ExtractElementInst.
1311template <typename Val_t, typename Idx_t>
1312inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1313m_ExtractElement(const Val_t &Val, const Idx_t &Idx) {
1314 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1315}
1316
1317/// Matches ShuffleVectorInst.
1318template <typename V1_t, typename V2_t, typename Mask_t>
1319inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>
1320m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) {
1321 return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2,
1322 m);
1323}
1324
1325/// Matches LoadInst.
1326template <typename OpTy>
1327inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1328 return OneOps_match<OpTy, Instruction::Load>(Op);
1329}
1330
1331/// Matches StoreInst.
1332template <typename ValueOpTy, typename PointerOpTy>
1333inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1334m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1335 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1336 PointerOp);
1337}
1338
1339//===----------------------------------------------------------------------===//
1340// Matchers for CastInst classes
1341//
1342
1343template <typename Op_t, unsigned Opcode> struct CastClass_match {
1344 Op_t Op;
1345
1346 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1347
1348 template <typename OpTy> bool match(OpTy *V) {
1349 if (auto *O = dyn_cast<Operator>(V))
1350 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1351 return false;
1352 }
1353};
1354
1355/// Matches BitCast.
1356template <typename OpTy>
1357inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1358 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1359}
1360
1361/// Matches PtrToInt.
1362template <typename OpTy>
1363inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1364 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1365}
1366
1367/// Matches Trunc.
1368template <typename OpTy>
1369inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1370 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1371}
1372
1373template <typename OpTy>
1374inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1375m_TruncOrSelf(const OpTy &Op) {
1376 return m_CombineOr(m_Trunc(Op), Op);
1377}
1378
1379/// Matches SExt.
1380template <typename OpTy>
1381inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1382 return CastClass_match<OpTy, Instruction::SExt>(Op);
1383}
1384
1385/// Matches ZExt.
1386template <typename OpTy>
1387inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1388 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1389}
1390
1391template <typename OpTy>
1392inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1393m_ZExtOrSelf(const OpTy &Op) {
1394 return m_CombineOr(m_ZExt(Op), Op);
1395}
1396
1397template <typename OpTy>
1398inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1399m_SExtOrSelf(const OpTy &Op) {
1400 return m_CombineOr(m_SExt(Op), Op);
1401}
1402
1403template <typename OpTy>
1404inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1405 CastClass_match<OpTy, Instruction::SExt>>
1406m_ZExtOrSExt(const OpTy &Op) {
1407 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1408}
1409
1410template <typename OpTy>
1411inline match_combine_or<
1412 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1413 CastClass_match<OpTy, Instruction::SExt>>,
1414 OpTy>
1415m_ZExtOrSExtOrSelf(const OpTy &Op) {
1416 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1417}
1418
1419/// Matches UIToFP.
1420template <typename OpTy>
1421inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1422 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1423}
1424
1425/// Matches SIToFP.
1426template <typename OpTy>
1427inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1428 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1429}
1430
1431/// Matches FPTrunc
1432template <typename OpTy>
1433inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1434 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1435}
1436
1437/// Matches FPExt
1438template <typename OpTy>
1439inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1440 return CastClass_match<OpTy, Instruction::FPExt>(Op);
1441}
1442
1443//===----------------------------------------------------------------------===//
1444// Matchers for control flow.
1445//
1446
1447struct br_match {
1448 BasicBlock *&Succ;
1449
1450 br_match(BasicBlock *&Succ) : Succ(Succ) {}
1451
1452 template <typename OpTy> bool match(OpTy *V) {
1453 if (auto *BI = dyn_cast<BranchInst>(V))
1454 if (BI->isUnconditional()) {
1455 Succ = BI->getSuccessor(0);
1456 return true;
1457 }
1458 return false;
1459 }
1460};
1461
1462inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1463
1464template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1465struct brc_match {
1466 Cond_t Cond;
1467 TrueBlock_t T;
1468 FalseBlock_t F;
1469
1470 brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1471 : Cond(C), T(t), F(f) {}
1472
1473 template <typename OpTy> bool match(OpTy *V) {
1474 if (auto *BI = dyn_cast<BranchInst>(V))
1475 if (BI->isConditional() && Cond.match(BI->getCondition()))
1476 return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1477 return false;
1478 }
1479};
1480
1481template <typename Cond_t>
1482inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1483m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1484 return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1485 C, m_BasicBlock(T), m_BasicBlock(F));
1486}
1487
1488template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1489inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1490m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1491 return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1492}
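
An example of the branch matchers (illustrative only; helper name assumed, same PatternMatch setup as the earlier sketches):

    // Matches "br (icmp pred A, B), TrueBB, FalseBB", capturing the successors.
    static bool matchBranchOnICmp(Instruction *I, ICmpInst::Predicate &Pred,
                                  BasicBlock *&TrueBB, BasicBlock *&FalseBB) {
      Value *A, *B;
      return match(I, m_Br(m_ICmp(Pred, m_Value(A), m_Value(B)), TrueBB, FalseBB));
    }
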
1493
1494//===----------------------------------------------------------------------===//
1495// Matchers for max/min idioms, e.g. "select (sgt x, y), x, y" -> smax(x, y).
1496//
1497
1498template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1499 bool Commutable = false>
1500struct MaxMin_match {
1501 LHS_t L;
1502 RHS_t R;
1503
1504 // The evaluation order is always stable, regardless of Commutability.
1505 // The LHS is always matched first.
1506 MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1507
1508 template <typename OpTy> bool match(OpTy *V) {
1509 // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1510 auto *SI = dyn_cast<SelectInst>(V);
1511 if (!SI)
1512 return false;
1513 auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1514 if (!Cmp)
1515 return false;
1516 // At this point we have a select conditioned on a comparison. Check that
1517 // it is the values returned by the select that are being compared.
1518 Value *TrueVal = SI->getTrueValue();
1519 Value *FalseVal = SI->getFalseValue();
1520 Value *LHS = Cmp->getOperand(0);
1521 Value *RHS = Cmp->getOperand(1);
1522 if ((TrueVal != LHS || FalseVal != RHS) &&
1523 (TrueVal != RHS || FalseVal != LHS))
1524 return false;
1525 typename CmpInst_t::Predicate Pred =
1526 LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1527 // Does "(x pred y) ? x : y" represent the desired max/min operation?
1528 if (!Pred_t::match(Pred))
1529 return false;
1530 // It does! Bind the operands.
1531 return (L.match(LHS) && R.match(RHS)) ||
1532 (Commutable && L.match(RHS) && R.match(LHS));
1533 }
1534};
1535
1536/// Helper class for identifying signed max predicates.
1537struct smax_pred_ty {
1538 static bool match(ICmpInst::Predicate Pred) {
1539 return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1540 }
1541};
1542
1543/// Helper class for identifying signed min predicates.
1544struct smin_pred_ty {
1545 static bool match(ICmpInst::Predicate Pred) {
1546 return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1547 }
1548};
1549
1550/// Helper class for identifying unsigned max predicates.
1551struct umax_pred_ty {
1552 static bool match(ICmpInst::Predicate Pred) {
1553 return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1554 }
1555};
1556
1557/// Helper class for identifying unsigned min predicates.
1558struct umin_pred_ty {
1559 static bool match(ICmpInst::Predicate Pred) {
1560 return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1561 }
1562};
1563
1564/// Helper class for identifying ordered max predicates.
1565struct ofmax_pred_ty {
1566 static bool match(FCmpInst::Predicate Pred) {
1567 return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1568 }
1569};
1570
1571/// Helper class for identifying ordered min predicates.
1572struct ofmin_pred_ty {
1573 static bool match(FCmpInst::Predicate Pred) {
1574 return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1575 }
1576};
1577
1578/// Helper class for identifying unordered max predicates.
1579struct ufmax_pred_ty {
1580 static bool match(FCmpInst::Predicate Pred) {
1581 return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1582 }
1583};
1584
1585/// Helper class for identifying unordered min predicates.
1586struct ufmin_pred_ty {
1587 static bool match(FCmpInst::Predicate Pred) {
1588 return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1589 }
1590};
1591
1592template <typename LHS, typename RHS>
1593inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1594 const RHS &R) {
1595 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1596}
1597
1598template <typename LHS, typename RHS>
1599inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1600 const RHS &R) {
1601 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1602}
1603
1604template <typename LHS, typename RHS>
1605inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1606 const RHS &R) {
1607 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1608}
1609
1610template <typename LHS, typename RHS>
1611inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1612 const RHS &R) {
1613 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1614}
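
An example of the min/max idiom matchers (illustrative only; helper name assumed):

    // Recognizes "select (icmp sgt/sge A, B), A, B" as a signed max of A and B.
    static bool matchSMaxIdiom(Value *V, Value *&A, Value *&B) {
      return match(V, m_SMax(m_Value(A), m_Value(B)));
    }
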
1615
1616/// Match an 'ordered' floating point maximum function.
1617/// Floating point has one special value 'NaN'. Therefore, there is no total
1618/// order. However, if we can ignore the 'NaN' value (for example, because of a
1619/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1620/// semantics. In the presence of 'NaN' we have to preserve the original
1621/// select(fcmp(ogt/oge, L, R), L, R) semantics matched by this predicate.
1622///
1623/// max(L, R) iff L and R are not NaN
1624/// m_OrdFMax(L, R) = R iff L or R are NaN
1625template <typename LHS, typename RHS>
1626inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1627 const RHS &R) {
1628 return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1629}
1630
1631/// Match an 'ordered' floating point minimum function.
1632/// Floating point has one special value 'NaN'. Therefore, there is no total
1633/// order. However, if we can ignore the 'NaN' value (for example, because of a
1634/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1635/// semantics. In the presence of 'NaN' we have to preserve the original
1636/// select(fcmp(olt/ole, L, R), L, R) semantics matched by this predicate.
1637///
1638/// min(L, R) iff L and R are not NaN
1639/// m_OrdFMin(L, R) = R iff L or R are NaN
1640template <typename LHS, typename RHS>
1641inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1642 const RHS &R) {
1643 return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1644}
1645
1646/// Match an 'unordered' floating point maximum function.
1647/// Floating point has one special value 'NaN'. Therefore, there is no total
1648/// order. However, if we can ignore the 'NaN' value (for example, because of a
1649/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1650/// semantics. In the presence of 'NaN' we have to preserve the original
1651/// select(fcmp(ugt/uge, L, R), L, R) semantics matched by this predicate.
1652///
1653/// max(L, R) iff L and R are not NaN
1654/// m_UnordFMax(L, R) = L iff L or R are NaN
1655template <typename LHS, typename RHS>
1656inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1657m_UnordFMax(const LHS &L, const RHS &R) {
1658 return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1659}
1660
1661/// Match an 'unordered' floating point minimum function.
1662/// Floating point has one special value 'NaN'. Therefore, there is no total
1663/// order. However, if we can ignore the 'NaN' value (for example, because of a
1664/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1665/// semantics. In the presence of 'NaN' we have to preserve the original
1666/// select(fcmp(ult/ule, L, R), L, R) semantics matched by this predicate.
1667///
1668/// min(L, R) iff L and R are not NaN
1669/// m_UnordFMin(L, R) = L iff L or R are NaN
1670template <typename LHS, typename RHS>
1671inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1672m_UnordFMin(const LHS &L, const RHS &R) {
1673 return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1674}
1675
1676//===----------------------------------------------------------------------===//
1677// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
1678// Note that S might be matched to instructions other than AddInst.
1679//
1680
1681template <typename LHS_t, typename RHS_t, typename Sum_t>
1682struct UAddWithOverflow_match {
1683 LHS_t L;
1684 RHS_t R;
1685 Sum_t S;
1686
1687 UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1688 : L(L), R(R), S(S) {}
1689
1690 template <typename OpTy> bool match(OpTy *V) {
1691 Value *ICmpLHS, *ICmpRHS;
1692 ICmpInst::Predicate Pred;
1693 if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1694 return false;
1695
1696 Value *AddLHS, *AddRHS;
1697 auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1698
1699 // (a + b) u< a, (a + b) u< b
1700 if (Pred == ICmpInst::ICMP_ULT)
1701 if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1702 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1703
1704 // a >u (a + b), b >u (a + b)
1705 if (Pred == ICmpInst::ICMP_UGT)
1706 if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1707 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1708
1709 Value *Op1;
1710 auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
1711 // (a ^ -1) <u b
1712 if (Pred == ICmpInst::ICMP_ULT) {
1713 if (XorExpr.match(ICmpLHS))
1714 return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
1715 }
1716 // b >u (a ^ -1)
1717 if (Pred == ICmpInst::ICMP_UGT) {
1718 if (XorExpr.match(ICmpRHS))
1719 return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
1720 }
1721
1722 // Match special-case for increment-by-1.
1723 if (Pred == ICmpInst::ICMP_EQ) {
1724 // (a + 1) == 0
1725 // (1 + a) == 0
1726 if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
1727 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1728 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1729 // 0 == (a + 1)
1730 // 0 == (1 + a)
1731 if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
1732 (m_One().match(AddLHS) || m_One().match(AddRHS)))
1733 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1734 }
1735
1736 return false;
1737 }
1738};
1739
1740/// Match an icmp instruction checking for unsigned overflow on addition.
1741///
1742/// S is matched to the addition whose result is being checked for overflow, and
1743/// L and R are matched to the LHS and RHS of S.
1744template <typename LHS_t, typename RHS_t, typename Sum_t>
1745UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
1746m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
1747 return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
1748}
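
An example of m_UAddWithOverflow (illustrative only; helper name assumed):

    // Matches overflow checks such as "(a + b) u< a", binding a, b and the add.
    static bool matchUAddOverflowCheck(Value *V, Value *&A, Value *&B,
                                       Value *&Sum) {
      return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
    }
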
1749
1750template <typename Opnd_t> struct Argument_match {
1751 unsigned OpI;
1752 Opnd_t Val;
1753
1754 Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
1755
1756 template <typename OpTy> bool match(OpTy *V) {
1757 // FIXME: Should likely be switched to use `CallBase`.
1758 if (const auto *CI = dyn_cast<CallInst>(V))
1759 return Val.match(CI->getArgOperand(OpI));
1760 return false;
1761 }
1762};
1763
1764/// Match an argument.
1765template <unsigned OpI, typename Opnd_t>
1766inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
1767 return Argument_match<Opnd_t>(OpI, Op);
1768}
1769
1770/// Intrinsic matchers.
1771struct IntrinsicID_match {
1772 unsigned ID;
1773
1774 IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
1775
1776 template <typename OpTy> bool match(OpTy *V) {
1777 if (const auto *CI = dyn_cast<CallInst>(V))
1778 if (const auto *F = CI->getCalledFunction())
1779 return F->getIntrinsicID() == ID;
1780 return false;
1781 }
1782};
1783
1784/// Intrinsic matches are combinations of ID matchers and argument matchers.
1785/// Higher-arity matchers are defined recursively by and-ing them with
1786/// lower-arity matchers. Here are some convenient typedefs for up to several
1787/// arguments; more can be added as needed.
1788template <typename T0 = void, typename T1 = void, typename T2 = void,
1789 typename T3 = void, typename T4 = void, typename T5 = void,
1790 typename T6 = void, typename T7 = void, typename T8 = void,
1791 typename T9 = void, typename T10 = void>
1792struct m_Intrinsic_Ty;
1793template <typename T0> struct m_Intrinsic_Ty<T0> {
1794 using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
1795};
1796template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
1797 using Ty =
1798 match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
1799};
1800template <typename T0, typename T1, typename T2>
1801struct m_Intrinsic_Ty<T0, T1, T2> {
1802 using Ty =
1803 match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
1804 Argument_match<T2>>;
1805};
1806template <typename T0, typename T1, typename T2, typename T3>
1807struct m_Intrinsic_Ty<T0, T1, T2, T3> {
1808 using Ty =
1809 match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
1810 Argument_match<T3>>;
1811};
1812
1813template <typename T0, typename T1, typename T2, typename T3, typename T4>
1814struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
1815 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
1816 Argument_match<T4>>;
1817};
1818
1819/// Match intrinsic calls like this:
1820/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
1821template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
1822 return IntrinsicID_match(IntrID);
1823}
1824
1825template <Intrinsic::ID IntrID, typename T0>
1826inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
1827 return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
1828}
1829
1830template <Intrinsic::ID IntrID, typename T0, typename T1>
1831inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
1832 const T1 &Op1) {
1833 return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
1834}
1835
1836template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
1837inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
1838m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
1839 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
1840}
1841
1842template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
1843 typename T3>
1844inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
1845m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
1846 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
1847}
1848
1849template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
1850 typename T3, typename T4>
1851inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
1852m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
1853 const T4 &Op4) {
1854 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
1855 m_Argument<4>(Op4));
1856}
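As a hedged illustration of the variadic overloads above (V, X, and ShAmt are placeholder names, not part of this header), a funnel-shift intrinsic whose first two operands are the same value can be recognized as a rotate:

  // Hypothetical sketch: recognize llvm.fshl(X, X, ShAmt), i.e. a rotate-left of X.
  using namespace llvm::PatternMatch;
  llvm::Value *X, *ShAmt;
  if (match(V, m_Intrinsic<llvm::Intrinsic::fshl>(m_Value(X), m_Deferred(X),
                                                  m_Value(ShAmt)))) {
    // V rotates X left by ShAmt bits (modulo the bit width).
  }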
1857
1858// Helper intrinsic matching specializations.
1859template <typename Opnd0>
1860inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
1861 return m_Intrinsic<Intrinsic::bitreverse>(Op0);
1862}
1863
1864template <typename Opnd0>
1865inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
1866 return m_Intrinsic<Intrinsic::bswap>(Op0);
1867}
1868
1869template <typename Opnd0>
1870inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
1871 return m_Intrinsic<Intrinsic::fabs>(Op0);
1872}
1873
1874template <typename Opnd0>
1875inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
1876 return m_Intrinsic<Intrinsic::canonicalize>(Op0);
1877}
1878
1879template <typename Opnd0, typename Opnd1>
1880inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
1881 const Opnd1 &Op1) {
1882 return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
1883}
1884
1885template <typename Opnd0, typename Opnd1>
1886inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
1887 const Opnd1 &Op1) {
1888 return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
1889}
1890
1891//===----------------------------------------------------------------------===//
1892// Matchers for two-operand operators with the operands in either order
1893//
1894
1895/// Matches a BinaryOperator with LHS and RHS in either order.
1896template <typename LHS, typename RHS>
1897inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
1898 return AnyBinaryOp_match<LHS, RHS, true>(L, R);
1899}
1900
1901/// Matches an ICmp with a predicate over LHS and RHS in either order.
1902/// Swaps the predicate if operands are commuted.
1903template <typename LHS, typename RHS>
1904inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
1905m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1906 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
1907 R);
1908}
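A brief sketch of the commutative compare matcher (V and X are assumed to be in scope); the reported predicate is relative to the order in which the sub-patterns ended up matching:

  // Hypothetical sketch: match "X == 0" whether the IR reads (X == 0) or (0 == X).
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::Value *X;
  if (match(V, m_c_ICmp(Pred, m_Value(X), m_ZeroInt())) &&
      Pred == llvm::ICmpInst::ICMP_EQ) {
    // V tests X for equality with zero.
  }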
1909
1910/// Matches an Add with LHS and RHS in either order.
1911template <typename LHS, typename RHS>
1912inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
1913 const RHS &R) {
1914 return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
1915}
1916
1917/// Matches a Mul with LHS and RHS in either order.
1918template <typename LHS, typename RHS>
1919inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
1920 const RHS &R) {
1921 return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
1922}
1923
1924/// Matches an And with LHS and RHS in either order.
1925template <typename LHS, typename RHS>
1926inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
1927 const RHS &R) {
1928 return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
1929}
1930
1931/// Matches an Or with LHS and RHS in either order.
1932template <typename LHS, typename RHS>
1933inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
1934 const RHS &R) {
1935 return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
1936}
1937
1938/// Matches an Xor with LHS and RHS in either order.
1939template <typename LHS, typename RHS>
1940inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
1941 const RHS &R) {
1942 return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
1943}
1944
1945/// Matches a 'Neg' as 'sub 0, V'.
1946template <typename ValTy>
1947inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
1948m_Neg(const ValTy &V) {
1949 return m_Sub(m_ZeroInt(), V);
1950}
1951
1952/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
1953template <typename ValTy>
1954inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
1955m_Not(const ValTy &V) {
1956 return m_c_Xor(V, m_AllOnes());
1957}
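A small sketch of how these compose with other matchers (V and X are hypothetical); it encodes the two's-complement identity ~X + 1 == -X:

  // Hypothetical sketch: recognize (X ^ -1) + 1, which computes 0 - X.
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  if (match(V, m_c_Add(m_Not(m_Value(X)), m_One()))) {
    // V is the negation of X.
  }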
1958
1959/// Matches an SMin with LHS and RHS in either order.
1960template <typename LHS, typename RHS>
1961inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
1962m_c_SMin(const LHS &L, const RHS &R) {
1963 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
1964}
1965/// Matches an SMax with LHS and RHS in either order.
1966template <typename LHS, typename RHS>
1967inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
1968m_c_SMax(const LHS &L, const RHS &R) {
1969 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
1970}
1971/// Matches a UMin with LHS and RHS in either order.
1972template <typename LHS, typename RHS>
1973inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
1974m_c_UMin(const LHS &L, const RHS &R) {
1975 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
1976}
1977/// Matches a UMax with LHS and RHS in either order.
1978template <typename LHS, typename RHS>
1979inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
1980m_c_UMax(const LHS &L, const RHS &R) {
1981 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
1982}
1983
1984/// Matches FAdd with LHS and RHS in either order.
1985template <typename LHS, typename RHS>
1986inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
1987m_c_FAdd(const LHS &L, const RHS &R) {
1988 return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
1989}
1990
1991/// Matches FMul with LHS and RHS in either order.
1992template <typename LHS, typename RHS>
1993inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
1994m_c_FMul(const LHS &L, const RHS &R) {
1995 return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
1996}
1997
1998template <typename Opnd_t> struct Signum_match {
1999 Opnd_t Val;
2000 Signum_match(const Opnd_t &V) : Val(V) {}
2001
2002 template <typename OpTy> bool match(OpTy *V) {
2003 unsigned TypeSize = V->getType()->getScalarSizeInBits();
2004 if (TypeSize == 0)
2005 return false;
2006
2007 unsigned ShiftWidth = TypeSize - 1;
2008 Value *OpL = nullptr, *OpR = nullptr;
2009
2010 // This is the representation of signum we match:
2011 //
2012 // signum(x) == (x >> 63) | (-x >>u 63)
2013 //
2014 // An i1 value is its own signum, so it's correct to match
2015 //
2016 // signum(x) == (x >> 0) | (-x >>u 0)
2017 //
2018 // for i1 values.
2019
2020 auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
2021 auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
2022 auto Signum = m_Or(LHS, RHS);
2023
2024 return Signum.match(V) && OpL == OpR && Val.match(OpL);
2025 }
2026};
2027
2028/// Matches a signum pattern.
2029///
2030/// signum(x) =
2031/// x > 0 -> 1
2032/// x == 0 -> 0
2033/// x < 0 -> -1
2034template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
2035 return Signum_match<Val_t>(V);
2036}
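A hedged usage sketch (V and X are placeholder names) showing how the matcher above is consumed:

  // Hypothetical sketch: detect a hand-expanded signum of X.
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  if (match(V, m_Signum(m_Value(X)))) {
    // V is (X >>s BW-1) | (-X >>u BW-1), i.e. signum(X) for bit width BW.
  }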
2037
2038template <int Ind, typename Opnd_t> struct ExtractValue_match {
2039 Opnd_t Val;
2040 ExtractValue_match(const Opnd_t &V) : Val(V) {}
2041
2042 template <typename OpTy> bool match(OpTy *V) {
2043 if (auto *I = dyn_cast<ExtractValueInst>(V))
2044 return I->getNumIndices() == 1 && I->getIndices()[0] == Ind &&
2045 Val.match(I->getAggregateOperand());
2046 return false;
2047 }
2048};
2049
2050/// Match a single-index ExtractValue instruction.
2051/// For example, m_ExtractValue<1>(...)
2052template <int Ind, typename Val_t>
2053inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
2054 return ExtractValue_match<Ind, Val_t>(V);
2055}
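A short sketch combining this with the intrinsic matchers above (A and B are placeholders): the overflow flag of llvm.uadd.with.overflow is the second element of its result aggregate, hence index 1:

  // Hypothetical sketch: match the i1 overflow flag of llvm.uadd.with.overflow(A, B).
  using namespace llvm::PatternMatch;
  llvm::Value *A, *B;
  if (match(V, m_ExtractValue<1>(m_Intrinsic<llvm::Intrinsic::uadd_with_overflow>(
                   m_Value(A), m_Value(B))))) {
    // V is the carry-out of the unsigned addition A + B.
  }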
2056
2057/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or
2058/// the constant expression
2059/// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1)`
2060/// under the right conditions determined by DataLayout.
2061struct VScaleVal_match {
2062private:
2063 template <typename Base, typename Offset>
2064 inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr>
2065 m_OffsetGep(const Base &B, const Offset &O) {
2066 return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O);
2067 }
2068
2069public:
2070 const DataLayout &DL;
2071 VScaleVal_match(const DataLayout &DL) : DL(DL) {}
2072
2073 template <typename ITy> bool match(ITy *V) {
2074 if (m_Intrinsic<Intrinsic::vscale>().match(V))
2075 return true;
2076
2077 if (m_PtrToInt(m_OffsetGep(m_Zero(), m_SpecificInt(1))).match(V)) {
2078 Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType();
2079 Type *DerefTy = PtrTy->getPointerElementType();
2080 if (DerefTy->isVectorTy() && DerefTy->getVectorIsScalable() &&
2081 DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8)
2082 return true;
2083 }
2084
2085 return false;
2086 }
2087};
2088
2089inline VScaleVal_match m_VScale(const DataLayout &DL) {
2090 return VScaleVal_match(DL);
2091}
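A minimal sketch, assuming a Value *V and a DataLayout DL are available in the caller (both hypothetical here):

  // Hypothetical sketch: strip a multiplication by vscale, e.g. "mul X, vscale".
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  if (match(V, m_c_Mul(m_Value(X), m_VScale(DL)))) {
    // V scales X by the runtime vscale quantity.
  }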
2092
2093} // end namespace PatternMatch
2094} // end namespace llvm
2095
2096#endif // LLVM_IR_PATTERNMATCH_H