Bug Summary

File:llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Warning:line 220, column 36
Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name InstCombineVectorOps.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/InstCombine -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/InstCombine -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/InstCombine -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallBitVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/VectorUtils.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Constant.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/Operator.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/User.h"
35#include "llvm/IR/Value.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
39#include "llvm/Transforms/InstCombine/InstCombiner.h"
40#include <cassert>
41#include <cstdint>
42#include <iterator>
43#include <utility>
44
45using namespace llvm;
46using namespace PatternMatch;
47
48#define DEBUG_TYPE"instcombine" "instcombine"
49
50STATISTIC(NumAggregateReconstructionsSimplified,static llvm::Statistic NumAggregateReconstructionsSimplified =
{"instcombine", "NumAggregateReconstructionsSimplified", "Number of aggregate reconstructions turned into reuse of the "
"original aggregate"}
51 "Number of aggregate reconstructions turned into reuse of the "static llvm::Statistic NumAggregateReconstructionsSimplified =
{"instcombine", "NumAggregateReconstructionsSimplified", "Number of aggregate reconstructions turned into reuse of the "
"original aggregate"}
52 "original aggregate")static llvm::Statistic NumAggregateReconstructionsSimplified =
{"instcombine", "NumAggregateReconstructionsSimplified", "Number of aggregate reconstructions turned into reuse of the "
"original aggregate"}
;
53
54/// Return true if the value is cheaper to scalarize than it is to leave as a
55/// vector operation. If the extract index \p EI is a constant integer then
56/// some operations may be cheap to scalarize.
57///
58/// FIXME: It's possible to create more instructions than previously existed.
59static bool cheapToScalarize(Value *V, Value *EI) {
60 ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
61
62 // If we can pick a scalar constant value out of a vector, that is free.
63 if (auto *C = dyn_cast<Constant>(V))
64 return CEI || C->getSplatValue();
65
66 if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
67 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
68 // Index needs to be lower than the minimum size of the vector, because
69 // for scalable vector, the vector size is known at run time.
70 return CEI->getValue().ult(EC.getKnownMinValue());
71 }
72
73 // An insertelement to the same constant index as our extract will simplify
74 // to the scalar inserted element. An insertelement to a different constant
75 // index is irrelevant to our extract.
76 if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
77 return CEI;
78
79 if (match(V, m_OneUse(m_Load(m_Value()))))
80 return true;
81
82 if (match(V, m_OneUse(m_UnOp())))
83 return true;
84
85 Value *V0, *V1;
86 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
87 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
88 return true;
89
90 CmpInst::Predicate UnusedPred;
91 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
92 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
93 return true;
94
95 return false;
96}
97
98// If we have a PHI node with a vector type that is only used to feed
99// itself and be an operand of extractelement at a constant location,
100// try to replace the PHI of the vector type with a PHI of a scalar type.
101Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
102 PHINode *PN) {
103 SmallVector<Instruction *, 2> Extracts;
104 // The users we want the PHI to have are:
105 // 1) The EI ExtractElement (we already know this)
106 // 2) Possibly more ExtractElements with the same index.
107 // 3) Another operand, which will feed back into the PHI.
108 Instruction *PHIUser = nullptr;
109 for (auto U : PN->users()) {
110 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
111 if (EI.getIndexOperand() == EU->getIndexOperand())
112 Extracts.push_back(EU);
113 else
114 return nullptr;
115 } else if (!PHIUser) {
116 PHIUser = cast<Instruction>(U);
117 } else {
118 return nullptr;
119 }
120 }
121
122 if (!PHIUser)
123 return nullptr;
124
125 // Verify that this PHI user has one use, which is the PHI itself,
126 // and that it is a binary operation which is cheap to scalarize.
127 // otherwise return nullptr.
128 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
129 !(isa<BinaryOperator>(PHIUser)) ||
130 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
131 return nullptr;
132
133 // Create a scalar PHI node that will replace the vector PHI node
134 // just before the current PHI node.
135 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
137 // Scalarize each PHI operand.
138 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
139 Value *PHIInVal = PN->getIncomingValue(i);
140 BasicBlock *inBB = PN->getIncomingBlock(i);
141 Value *Elt = EI.getIndexOperand();
142 // If the operand is the PHI induction variable:
143 if (PHIInVal == PHIUser) {
144 // Scalarize the binary operation. Its first operand is the
145 // scalar PHI, and the second operand is extracted from the other
146 // vector operand.
147 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
148 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
149 Value *Op = InsertNewInstWith(
150 ExtractElementInst::Create(B0->getOperand(opId), Elt,
151 B0->getOperand(opId)->getName() + ".Elt"),
152 *B0);
153 Value *newPHIUser = InsertNewInstWith(
154 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
155 scalarPHI, Op, B0), *B0);
156 scalarPHI->addIncoming(newPHIUser, inBB);
157 } else {
158 // Scalarize PHI input:
159 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
160 // Insert the new instruction into the predecessor basic block.
161 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
162 BasicBlock::iterator InsertPos;
163 if (pos && !isa<PHINode>(pos)) {
164 InsertPos = ++pos->getIterator();
165 } else {
166 InsertPos = inBB->getFirstInsertionPt();
167 }
168
169 InsertNewInstWith(newEI, *InsertPos);
170
171 scalarPHI->addIncoming(newEI, inBB);
172 }
173 }
174
175 for (auto E : Extracts)
176 replaceInstUsesWith(*E, scalarPHI);
177
178 return &EI;
179}
180
181static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
182 InstCombiner::BuilderTy &Builder,
183 bool IsBigEndian) {
184 Value *X;
185 uint64_t ExtIndexC;
186 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
9
Taking false branch
187 !X->getType()->isVectorTy() ||
188 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
189 return nullptr;
190
191 // If this extractelement is using a bitcast from a vector of the same number
192 // of elements, see if we can find the source element from the source vector:
193 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
194 auto *SrcTy = cast<VectorType>(X->getType());
10
The object is a 'VectorType'
195 Type *DestTy = Ext.getType();
196 ElementCount NumSrcElts = SrcTy->getElementCount();
11
Calling 'VectorType::getElementCount'
17
Returning from 'VectorType::getElementCount'
197 ElementCount NumElts =
198 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
18
The object is a 'VectorType'
199 if (NumSrcElts == NumElts)
19
Calling 'UnivariateLinearPolyBase::operator=='
22
Returning from 'UnivariateLinearPolyBase::operator=='
23
Taking false branch
200 if (Value *Elt = findScalarElement(X, ExtIndexC))
201 return new BitCastInst(Elt, DestTy);
202
203 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&(static_cast<void> (0))
204 "Src and Dst must be the same sort of vector type")(static_cast<void> (0));
205
206 // If the source elements are wider than the destination, try to shift and
207 // truncate a subset of scalar bits of an insert op.
208 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
24
Assuming the condition is true
25
Taking true branch
209 Value *Scalar;
210 uint64_t InsIndexC;
211 if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
26
Calling 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 62>>'
34
Returning from 'match<llvm::Value, llvm::PatternMatch::ThreeOps_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::bind_ty<llvm::Value>, llvm::PatternMatch::bind_const_intval_ty, 62>>'
35
Taking false branch
212 m_ConstantInt(InsIndexC))))
213 return nullptr;
214
215 // The extract must be from the subset of vector elements that we inserted
216 // into. Example: if we inserted element 1 of a <2 x i64> and we are
217 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
218 // of elements 4-7 of the bitcasted vector.
219 unsigned NarrowingRatio =
220 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
36
Calling 'LinearPolySize::getKnownMinValue'
41
Returning from 'LinearPolySize::getKnownMinValue'
42
Division by zero
221 if (ExtIndexC / NarrowingRatio != InsIndexC)
222 return nullptr;
223
224 // We are extracting part of the original scalar. How that scalar is
225 // inserted into the vector depends on the endian-ness. Example:
226 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
227 // +--+--+--+--+--+--+--+--+
228 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
229 // extelt <4 x i16> V', 3: | |S2|S3|
230 // +--+--+--+--+--+--+--+--+
231 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
232 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
233 // In this example, we must right-shift little-endian. Big-endian is just a
234 // truncate.
235 unsigned Chunk = ExtIndexC % NarrowingRatio;
236 if (IsBigEndian)
237 Chunk = NarrowingRatio - 1 - Chunk;
238
239 // Bail out if this is an FP vector to FP vector sequence. That would take
240 // more instructions than we started with unless there is no shift, and it
241 // may not be handled as well in the backend.
242 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
243 bool NeedDestBitcast = DestTy->isFloatingPointTy();
244 if (NeedSrcBitcast && NeedDestBitcast)
245 return nullptr;
246
247 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
248 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
249 unsigned ShAmt = Chunk * DestWidth;
250
251 // TODO: This limitation is more strict than necessary. We could sum the
252 // number of new instructions and subtract the number eliminated to know if
253 // we can proceed.
254 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
255 if (NeedSrcBitcast || NeedDestBitcast)
256 return nullptr;
257
258 if (NeedSrcBitcast) {
259 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
260 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
261 }
262
263 if (ShAmt) {
264 // Bail out if we could end with more instructions than we started with.
265 if (!Ext.getVectorOperand()->hasOneUse())
266 return nullptr;
267 Scalar = Builder.CreateLShr(Scalar, ShAmt);
268 }
269
270 if (NeedDestBitcast) {
271 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
272 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
273 }
274 return new TruncInst(Scalar, DestTy);
275 }
276
277 return nullptr;
278}
279
280/// Find elements of V demanded by UserInstr.
281static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
282 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
283
284 // Conservatively assume that all elements are needed.
285 APInt UsedElts(APInt::getAllOnesValue(VWidth));
286
287 switch (UserInstr->getOpcode()) {
288 case Instruction::ExtractElement: {
289 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
290 assert(EEI->getVectorOperand() == V)(static_cast<void> (0));
291 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
292 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
293 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
294 }
295 break;
296 }
297 case Instruction::ShuffleVector: {
298 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
299 unsigned MaskNumElts =
300 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
301
302 UsedElts = APInt(VWidth, 0);
303 for (unsigned i = 0; i < MaskNumElts; i++) {
304 unsigned MaskVal = Shuffle->getMaskValue(i);
305 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
306 continue;
307 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
308 UsedElts.setBit(MaskVal);
309 if (Shuffle->getOperand(1) == V &&
310 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
311 UsedElts.setBit(MaskVal - VWidth);
312 }
313 break;
314 }
315 default:
316 break;
317 }
318 return UsedElts;
319}
320
321/// Find union of elements of V demanded by all its users.
322/// If it is known by querying findDemandedEltsBySingleUser that
323/// no user demands an element of V, then the corresponding bit
324/// remains unset in the returned value.
325static APInt findDemandedEltsByAllUsers(Value *V) {
326 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
327
328 APInt UnionUsedElts(VWidth, 0);
329 for (const Use &U : V->uses()) {
330 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
331 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
332 } else {
333 UnionUsedElts = APInt::getAllOnesValue(VWidth);
334 break;
335 }
336
337 if (UnionUsedElts.isAllOnesValue())
338 break;
339 }
340
341 return UnionUsedElts;
342}
343
344Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
345 Value *SrcVec = EI.getVectorOperand();
346 Value *Index = EI.getIndexOperand();
347 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
1
Assuming 'V' is null
2
Taking false branch
348 SQ.getWithInstruction(&EI)))
349 return replaceInstUsesWith(EI, V);
350
351 // If extracting a specified index from the vector, see if we can recursively
352 // find a previously computed scalar that was inserted into the vector.
353 auto *IndexC = dyn_cast<ConstantInt>(Index);
3
Assuming 'Index' is a 'ConstantInt'
354 if (IndexC
3.1
'IndexC' is non-null
3.1
'IndexC' is non-null
3.1
'IndexC' is non-null
3.1
'IndexC' is non-null
) {
4
Taking true branch
355 ElementCount EC = EI.getVectorOperandType()->getElementCount();
356 unsigned NumElts = EC.getKnownMinValue();
357
358 if (IntrinsicInst *II
5.1
'II' is null
5.1
'II' is null
5.1
'II' is null
5.1
'II' is null
= dyn_cast<IntrinsicInst>(SrcVec)) {
5
Assuming 'SrcVec' is not a 'IntrinsicInst'
359 Intrinsic::ID IID = II->getIntrinsicID();
360 // Index needs to be lower than the minimum size of the vector, because
361 // for scalable vector, the vector size is known at run time.
362 if (IID == Intrinsic::experimental_stepvector &&
363 IndexC->getValue().ult(NumElts)) {
364 Type *Ty = EI.getType();
365 unsigned BitWidth = Ty->getIntegerBitWidth();
366 Value *Idx;
367 // Return index when its value does not exceed the allowed limit
368 // for the element type of the vector, otherwise return undefined.
369 if (IndexC->getValue().getActiveBits() <= BitWidth)
370 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
371 else
372 Idx = UndefValue::get(Ty);
373 return replaceInstUsesWith(EI, Idx);
374 }
375 }
376
377 // InstSimplify should handle cases where the index is invalid.
378 // For fixed-length vector, it's invalid to extract out-of-range element.
379 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
380 return nullptr;
381
382 // This instruction only demands the single element from the input vector.
383 // Skip for scalable type, the number of elements is unknown at
384 // compile-time.
385 if (!EC.isScalable() && NumElts != 1) {
6
Assuming 'NumElts' is equal to 1
7
Taking false branch
386 // If the input vector has a single use, simplify it based on this use
387 // property.
388 if (SrcVec->hasOneUse()) {
389 APInt UndefElts(NumElts, 0);
390 APInt DemandedElts(NumElts, 0);
391 DemandedElts.setBit(IndexC->getZExtValue());
392 if (Value *V =
393 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
394 return replaceOperand(EI, 0, V);
395 } else {
396 // If the input vector has multiple uses, simplify it based on a union
397 // of all elements used.
398 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
399 if (!DemandedElts.isAllOnesValue()) {
400 APInt UndefElts(NumElts, 0);
401 if (Value *V = SimplifyDemandedVectorElts(
402 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
403 true /* AllowMultipleUsers */)) {
404 if (V != SrcVec) {
405 SrcVec->replaceAllUsesWith(V);
406 return &EI;
407 }
408 }
409 }
410 }
411 }
412
413 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
8
Calling 'foldBitcastExtElt'
414 return I;
415
416 // If there's a vector PHI feeding a scalar use through this extractelement
417 // instruction, try to scalarize the PHI.
418 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
419 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
420 return ScalarPHI;
421 }
422
423 // TODO come up with a n-ary matcher that subsumes both unary and
424 // binary matchers.
425 UnaryOperator *UO;
426 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
427 // extelt (unop X), Index --> unop (extelt X, Index)
428 Value *X = UO->getOperand(0);
429 Value *E = Builder.CreateExtractElement(X, Index);
430 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
431 }
432
433 BinaryOperator *BO;
434 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
435 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
436 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
437 Value *E0 = Builder.CreateExtractElement(X, Index);
438 Value *E1 = Builder.CreateExtractElement(Y, Index);
439 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
440 }
441
442 Value *X, *Y;
443 CmpInst::Predicate Pred;
444 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
445 cheapToScalarize(SrcVec, Index)) {
446 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
447 Value *E0 = Builder.CreateExtractElement(X, Index);
448 Value *E1 = Builder.CreateExtractElement(Y, Index);
449 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
450 }
451
452 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
453 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
454 // Extracting the inserted element?
455 if (IE->getOperand(2) == Index)
456 return replaceInstUsesWith(EI, IE->getOperand(1));
457 // If the inserted and extracted elements are constants, they must not
458 // be the same value, extract from the pre-inserted value instead.
459 if (isa<Constant>(IE->getOperand(2)) && IndexC)
460 return replaceOperand(EI, 0, IE->getOperand(0));
461 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
462 auto *VecType = cast<VectorType>(GEP->getType());
463 ElementCount EC = VecType->getElementCount();
464 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
465 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
466 // Find out why we have a vector result - these are a few examples:
467 // 1. We have a scalar pointer and a vector of indices, or
468 // 2. We have a vector of pointers and a scalar index, or
469 // 3. We have a vector of pointers and a vector of indices, etc.
470 // Here we only consider combining when there is exactly one vector
471 // operand, since the optimization is less obviously a win due to
472 // needing more than one extractelements.
473
474 unsigned VectorOps =
475 llvm::count_if(GEP->operands(), [](const Value *V) {
476 return isa<VectorType>(V->getType());
477 });
478 if (VectorOps > 1)
479 return nullptr;
480 assert(VectorOps == 1 && "Expected exactly one vector GEP operand!")(static_cast<void> (0));
481
482 Value *NewPtr = GEP->getPointerOperand();
483 if (isa<VectorType>(NewPtr->getType()))
484 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
485
486 SmallVector<Value *> NewOps;
487 for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
488 Value *Op = GEP->getOperand(I);
489 if (isa<VectorType>(Op->getType()))
490 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
491 else
492 NewOps.push_back(Op);
493 }
494
495 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
496 cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
497 NewOps);
498 NewGEP->setIsInBounds(GEP->isInBounds());
499 return NewGEP;
500 }
501 return nullptr;
502 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
503 // If this is extracting an element from a shufflevector, figure out where
504 // it came from and extract from the appropriate input element instead.
505 // Restrict the following transformation to fixed-length vector.
506 if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
507 int SrcIdx =
508 SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
509 Value *Src;
510 unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
511 ->getNumElements();
512
513 if (SrcIdx < 0)
514 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
515 if (SrcIdx < (int)LHSWidth)
516 Src = SVI->getOperand(0);
517 else {
518 SrcIdx -= LHSWidth;
519 Src = SVI->getOperand(1);
520 }
521 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
522 return ExtractElementInst::Create(
523 Src, ConstantInt::get(Int32Ty, SrcIdx, false));
524 }
525 } else if (auto *CI = dyn_cast<CastInst>(I)) {
526 // Canonicalize extractelement(cast) -> cast(extractelement).
527 // Bitcasts can change the number of vector elements, and they cost
528 // nothing.
529 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
530 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
531 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
532 }
533 }
534 }
535 return nullptr;
536}
537
538/// If V is a shuffle of values that ONLY returns elements from either LHS or
539/// RHS, return the shuffle mask and true. Otherwise, return false.
540static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
541 SmallVectorImpl<int> &Mask) {
542 assert(LHS->getType() == RHS->getType() &&(static_cast<void> (0))
543 "Invalid CollectSingleShuffleElements")(static_cast<void> (0));
544 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
545
546 if (match(V, m_Undef())) {
547 Mask.assign(NumElts, -1);
548 return true;
549 }
550
551 if (V == LHS) {
552 for (unsigned i = 0; i != NumElts; ++i)
553 Mask.push_back(i);
554 return true;
555 }
556
557 if (V == RHS) {
558 for (unsigned i = 0; i != NumElts; ++i)
559 Mask.push_back(i + NumElts);
560 return true;
561 }
562
563 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
564 // If this is an insert of an extract from some other vector, include it.
565 Value *VecOp = IEI->getOperand(0);
566 Value *ScalarOp = IEI->getOperand(1);
567 Value *IdxOp = IEI->getOperand(2);
568
569 if (!isa<ConstantInt>(IdxOp))
570 return false;
571 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
572
573 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
574 // We can handle this if the vector we are inserting into is
575 // transitively ok.
576 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
577 // If so, update the mask to reflect the inserted undef.
578 Mask[InsertedIdx] = -1;
579 return true;
580 }
581 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
582 if (isa<ConstantInt>(EI->getOperand(1))) {
583 unsigned ExtractedIdx =
584 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
585 unsigned NumLHSElts =
586 cast<FixedVectorType>(LHS->getType())->getNumElements();
587
588 // This must be extracting from either LHS or RHS.
589 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
590 // We can handle this if the vector we are inserting into is
591 // transitively ok.
592 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
593 // If so, update the mask to reflect the inserted value.
594 if (EI->getOperand(0) == LHS) {
595 Mask[InsertedIdx % NumElts] = ExtractedIdx;
596 } else {
597 assert(EI->getOperand(0) == RHS)(static_cast<void> (0));
598 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
599 }
600 return true;
601 }
602 }
603 }
604 }
605 }
606
607 return false;
608}
609
610/// If we have insertion into a vector that is wider than the vector that we
611/// are extracting from, try to widen the source vector to allow a single
612/// shufflevector to replace one or more insert/extract pairs.
613static void replaceExtractElements(InsertElementInst *InsElt,
614 ExtractElementInst *ExtElt,
615 InstCombinerImpl &IC) {
616 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
617 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
618 unsigned NumInsElts = InsVecType->getNumElements();
619 unsigned NumExtElts = ExtVecType->getNumElements();
620
621 // The inserted-to vector must be wider than the extracted-from vector.
622 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
623 NumExtElts >= NumInsElts)
624 return;
625
626 // Create a shuffle mask to widen the extended-from vector using poison
627 // values. The mask selects all of the values of the original vector followed
628 // by as many poison values as needed to create a vector of the same length
629 // as the inserted-to vector.
630 SmallVector<int, 16> ExtendMask;
631 for (unsigned i = 0; i < NumExtElts; ++i)
632 ExtendMask.push_back(i);
633 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
634 ExtendMask.push_back(-1);
635
636 Value *ExtVecOp = ExtElt->getVectorOperand();
637 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
638 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
639 ? ExtVecOpInst->getParent()
640 : ExtElt->getParent();
641
642 // TODO: This restriction matches the basic block check below when creating
643 // new extractelement instructions. If that limitation is removed, this one
644 // could also be removed. But for now, we just bail out to ensure that we
645 // will replace the extractelement instruction that is feeding our
646 // insertelement instruction. This allows the insertelement to then be
647 // replaced by a shufflevector. If the insertelement is not replaced, we can
648 // induce infinite looping because there's an optimization for extractelement
649 // that will delete our widening shuffle. This would trigger another attempt
650 // here to create that shuffle, and we spin forever.
651 if (InsertionBlock != InsElt->getParent())
652 return;
653
654 // TODO: This restriction matches the check in visitInsertElementInst() and
655 // prevents an infinite loop caused by not turning the extract/insert pair
656 // into a shuffle. We really should not need either check, but we're lacking
657 // folds for shufflevectors because we're afraid to generate shuffle masks
658 // that the backend can't handle.
659 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
660 return;
661
662 auto *WideVec =
663 new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
664
665 // Insert the new shuffle after the vector operand of the extract is defined
666 // (as long as it's not a PHI) or at the start of the basic block of the
667 // extract, so any subsequent extracts in the same basic block can use it.
668 // TODO: Insert before the earliest ExtractElementInst that is replaced.
669 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
670 WideVec->insertAfter(ExtVecOpInst);
671 else
672 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
673
674 // Replace extracts from the original narrow vector with extracts from the new
675 // wide vector.
676 for (User *U : ExtVecOp->users()) {
677 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
678 if (!OldExt || OldExt->getParent() != WideVec->getParent())
679 continue;
680 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
681 NewExt->insertAfter(OldExt);
682 IC.replaceInstUsesWith(*OldExt, NewExt);
683 }
684}
685
686/// We are building a shuffle to create V, which is a sequence of insertelement,
687/// extractelement pairs. If PermittedRHS is set, then we must either use it or
688/// not rely on the second vector source. Return a std::pair containing the
689/// left and right vectors of the proposed shuffle (or 0), and set the Mask
690/// parameter as required.
691///
692/// Note: we intentionally don't try to fold earlier shuffles since they have
693/// often been chosen carefully to be efficiently implementable on the target.
694using ShuffleOps = std::pair<Value *, Value *>;
695
696static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
697 Value *PermittedRHS,
698 InstCombinerImpl &IC) {
699 assert(V->getType()->isVectorTy() && "Invalid shuffle!")(static_cast<void> (0));
700 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
701
702 if (match(V, m_Undef())) {
703 Mask.assign(NumElts, -1);
704 return std::make_pair(
705 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
706 }
707
708 if (isa<ConstantAggregateZero>(V)) {
709 Mask.assign(NumElts, 0);
710 return std::make_pair(V, nullptr);
711 }
712
713 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
714 // If this is an insert of an extract from some other vector, include it.
715 Value *VecOp = IEI->getOperand(0);
716 Value *ScalarOp = IEI->getOperand(1);
717 Value *IdxOp = IEI->getOperand(2);
718
719 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
720 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
721 unsigned ExtractedIdx =
722 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
723 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
724
725 // Either the extracted from or inserted into vector must be RHSVec,
726 // otherwise we'd end up with a shuffle of three inputs.
727 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
728 Value *RHS = EI->getOperand(0);
729 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
730 assert(LR.second == nullptr || LR.second == RHS)(static_cast<void> (0));
731
732 if (LR.first->getType() != RHS->getType()) {
733 // Although we are giving up for now, see if we can create extracts
734 // that match the inserts for another round of combining.
735 replaceExtractElements(IEI, EI, IC);
736
737 // We tried our best, but we can't find anything compatible with RHS
738 // further up the chain. Return a trivial shuffle.
739 for (unsigned i = 0; i < NumElts; ++i)
740 Mask[i] = i;
741 return std::make_pair(V, nullptr);
742 }
743
744 unsigned NumLHSElts =
745 cast<FixedVectorType>(RHS->getType())->getNumElements();
746 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
747 return std::make_pair(LR.first, RHS);
748 }
749
750 if (VecOp == PermittedRHS) {
751 // We've gone as far as we can: anything on the other side of the
752 // extractelement will already have been converted into a shuffle.
753 unsigned NumLHSElts =
754 cast<FixedVectorType>(EI->getOperand(0)->getType())
755 ->getNumElements();
756 for (unsigned i = 0; i != NumElts; ++i)
757 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
758 return std::make_pair(EI->getOperand(0), PermittedRHS);
759 }
760
761 // If this insertelement is a chain that comes from exactly these two
762 // vectors, return the vector and the effective shuffle.
763 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
764 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
765 Mask))
766 return std::make_pair(EI->getOperand(0), PermittedRHS);
767 }
768 }
769 }
770
771 // Otherwise, we can't do anything fancy. Return an identity vector.
772 for (unsigned i = 0; i != NumElts; ++i)
773 Mask.push_back(i);
774 return std::make_pair(V, nullptr);
775}
776
777/// Look for chain of insertvalue's that fully define an aggregate, and trace
778/// back the values inserted, see if they are all were extractvalue'd from
779/// the same source aggregate from the exact same element indexes.
780/// If they were, just reuse the source aggregate.
781/// This potentially deals with PHI indirections.
782Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
783 InsertValueInst &OrigIVI) {
784 Type *AggTy = OrigIVI.getType();
785 unsigned NumAggElts;
786 switch (AggTy->getTypeID()) {
787 case Type::StructTyID:
788 NumAggElts = AggTy->getStructNumElements();
789 break;
790 case Type::ArrayTyID:
791 NumAggElts = AggTy->getArrayNumElements();
792 break;
793 default:
794 llvm_unreachable("Unhandled aggregate type?")__builtin_unreachable();
795 }
796
797 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
798 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
799 // FIXME: any interesting patterns to be caught with larger limit?
800 assert(NumAggElts > 0 && "Aggregate should have elements.")(static_cast<void> (0));
801 if (NumAggElts > 2)
802 return nullptr;
803
804 static constexpr auto NotFound = None;
805 static constexpr auto FoundMismatch = nullptr;
806
807 // Try to find a value of each element of an aggregate.
808 // FIXME: deal with more complex, not one-dimensional, aggregate types
809 SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
810
811 // Do we know values for each element of the aggregate?
812 auto KnowAllElts = [&AggElts]() {
813 return all_of(AggElts,
814 [](Optional<Instruction *> Elt) { return Elt != NotFound; });
815 };
816
817 int Depth = 0;
818
819 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
820 // every element being overwritten twice, which should never happen.
821 static const int DepthLimit = 2 * NumAggElts;
822
823 // Recurse up the chain of `insertvalue` aggregate operands until either we've
824 // reconstructed full initializer or can't visit any more `insertvalue`'s.
825 for (InsertValueInst *CurrIVI = &OrigIVI;
826 Depth < DepthLimit && CurrIVI && !KnowAllElts();
827 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
828 ++Depth) {
829 auto *InsertedValue =
830 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
831 if (!InsertedValue)
832 return nullptr; // Inserted value must be produced by an instruction.
833
834 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
835
836 // Don't bother with more than single-level aggregates.
837 if (Indices.size() != 1)
838 return nullptr; // FIXME: deal with more complex aggregates?
839
840 // Now, we may have already previously recorded the value for this element
841 // of an aggregate. If we did, that means the CurrIVI will later be
842 // overwritten with the already-recorded value. But if not, let's record it!
843 Optional<Instruction *> &Elt = AggElts[Indices.front()];
844 Elt = Elt.getValueOr(InsertedValue);
845
846 // FIXME: should we handle chain-terminating undef base operand?
847 }
848
849 // Was that sufficient to deduce the full initializer for the aggregate?
850 if (!KnowAllElts())
851 return nullptr; // Give up then.
852
853 // We now want to find the source[s] of the aggregate elements we've found.
854 // And with "source" we mean the original aggregate[s] from which
855 // the inserted elements were extracted. This may require PHI translation.
856
857 enum class AggregateDescription {
858 /// When analyzing the value that was inserted into an aggregate, we did
859 /// not manage to find defining `extractvalue` instruction to analyze.
860 NotFound,
861 /// When analyzing the value that was inserted into an aggregate, we did
862 /// manage to find defining `extractvalue` instruction[s], and everything
863 /// matched perfectly - aggregate type, element insertion/extraction index.
864 Found,
865 /// When analyzing the value that was inserted into an aggregate, we did
866 /// manage to find defining `extractvalue` instruction, but there was
867 /// a mismatch: either the source type from which the extraction was didn't
868 /// match the aggregate type into which the insertion was,
869 /// or the extraction/insertion channels mismatched,
870 /// or different elements had different source aggregates.
871 FoundMismatch
872 };
873 auto Describe = [](Optional<Value *> SourceAggregate) {
874 if (SourceAggregate == NotFound)
875 return AggregateDescription::NotFound;
876 if (*SourceAggregate == FoundMismatch)
877 return AggregateDescription::FoundMismatch;
878 return AggregateDescription::Found;
879 };
880
881 // Given the value \p Elt that was being inserted into element \p EltIdx of an
882 // aggregate AggTy, see if \p Elt was originally defined by an
883 // appropriate extractvalue (same element index, same aggregate type).
884 // If found, return the source aggregate from which the extraction was.
885 // If \p PredBB is provided, does PHI translation of an \p Elt first.
886 auto FindSourceAggregate =
887 [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
888 Optional<BasicBlock *> PredBB) -> Optional<Value *> {
889 // For now(?), only deal with, at most, a single level of PHI indirection.
890 if (UseBB && PredBB)
891 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
892 // FIXME: deal with multiple levels of PHI indirection?
893
894 // Did we find an extraction?
895 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
896 if (!EVI)
897 return NotFound;
898
899 Value *SourceAggregate = EVI->getAggregateOperand();
900
901 // Is the extraction from the same type into which the insertion was?
902 if (SourceAggregate->getType() != AggTy)
903 return FoundMismatch;
904 // And the element index doesn't change between extraction and insertion?
905 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
906 return FoundMismatch;
907
908 return SourceAggregate; // AggregateDescription::Found
909 };
910
911 // Given elements AggElts that were constructing an aggregate OrigIVI,
912 // see if we can find appropriate source aggregate for each of the elements,
913 // and see it's the same aggregate for each element. If so, return it.
914 auto FindCommonSourceAggregate =
915 [&](Optional<BasicBlock *> UseBB,
916 Optional<BasicBlock *> PredBB) -> Optional<Value *> {
917 Optional<Value *> SourceAggregate;
918
919 for (auto I : enumerate(AggElts)) {
920 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&(static_cast<void> (0))
921 "We don't store nullptr in SourceAggregate!")(static_cast<void> (0));
922 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==(static_cast<void> (0))
923 (I.index() != 0) &&(static_cast<void> (0))
924 "SourceAggregate should be valid after the first element,")(static_cast<void> (0));
925
926 // For this element, is there a plausible source aggregate?
927 // FIXME: we could special-case undef element, IFF we know that in the
928 // source aggregate said element isn't poison.
929 Optional<Value *> SourceAggregateForElement =
930 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
931
932 // Okay, what have we found? Does that correlate with previous findings?
933
934 // Regardless of whether or not we have previously found source
935 // aggregate for previous elements (if any), if we didn't find one for
936 // this element, passthrough whatever we have just found.
937 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
938 return SourceAggregateForElement;
939
940 // Okay, we have found source aggregate for this element.
941 // Let's see what we already know from previous elements, if any.
942 switch (Describe(SourceAggregate)) {
943 case AggregateDescription::NotFound:
944 // This is apparently the first element that we have examined.
945 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
946 continue; // Great, now look at next element.
947 case AggregateDescription::Found:
948 // We have previously already successfully examined other elements.
949 // Is this the same source aggregate we've found for other elements?
950 if (*SourceAggregateForElement != *SourceAggregate)
951 return FoundMismatch;
952 continue; // Still the same aggregate, look at next element.
953 case AggregateDescription::FoundMismatch:
954 llvm_unreachable("Can't happen. We would have early-exited then.")__builtin_unreachable();
955 };
956 }
957
958 assert(Describe(SourceAggregate) == AggregateDescription::Found &&(static_cast<void> (0))
959 "Must be a valid Value")(static_cast<void> (0));
960 return *SourceAggregate;
961 };
962
963 Optional<Value *> SourceAggregate;
964
965 // Can we find the source aggregate without looking at predecessors?
966 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
967 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
968 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
969 return nullptr; // Conflicting source aggregates!
970 ++NumAggregateReconstructionsSimplified;
971 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
972 }
973
974 // Okay, apparently we need to look at predecessors.
975
976 // We should be smart about picking the "use" basic block, which will be the
977 // merge point for aggregate, where we'll insert the final PHI that will be
978 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
979 // We should look in which blocks each of the AggElts is being defined,
980 // they all should be defined in the same basic block.
981 BasicBlock *UseBB = nullptr;
982
983 for (const Optional<Instruction *> &I : AggElts) {
984 BasicBlock *BB = (*I)->getParent();
985 // If it's the first instruction we've encountered, record the basic block.
986 if (!UseBB) {
987 UseBB = BB;
988 continue;
989 }
990 // Otherwise, this must be the same basic block we've seen previously.
991 if (UseBB != BB)
992 return nullptr;
993 }
994
995 // If *all* of the elements are basic-block-independent, meaning they are
996 // either function arguments, or constant expressions, then if we didn't
997 // handle them without predecessor-aware handling, we won't handle them now.
998 if (!UseBB)
999 return nullptr;
1000
1001 // If we didn't manage to find source aggregate without looking at
1002 // predecessors, and there are no predecessors to look at, then we're done.
1003 if (pred_empty(UseBB))
1004 return nullptr;
1005
1006 // Arbitrary predecessor count limit.
1007 static const int PredCountLimit = 64;
1008
1009 // Cache the (non-uniqified!) list of predecessors in a vector,
1010 // checking the limit at the same time for efficiency.
1011 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1012 for (BasicBlock *Pred : predecessors(UseBB)) {
1013 // Don't bother if there are too many predecessors.
1014 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1015 return nullptr;
1016 Preds.emplace_back(Pred);
1017 }
1018
1019 // For each predecessor, what is the source aggregate,
1020 // from which all the elements were originally extracted from?
1021 // Note that we want for the map to have stable iteration order!
1022 SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
1023 for (BasicBlock *Pred : Preds) {
1024 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1025 SourceAggregates.insert({Pred, nullptr});
1026 // Did we already evaluate this predecessor?
1027 if (!IV.second)
1028 continue;
1029
1030 // Let's hope that when coming from predecessor Pred, all elements of the
1031 // aggregate produced by OrigIVI must have been originally extracted from
1032 // the same aggregate. Is that so? Can we find said original aggregate?
1033 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1034 if (Describe(SourceAggregate) != AggregateDescription::Found)
1035 return nullptr; // Give up.
1036 IV.first->second = *SourceAggregate;
1037 }
1038
1039 // All good! Now we just need to thread the source aggregates here.
1040 // Note that we have to insert the new PHI here, ourselves, because we can't
1041 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1042 // Note that the same block can be a predecessor more than once,
1043 // and we need to preserve that invariant for the PHI node.
1044 BuilderTy::InsertPointGuard Guard(Builder);
1045 Builder.SetInsertPoint(UseBB->getFirstNonPHI());
1046 auto *PHI =
1047 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1048 for (BasicBlock *Pred : Preds)
1049 PHI->addIncoming(SourceAggregates[Pred], Pred);
1050
1051 ++NumAggregateReconstructionsSimplified;
1052 return replaceInstUsesWith(OrigIVI, PHI);
1053}
1054
1055/// Try to find redundant insertvalue instructions, like the following ones:
1056/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1057/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1058/// Here the second instruction inserts values at the same indices, as the
1059/// first one, making the first one redundant.
1060/// It should be transformed to:
1061/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1062Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
1063 bool IsRedundant = false;
1064 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1065
1066 // If there is a chain of insertvalue instructions (each of them except the
1067 // last one has only one use and it's another insertvalue insn from this
1068 // chain), check if any of the 'children' uses the same indices as the first
1069 // instruction. In this case, the first one is redundant.
1070 Value *V = &I;
1071 unsigned Depth = 0;
1072 while (V->hasOneUse() && Depth < 10) {
1073 User *U = V->user_back();
1074 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1075 if (!UserInsInst || U->getOperand(0) != V)
1076 break;
1077 if (UserInsInst->getIndices() == FirstIndices) {
1078 IsRedundant = true;
1079 break;
1080 }
1081 V = UserInsInst;
1082 Depth++;
1083 }
1084
1085 if (IsRedundant)
1086 return replaceInstUsesWith(I, I.getOperand(0));
1087
1088 if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1089 return NewI;
1090
1091 return nullptr;
1092}
1093
1094static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
1095 // Can not analyze scalable type, the number of elements is not a compile-time
1096 // constant.
1097 if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1098 return false;
1099
1100 int MaskSize = Shuf.getShuffleMask().size();
1101 int VecSize =
1102 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1103
1104 // A vector select does not change the size of the operands.
1105 if (MaskSize != VecSize)
1106 return false;
1107
1108 // Each mask element must be undefined or choose a vector element from one of
1109 // the source operands without crossing vector lanes.
1110 for (int i = 0; i != MaskSize; ++i) {
1111 int Elt = Shuf.getMaskValue(i);
1112 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1113 return false;
1114 }
1115
1116 return true;
1117}
1118
1119/// Turn a chain of inserts that splats a value into an insert + shuffle:
1120/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1121/// shufflevector(insertelt(X, %k, 0), poison, zero)
1122static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
1123 // We are interested in the last insert in a chain. So if this insert has a
1124 // single user and that user is an insert, bail.
1125 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1126 return nullptr;
1127
1128 VectorType *VecTy = InsElt.getType();
1129 // Can not handle scalable type, the number of elements is not a compile-time
1130 // constant.
1131 if (isa<ScalableVectorType>(VecTy))
1132 return nullptr;
1133 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1134
1135 // Do not try to do this for a one-element vector, since that's a nop,
1136 // and will cause an inf-loop.
1137 if (NumElements == 1)
1138 return nullptr;
1139
1140 Value *SplatVal = InsElt.getOperand(1);
1141 InsertElementInst *CurrIE = &InsElt;
1142 SmallBitVector ElementPresent(NumElements, false);
1143 InsertElementInst *FirstIE = nullptr;
1144
1145 // Walk the chain backwards, keeping track of which indices we inserted into,
1146 // until we hit something that isn't an insert of the splatted value.
1147 while (CurrIE) {
1148 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1149 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1150 return nullptr;
1151
1152 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1153 // Check none of the intermediate steps have any additional uses, except
1154 // for the root insertelement instruction, which can be re-used, if it
1155 // inserts at position 0.
1156 if (CurrIE != &InsElt &&
1157 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1158 return nullptr;
1159
1160 ElementPresent[Idx->getZExtValue()] = true;
1161 FirstIE = CurrIE;
1162 CurrIE = NextIE;
1163 }
1164
1165 // If this is just a single insertelement (not a sequence), we are done.
1166 if (FirstIE == &InsElt)
1167 return nullptr;
1168
1169 // If we are not inserting into an undef vector, make sure we've seen an
1170 // insert into every element.
1171 // TODO: If the base vector is not undef, it might be better to create a splat
1172 // and then a select-shuffle (blend) with the base vector.
1173 if (!match(FirstIE->getOperand(0), m_Undef()))
1174 if (!ElementPresent.all())
1175 return nullptr;
1176
1177 // Create the insert + shuffle.
1178 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
1179 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1180 Constant *Zero = ConstantInt::get(Int32Ty, 0);
1181 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1182 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
1183
1184 // Splat from element 0, but replace absent elements with undef in the mask.
1185 SmallVector<int, 16> Mask(NumElements, 0);
1186 for (unsigned i = 0; i != NumElements; ++i)
1187 if (!ElementPresent[i])
1188 Mask[i] = -1;
1189
1190 return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
1191}
1192
1193/// Try to fold an insert element into an existing splat shuffle by changing
1194/// the shuffle's mask to include the index of this insert element.
1195static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
1196 // Check if the vector operand of this insert is a canonical splat shuffle.
1197 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1198 if (!Shuf || !Shuf->isZeroEltSplat())
1199 return nullptr;
1200
1201 // Bail out early if shuffle is scalable type. The number of elements in
1202 // shuffle mask is unknown at compile-time.
1203 if (isa<ScalableVectorType>(Shuf->getType()))
1204 return nullptr;
1205
1206 // Check for a constant insertion index.
1207 uint64_t IdxC;
1208 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1209 return nullptr;
1210
1211 // Check if the splat shuffle's input is the same as this insert's scalar op.
1212 Value *X = InsElt.getOperand(1);
1213 Value *Op0 = Shuf->getOperand(0);
1214 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1215 return nullptr;
1216
1217 // Replace the shuffle mask element at the index of this insert with a zero.
1218 // For example:
1219 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
1220 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
1221 unsigned NumMaskElts =
1222 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1223 SmallVector<int, 16> NewMask(NumMaskElts);
1224 for (unsigned i = 0; i != NumMaskElts; ++i)
1225 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1226
1227 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
1228}
1229
1230/// Try to fold an extract+insert element into an existing identity shuffle by
1231/// changing the shuffle's mask to include the index of this insert element.
1232static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
1233 // Check if the vector operand of this insert is an identity shuffle.
1234 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1235 if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
1236 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1237 return nullptr;
1238
1239 // Bail out early if shuffle is scalable type. The number of elements in
1240 // shuffle mask is unknown at compile-time.
1241 if (isa<ScalableVectorType>(Shuf->getType()))
1242 return nullptr;
1243
1244 // Check for a constant insertion index.
1245 uint64_t IdxC;
1246 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1247 return nullptr;
1248
1249 // Check if this insert's scalar op is extracted from the identity shuffle's
1250 // input vector.
1251 Value *Scalar = InsElt.getOperand(1);
1252 Value *X = Shuf->getOperand(0);
1253 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1254 return nullptr;
1255
1256 // Replace the shuffle mask element at the index of this extract+insert with
1257 // that same index value.
1258 // For example:
1259 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1260 unsigned NumMaskElts =
1261 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1262 SmallVector<int, 16> NewMask(NumMaskElts);
1263 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1264 for (unsigned i = 0; i != NumMaskElts; ++i) {
1265 if (i != IdxC) {
1266 // All mask elements besides the inserted element remain the same.
1267 NewMask[i] = OldMask[i];
1268 } else if (OldMask[i] == (int)IdxC) {
1269 // If the mask element was already set, there's nothing to do
1270 // (demanded elements analysis may unset it later).
1271 return nullptr;
1272 } else {
1273 assert(OldMask[i] == UndefMaskElem &&(static_cast<void> (0))
1274 "Unexpected shuffle mask element for identity shuffle")(static_cast<void> (0));
1275 NewMask[i] = IdxC;
1276 }
1277 }
1278
1279 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1280}
1281
1282/// If we have an insertelement instruction feeding into another insertelement
1283/// and the 2nd is inserting a constant into the vector, canonicalize that
1284/// constant insertion before the insertion of a variable:
1285///
1286/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1287/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1288///
1289/// This has the potential of eliminating the 2nd insertelement instruction
1290/// via constant folding of the scalar constant into a vector constant.
1291static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1292 InstCombiner::BuilderTy &Builder) {
1293 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1294 if (!InsElt1 || !InsElt1->hasOneUse())
1295 return nullptr;
1296
1297 Value *X, *Y;
1298 Constant *ScalarC;
1299 ConstantInt *IdxC1, *IdxC2;
1300 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1301 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1302 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1303 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1304 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1305 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1306 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1307 }
1308
1309 return nullptr;
1310}
1311
1312/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1313/// --> shufflevector X, CVec', Mask'
1314static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1315 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1316 // Bail out if the parent has more than one use. In that case, we'd be
1317 // replacing the insertelt with a shuffle, and that's not a clear win.
1318 if (!Inst || !Inst->hasOneUse())
1319 return nullptr;
1320 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1321 // The shuffle must have a constant vector operand. The insertelt must have
1322 // a constant scalar being inserted at a constant position in the vector.
1323 Constant *ShufConstVec, *InsEltScalar;
1324 uint64_t InsEltIndex;
1325 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1326 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1327 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1328 return nullptr;
1329
1330 // Adding an element to an arbitrary shuffle could be expensive, but a
1331 // shuffle that selects elements from vectors without crossing lanes is
1332 // assumed cheap.
1333 // If we're just adding a constant into that shuffle, it will still be
1334 // cheap.
1335 if (!isShuffleEquivalentToSelect(*Shuf))
1336 return nullptr;
1337
1338 // From the above 'select' check, we know that the mask has the same number
1339 // of elements as the vector input operands. We also know that each constant
1340 // input element is used in its lane and can not be used more than once by
1341 // the shuffle. Therefore, replace the constant in the shuffle's constant
1342 // vector with the insertelt constant. Replace the constant in the shuffle's
1343 // mask vector with the insertelt index plus the length of the vector
1344 // (because the constant vector operand of a shuffle is always the 2nd
1345 // operand).
1346 ArrayRef<int> Mask = Shuf->getShuffleMask();
1347 unsigned NumElts = Mask.size();
1348 SmallVector<Constant *, 16> NewShufElts(NumElts);
1349 SmallVector<int, 16> NewMaskElts(NumElts);
1350 for (unsigned I = 0; I != NumElts; ++I) {
1351 if (I == InsEltIndex) {
1352 NewShufElts[I] = InsEltScalar;
1353 NewMaskElts[I] = InsEltIndex + NumElts;
1354 } else {
1355 // Copy over the existing values.
1356 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1357 NewMaskElts[I] = Mask[I];
1358 }
1359 }
1360
1361 // Create new operands for a shuffle that includes the constant of the
1362 // original insertelt. The old shuffle will be dead now.
1363 return new ShuffleVectorInst(Shuf->getOperand(0),
1364 ConstantVector::get(NewShufElts), NewMaskElts);
1365 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1366 // Transform sequences of insertelements ops with constant data/indexes into
1367 // a single shuffle op.
1368 // Can not handle scalable type, the number of elements needed to create
1369 // shuffle mask is not a compile-time constant.
1370 if (isa<ScalableVectorType>(InsElt.getType()))
1371 return nullptr;
1372 unsigned NumElts =
1373 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1374
1375 uint64_t InsertIdx[2];
1376 Constant *Val[2];
1377 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1378 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1379 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1380 !match(IEI->getOperand(1), m_Constant(Val[1])))
1381 return nullptr;
1382 SmallVector<Constant *, 16> Values(NumElts);
1383 SmallVector<int, 16> Mask(NumElts);
1384 auto ValI = std::begin(Val);
1385 // Generate new constant vector and mask.
1386 // We have 2 values/masks from the insertelements instructions. Insert them
1387 // into new value/mask vectors.
1388 for (uint64_t I : InsertIdx) {
1389 if (!Values[I]) {
1390 Values[I] = *ValI;
1391 Mask[I] = NumElts + I;
1392 }
1393 ++ValI;
1394 }
1395 // Remaining values are filled with 'undef' values.
1396 for (unsigned I = 0; I < NumElts; ++I) {
1397 if (!Values[I]) {
1398 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1399 Mask[I] = I;
1400 }
1401 }
1402 // Create new operands for a shuffle that includes the constant of the
1403 // original insertelt.
1404 return new ShuffleVectorInst(IEI->getOperand(0),
1405 ConstantVector::get(Values), Mask);
1406 }
1407 return nullptr;
1408}
1409
1410Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1411 Value *VecOp = IE.getOperand(0);
1412 Value *ScalarOp = IE.getOperand(1);
1413 Value *IdxOp = IE.getOperand(2);
1414
1415 if (auto *V = SimplifyInsertElementInst(
1416 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1417 return replaceInstUsesWith(IE, V);
1418
1419 // If the scalar is bitcast and inserted into undef, do the insert in the
1420 // source type followed by bitcast.
1421 // TODO: Generalize for insert into any constant, not just undef?
1422 Value *ScalarSrc;
1423 if (match(VecOp, m_Undef()) &&
1424 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1425 (ScalarSrc->getType()->isIntegerTy() ||
1426 ScalarSrc->getType()->isFloatingPointTy())) {
1427 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1428 // bitcast (inselt undef, ScalarSrc, IdxOp)
1429 Type *ScalarTy = ScalarSrc->getType();
1430 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1431 UndefValue *NewUndef = UndefValue::get(VecTy);
1432 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1433 return new BitCastInst(NewInsElt, IE.getType());
1434 }
1435
1436 // If the vector and scalar are both bitcast from the same element type, do
1437 // the insert in that source type followed by bitcast.
1438 Value *VecSrc;
1439 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1440 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1441 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1442 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1443 cast<VectorType>(VecSrc->getType())->getElementType() ==
1444 ScalarSrc->getType()) {
1445 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1446 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1447 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1448 return new BitCastInst(NewInsElt, IE.getType());
1449 }
1450
1451 // If the inserted element was extracted from some other fixed-length vector
1452 // and both indexes are valid constants, try to turn this into a shuffle.
1453 // Can not handle scalable vector type, the number of elements needed to
1454 // create shuffle mask is not a compile-time constant.
1455 uint64_t InsertedIdx, ExtractedIdx;
1456 Value *ExtVecOp;
1457 if (isa<FixedVectorType>(IE.getType()) &&
1458 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1459 match(ScalarOp,
1460 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1461 isa<FixedVectorType>(ExtVecOp->getType()) &&
1462 ExtractedIdx <
1463 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1464 // TODO: Looking at the user(s) to determine if this insert is a
1465 // fold-to-shuffle opportunity does not match the usual instcombine
1466 // constraints. We should decide if the transform is worthy based only
1467 // on this instruction and its operands, but that may not work currently.
1468 //
1469 // Here, we are trying to avoid creating shuffles before reaching
1470 // the end of a chain of extract-insert pairs. This is complicated because
1471 // we do not generally form arbitrary shuffle masks in instcombine
1472 // (because those may codegen poorly), but collectShuffleElements() does
1473 // exactly that.
1474 //
1475 // The rules for determining what is an acceptable target-independent
1476 // shuffle mask are fuzzy because they evolve based on the backend's
1477 // capabilities and real-world impact.
1478 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1479 if (!Insert.hasOneUse())
1480 return true;
1481 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1482 if (!InsertUser)
1483 return true;
1484 return false;
1485 };
1486
1487 // Try to form a shuffle from a chain of extract-insert ops.
1488 if (isShuffleRootCandidate(IE)) {
1489 SmallVector<int, 16> Mask;
1490 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1491
1492 // The proposed shuffle may be trivial, in which case we shouldn't
1493 // perform the combine.
1494 if (LR.first != &IE && LR.second != &IE) {
1495 // We now have a shuffle of LHS, RHS, Mask.
1496 if (LR.second == nullptr)
1497 LR.second = UndefValue::get(LR.first->getType());
1498 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1499 }
1500 }
1501 }
1502
1503 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1504 unsigned VWidth = VecTy->getNumElements();
1505 APInt UndefElts(VWidth, 0);
1506 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1507 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1508 if (V != &IE)
1509 return replaceInstUsesWith(IE, V);
1510 return &IE;
1511 }
1512 }
1513
1514 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1515 return Shuf;
1516
1517 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1518 return NewInsElt;
1519
1520 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1521 return Broadcast;
1522
1523 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1524 return Splat;
1525
1526 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1527 return IdentityShuf;
1528
1529 return nullptr;
1530}
1531
1532/// Return true if we can evaluate the specified expression tree if the vector
1533/// elements were shuffled in a different order.
1534static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1535 unsigned Depth = 5) {
1536 // We can always reorder the elements of a constant.
1537 if (isa<Constant>(V))
1538 return true;
1539
1540 // We won't reorder vector arguments. No IPO here.
1541 Instruction *I = dyn_cast<Instruction>(V);
1542 if (!I) return false;
1543
1544 // Two users may expect different orders of the elements. Don't try it.
1545 if (!I->hasOneUse())
1546 return false;
1547
1548 if (Depth == 0) return false;
1549
1550 switch (I->getOpcode()) {
1551 case Instruction::UDiv:
1552 case Instruction::SDiv:
1553 case Instruction::URem:
1554 case Instruction::SRem:
1555 // Propagating an undefined shuffle mask element to integer div/rem is not
1556 // allowed because those opcodes can create immediate undefined behavior
1557 // from an undefined element in an operand.
1558 if (llvm::is_contained(Mask, -1))
1559 return false;
1560 LLVM_FALLTHROUGH[[gnu::fallthrough]];
1561 case Instruction::Add:
1562 case Instruction::FAdd:
1563 case Instruction::Sub:
1564 case Instruction::FSub:
1565 case Instruction::Mul:
1566 case Instruction::FMul:
1567 case Instruction::FDiv:
1568 case Instruction::FRem:
1569 case Instruction::Shl:
1570 case Instruction::LShr:
1571 case Instruction::AShr:
1572 case Instruction::And:
1573 case Instruction::Or:
1574 case Instruction::Xor:
1575 case Instruction::ICmp:
1576 case Instruction::FCmp:
1577 case Instruction::Trunc:
1578 case Instruction::ZExt:
1579 case Instruction::SExt:
1580 case Instruction::FPToUI:
1581 case Instruction::FPToSI:
1582 case Instruction::UIToFP:
1583 case Instruction::SIToFP:
1584 case Instruction::FPTrunc:
1585 case Instruction::FPExt:
1586 case Instruction::GetElementPtr: {
1587 // Bail out if we would create longer vector ops. We could allow creating
1588 // longer vector ops, but that may result in more expensive codegen.
1589 Type *ITy = I->getType();
1590 if (ITy->isVectorTy() &&
1591 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1592 return false;
1593 for (Value *Operand : I->operands()) {
1594 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1595 return false;
1596 }
1597 return true;
1598 }
1599 case Instruction::InsertElement: {
1600 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1601 if (!CI) return false;
1602 int ElementNumber = CI->getLimitedValue();
1603
1604 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1605 // can't put an element into multiple indices.
1606 bool SeenOnce = false;
1607 for (int i = 0, e = Mask.size(); i != e; ++i) {
1608 if (Mask[i] == ElementNumber) {
1609 if (SeenOnce)
1610 return false;
1611 SeenOnce = true;
1612 }
1613 }
1614 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1615 }
1616 }
1617 return false;
1618}
1619
1620/// Rebuild a new instruction just like 'I' but with the new operands given.
1621/// In the event of type mismatch, the type of the operands is correct.
1622static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1623 // We don't want to use the IRBuilder here because we want the replacement
1624 // instructions to appear next to 'I', not the builder's insertion point.
1625 switch (I->getOpcode()) {
1626 case Instruction::Add:
1627 case Instruction::FAdd:
1628 case Instruction::Sub:
1629 case Instruction::FSub:
1630 case Instruction::Mul:
1631 case Instruction::FMul:
1632 case Instruction::UDiv:
1633 case Instruction::SDiv:
1634 case Instruction::FDiv:
1635 case Instruction::URem:
1636 case Instruction::SRem:
1637 case Instruction::FRem:
1638 case Instruction::Shl:
1639 case Instruction::LShr:
1640 case Instruction::AShr:
1641 case Instruction::And:
1642 case Instruction::Or:
1643 case Instruction::Xor: {
1644 BinaryOperator *BO = cast<BinaryOperator>(I);
1645 assert(NewOps.size() == 2 && "binary operator with #ops != 2")(static_cast<void> (0));
1646 BinaryOperator *New =
1647 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1648 NewOps[0], NewOps[1], "", BO);
1649 if (isa<OverflowingBinaryOperator>(BO)) {
1650 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1651 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1652 }
1653 if (isa<PossiblyExactOperator>(BO)) {
1654 New->setIsExact(BO->isExact());
1655 }
1656 if (isa<FPMathOperator>(BO))
1657 New->copyFastMathFlags(I);
1658 return New;
1659 }
1660 case Instruction::ICmp:
1661 assert(NewOps.size() == 2 && "icmp with #ops != 2")(static_cast<void> (0));
1662 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1663 NewOps[0], NewOps[1]);
1664 case Instruction::FCmp:
1665 assert(NewOps.size() == 2 && "fcmp with #ops != 2")(static_cast<void> (0));
1666 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1667 NewOps[0], NewOps[1]);
1668 case Instruction::Trunc:
1669 case Instruction::ZExt:
1670 case Instruction::SExt:
1671 case Instruction::FPToUI:
1672 case Instruction::FPToSI:
1673 case Instruction::UIToFP:
1674 case Instruction::SIToFP:
1675 case Instruction::FPTrunc:
1676 case Instruction::FPExt: {
1677 // It's possible that the mask has a different number of elements from
1678 // the original cast. We recompute the destination type to match the mask.
1679 Type *DestTy = VectorType::get(
1680 I->getType()->getScalarType(),
1681 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1682 assert(NewOps.size() == 1 && "cast with #ops != 1")(static_cast<void> (0));
1683 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1684 "", I);
1685 }
1686 case Instruction::GetElementPtr: {
1687 Value *Ptr = NewOps[0];
1688 ArrayRef<Value*> Idx = NewOps.slice(1);
1689 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1690 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1691 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1692 return GEP;
1693 }
1694 }
1695 llvm_unreachable("failed to rebuild vector instructions")__builtin_unreachable();
1696}
1697
1698static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1699 // Mask.size() does not need to be equal to the number of vector elements.
1700
1701 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements")(static_cast<void> (0));
1702 Type *EltTy = V->getType()->getScalarType();
1703 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1704 if (match(V, m_Undef()))
1705 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1706
1707 if (isa<ConstantAggregateZero>(V))
1708 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1709
1710 if (Constant *C = dyn_cast<Constant>(V))
1711 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1712 Mask);
1713
1714 Instruction *I = cast<Instruction>(V);
1715 switch (I->getOpcode()) {
1716 case Instruction::Add:
1717 case Instruction::FAdd:
1718 case Instruction::Sub:
1719 case Instruction::FSub:
1720 case Instruction::Mul:
1721 case Instruction::FMul:
1722 case Instruction::UDiv:
1723 case Instruction::SDiv:
1724 case Instruction::FDiv:
1725 case Instruction::URem:
1726 case Instruction::SRem:
1727 case Instruction::FRem:
1728 case Instruction::Shl:
1729 case Instruction::LShr:
1730 case Instruction::AShr:
1731 case Instruction::And:
1732 case Instruction::Or:
1733 case Instruction::Xor:
1734 case Instruction::ICmp:
1735 case Instruction::FCmp:
1736 case Instruction::Trunc:
1737 case Instruction::ZExt:
1738 case Instruction::SExt:
1739 case Instruction::FPToUI:
1740 case Instruction::FPToSI:
1741 case Instruction::UIToFP:
1742 case Instruction::SIToFP:
1743 case Instruction::FPTrunc:
1744 case Instruction::FPExt:
1745 case Instruction::Select:
1746 case Instruction::GetElementPtr: {
1747 SmallVector<Value*, 8> NewOps;
1748 bool NeedsRebuild =
1749 (Mask.size() !=
1750 cast<FixedVectorType>(I->getType())->getNumElements());
1751 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1752 Value *V;
1753 // Recursively call evaluateInDifferentElementOrder on vector arguments
1754 // as well. E.g. GetElementPtr may have scalar operands even if the
1755 // return value is a vector, so we need to examine the operand type.
1756 if (I->getOperand(i)->getType()->isVectorTy())
1757 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1758 else
1759 V = I->getOperand(i);
1760 NewOps.push_back(V);
1761 NeedsRebuild |= (V != I->getOperand(i));
1762 }
1763 if (NeedsRebuild) {
1764 return buildNew(I, NewOps);
1765 }
1766 return I;
1767 }
1768 case Instruction::InsertElement: {
1769 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1770
1771 // The insertelement was inserting at Element. Figure out which element
1772 // that becomes after shuffling. The answer is guaranteed to be unique
1773 // by CanEvaluateShuffled.
1774 bool Found = false;
1775 int Index = 0;
1776 for (int e = Mask.size(); Index != e; ++Index) {
1777 if (Mask[Index] == Element) {
1778 Found = true;
1779 break;
1780 }
1781 }
1782
1783 // If element is not in Mask, no need to handle the operand 1 (element to
1784 // be inserted). Just evaluate values in operand 0 according to Mask.
1785 if (!Found)
1786 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1787
1788 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1789 return InsertElementInst::Create(V, I->getOperand(1),
1790 ConstantInt::get(I32Ty, Index), "", I);
1791 }
1792 }
1793 llvm_unreachable("failed to reorder elements of vector instruction!")__builtin_unreachable();
1794}
1795
1796// Returns true if the shuffle is extracting a contiguous range of values from
1797// LHS, for example:
1798// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1799// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1800// Shuffles to: |EE|FF|GG|HH|
1801// +--+--+--+--+
1802static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1803 ArrayRef<int> Mask) {
1804 unsigned LHSElems =
1805 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1806 unsigned MaskElems = Mask.size();
1807 unsigned BegIdx = Mask.front();
1808 unsigned EndIdx = Mask.back();
1809 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1810 return false;
1811 for (unsigned I = 0; I != MaskElems; ++I)
1812 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1813 return false;
1814 return true;
1815}
1816
1817/// These are the ingredients in an alternate form binary operator as described
1818/// below.
1819struct BinopElts {
1820 BinaryOperator::BinaryOps Opcode;
1821 Value *Op0;
1822 Value *Op1;
1823 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1824 Value *V0 = nullptr, Value *V1 = nullptr) :
1825 Opcode(Opc), Op0(V0), Op1(V1) {}
1826 operator bool() const { return Opcode != 0; }
1827};
1828
1829/// Binops may be transformed into binops with different opcodes and operands.
1830/// Reverse the usual canonicalization to enable folds with the non-canonical
1831/// form of the binop. If a transform is possible, return the elements of the
1832/// new binop. If not, return invalid elements.
1833static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1834 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1835 Type *Ty = BO->getType();
1836 switch (BO->getOpcode()) {
1837 case Instruction::Shl: {
1838 // shl X, C --> mul X, (1 << C)
1839 Constant *C;
1840 if (match(BO1, m_Constant(C))) {
1841 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1842 return { Instruction::Mul, BO0, ShlOne };
1843 }
1844 break;
1845 }
1846 case Instruction::Or: {
1847 // or X, C --> add X, C (when X and C have no common bits set)
1848 const APInt *C;
1849 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1850 return { Instruction::Add, BO0, BO1 };
1851 break;
1852 }
1853 default:
1854 break;
1855 }
1856 return {};
1857}
1858
1859static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1860 assert(Shuf.isSelect() && "Must have select-equivalent shuffle")(static_cast<void> (0));
1861
1862 // Are we shuffling together some value and that same value after it has been
1863 // modified by a binop with a constant?
1864 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1865 Constant *C;
1866 bool Op0IsBinop;
1867 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1868 Op0IsBinop = true;
1869 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1870 Op0IsBinop = false;
1871 else
1872 return nullptr;
1873
1874 // The identity constant for a binop leaves a variable operand unchanged. For
1875 // a vector, this is a splat of something like 0, -1, or 1.
1876 // If there's no identity constant for this binop, we're done.
1877 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1878 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1879 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1880 if (!IdC)
1881 return nullptr;
1882
1883 // Shuffle identity constants into the lanes that return the original value.
1884 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1885 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1886 // The existing binop constant vector remains in the same operand position.
1887 ArrayRef<int> Mask = Shuf.getShuffleMask();
1888 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1889 ConstantExpr::getShuffleVector(IdC, C, Mask);
1890
1891 bool MightCreatePoisonOrUB =
1892 is_contained(Mask, UndefMaskElem) &&
1893 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1894 if (MightCreatePoisonOrUB)
1895 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
1896
1897 // shuf (bop X, C), X, M --> bop X, C'
1898 // shuf X, (bop X, C), M --> bop X, C'
1899 Value *X = Op0IsBinop ? Op1 : Op0;
1900 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1901 NewBO->copyIRFlags(BO);
1902
1903 // An undef shuffle mask element may propagate as an undef constant element in
1904 // the new binop. That would produce poison where the original code might not.
1905 // If we already made a safe constant, then there's no danger.
1906 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
1907 NewBO->dropPoisonGeneratingFlags();
1908 return NewBO;
1909}
1910
1911/// If we have an insert of a scalar to a non-zero element of an undefined
1912/// vector and then shuffle that value, that's the same as inserting to the zero
1913/// element and shuffling. Splatting from the zero element is recognized as the
1914/// canonical form of splat.
1915static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1916 InstCombiner::BuilderTy &Builder) {
1917 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1918 ArrayRef<int> Mask = Shuf.getShuffleMask();
1919 Value *X;
1920 uint64_t IndexC;
1921
1922 // Match a shuffle that is a splat to a non-zero element.
1923 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
1924 m_ConstantInt(IndexC)))) ||
1925 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
1926 return nullptr;
1927
1928 // Insert into element 0 of an undef vector.
1929 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1930 Constant *Zero = Builder.getInt32(0);
1931 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1932
1933 // Splat from element 0. Any mask element that is undefined remains undefined.
1934 // For example:
1935 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1936 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1937 unsigned NumMaskElts =
1938 cast<FixedVectorType>(Shuf.getType())->getNumElements();
1939 SmallVector<int, 16> NewMask(NumMaskElts, 0);
1940 for (unsigned i = 0; i != NumMaskElts; ++i)
1941 if (Mask[i] == UndefMaskElem)
1942 NewMask[i] = Mask[i];
1943
1944 return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
1945}
1946
1947/// Try to fold shuffles that are the equivalent of a vector select.
1948static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1949 InstCombiner::BuilderTy &Builder,
1950 const DataLayout &DL) {
1951 if (!Shuf.isSelect())
1952 return nullptr;
1953
1954 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1955 // Commuting undef to operand 0 conflicts with another canonicalization.
1956 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
1957 if (!match(Shuf.getOperand(1), m_Undef()) &&
1958 Shuf.getMaskValue(0) >= (int)NumElts) {
1959 // TODO: Can we assert that both operands of a shuffle-select are not undef
1960 // (otherwise, it would have been folded by instsimplify?
1961 Shuf.commute();
1962 return &Shuf;
1963 }
1964
1965 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1966 return I;
1967
1968 BinaryOperator *B0, *B1;
1969 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1970 !match(Shuf.getOperand(1), m_BinOp(B1)))
1971 return nullptr;
1972
1973 Value *X, *Y;
1974 Constant *C0, *C1;
1975 bool ConstantsAreOp1;
1976 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1977 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
1978 ConstantsAreOp1 = true;
1979 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1980 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
1981 ConstantsAreOp1 = false;
1982 else
1983 return nullptr;
1984
1985 // We need matching binops to fold the lanes together.
1986 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1987 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1988 bool DropNSW = false;
1989 if (ConstantsAreOp1 && Opc0 != Opc1) {
1990 // TODO: We drop "nsw" if shift is converted into multiply because it may
1991 // not be correct when the shift amount is BitWidth - 1. We could examine
1992 // each vector element to determine if it is safe to keep that flag.
1993 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1994 DropNSW = true;
1995 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1996 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop")(static_cast<void> (0));
1997 Opc0 = AltB0.Opcode;
1998 C0 = cast<Constant>(AltB0.Op1);
1999 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2000 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop")(static_cast<void> (0));
2001 Opc1 = AltB1.Opcode;
2002 C1 = cast<Constant>(AltB1.Op1);
2003 }
2004 }
2005
2006 if (Opc0 != Opc1)
2007 return nullptr;
2008
2009 // The opcodes must be the same. Use a new name to make that clear.
2010 BinaryOperator::BinaryOps BOpc = Opc0;
2011
2012 // Select the constant elements needed for the single binop.
2013 ArrayRef<int> Mask = Shuf.getShuffleMask();
2014 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2015
2016 // We are moving a binop after a shuffle. When a shuffle has an undefined
2017 // mask element, the result is undefined, but it is not poison or undefined
2018 // behavior. That is not necessarily true for div/rem/shift.
2019 bool MightCreatePoisonOrUB =
2020 is_contained(Mask, UndefMaskElem) &&
2021 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2022 if (MightCreatePoisonOrUB)
2023 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2024 ConstantsAreOp1);
2025
2026 Value *V;
2027 if (X == Y) {
2028 // Remove a binop and the shuffle by rearranging the constant:
2029 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2030 // shuffle (op C0, V), (op C1, V), M --> op C', V
2031 V = X;
2032 } else {
2033 // If there are 2 different variable operands, we must create a new shuffle
2034 // (select) first, so check uses to ensure that we don't end up with more
2035 // instructions than we started with.
2036 if (!B0->hasOneUse() && !B1->hasOneUse())
2037 return nullptr;
2038
2039 // If we use the original shuffle mask and op1 is *variable*, we would be
2040 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2041 // poison. We do not have to guard against UB when *constants* are op1
2042 // because safe constants guarantee that we do not overflow sdiv/srem (and
2043 // there's no danger for other opcodes).
2044 // TODO: To allow this case, create a new shuffle mask with no undefs.
2045 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2046 return nullptr;
2047
2048 // Note: In general, we do not create new shuffles in InstCombine because we
2049 // do not know if a target can lower an arbitrary shuffle optimally. In this
2050 // case, the shuffle uses the existing mask, so there is no additional risk.
2051
2052 // Select the variable vectors first, then perform the binop:
2053 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2054 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2055 V = Builder.CreateShuffleVector(X, Y, Mask);
2056 }
2057
2058 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
2059 BinaryOperator::Create(BOpc, NewC, V);
2060
2061 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2062 // 1. If we changed an opcode, poison conditions might have changed.
2063 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2064 // where the original code did not. But if we already made a safe constant,
2065 // then there's no danger.
2066 NewBO->copyIRFlags(B0);
2067 NewBO->andIRFlags(B1);
2068 if (DropNSW)
2069 NewBO->setHasNoSignedWrap(false);
2070 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2071 NewBO->dropPoisonGeneratingFlags();
2072 return NewBO;
2073}
2074
2075/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2076/// Example (little endian):
2077/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2078static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2079 bool IsBigEndian) {
2080 // This must be a bitcasted shuffle of 1 vector integer operand.
2081 Type *DestType = Shuf.getType();
2082 Value *X;
2083 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2084 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2085 return nullptr;
2086
2087 // The source type must have the same number of elements as the shuffle,
2088 // and the source element type must be larger than the shuffle element type.
2089 Type *SrcType = X->getType();
2090 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2091 cast<FixedVectorType>(SrcType)->getNumElements() !=
2092 cast<FixedVectorType>(DestType)->getNumElements() ||
2093 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2094 return nullptr;
2095
2096 assert(Shuf.changesLength() && !Shuf.increasesLength() &&(static_cast<void> (0))
2097 "Expected a shuffle that decreases length")(static_cast<void> (0));
2098
2099 // Last, check that the mask chooses the correct low bits for each narrow
2100 // element in the result.
2101 uint64_t TruncRatio =
2102 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2103 ArrayRef<int> Mask = Shuf.getShuffleMask();
2104 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2105 if (Mask[i] == UndefMaskElem)
2106 continue;
2107 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2108 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits")(static_cast<void> (0));
2109 if (Mask[i] != (int)LSBIndex)
2110 return nullptr;
2111 }
2112
2113 return new TruncInst(X, DestType);
2114}
2115
2116/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2117/// narrowing (concatenating with undef and extracting back to the original
2118/// length). This allows replacing the wide select with a narrow select.
2119static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2120 InstCombiner::BuilderTy &Builder) {
2121 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2122 // of the 1st vector operand of a shuffle.
2123 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2124 return nullptr;
2125
2126 // The vector being shuffled must be a vector select that we can eliminate.
2127 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2128 Value *Cond, *X, *Y;
2129 if (!match(Shuf.getOperand(0),
2130 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2131 return nullptr;
2132
2133 // We need a narrow condition value. It must be extended with undef elements
2134 // and have the same number of elements as this shuffle.
2135 unsigned NarrowNumElts =
2136 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2137 Value *NarrowCond;
2138 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2139 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2140 NarrowNumElts ||
2141 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2142 return nullptr;
2143
2144 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2145 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2146 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2147 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2148 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2149}
2150
2151/// Try to fold an extract subvector operation.
2152static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2153 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2154 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2155 return nullptr;
2156
2157 // Check if we are extracting all bits of an inserted scalar:
2158 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2159 Value *X;
2160 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2161 X->getType()->getPrimitiveSizeInBits() ==
2162 Shuf.getType()->getPrimitiveSizeInBits())
2163 return new BitCastInst(X, Shuf.getType());
2164
2165 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2166 Value *Y;
2167 ArrayRef<int> Mask;
2168 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2169 return nullptr;
2170
2171 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2172 // then combining may result in worse codegen.
2173 if (!Op0->hasOneUse())
2174 return nullptr;
2175
2176 // We are extracting a subvector from a shuffle. Remove excess elements from
2177 // the 1st shuffle mask to eliminate the extract.
2178 //
2179 // This transform is conservatively limited to identity extracts because we do
2180 // not allow arbitrary shuffle mask creation as a target-independent transform
2181 // (because we can't guarantee that will lower efficiently).
2182 //
2183 // If the extracting shuffle has an undef mask element, it transfers to the
2184 // new shuffle mask. Otherwise, copy the original mask element. Example:
2185 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2186 // shuf X, Y, <C0, undef, C2, undef>
2187 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2188 SmallVector<int, 16> NewMask(NumElts);
2189 assert(NumElts < Mask.size() &&(static_cast<void> (0))
2190 "Identity with extract must have less elements than its inputs")(static_cast<void> (0));
2191
2192 for (unsigned i = 0; i != NumElts; ++i) {
2193 int ExtractMaskElt = Shuf.getMaskValue(i);
2194 int MaskElt = Mask[i];
2195 NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2196 }
2197 return new ShuffleVectorInst(X, Y, NewMask);
2198}
2199
2200/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2201/// operand with the operand of an insertelement.
2202static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2203 InstCombinerImpl &IC) {
2204 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2205 SmallVector<int, 16> Mask;
2206 Shuf.getShuffleMask(Mask);
2207
2208 // The shuffle must not change vector sizes.
2209 // TODO: This restriction could be removed if the insert has only one use
2210 // (because the transform would require a new length-changing shuffle).
2211 int NumElts = Mask.size();
2212 if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
2213 return nullptr;
2214
2215 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2216 // not be able to handle it there if the insertelement has >1 use.
2217 // If the shuffle has an insertelement operand but does not choose the
2218 // inserted scalar element from that value, then we can replace that shuffle
2219 // operand with the source vector of the insertelement.
2220 Value *X;
2221 uint64_t IdxC;
2222 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2223 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2224 if (!is_contained(Mask, (int)IdxC))
2225 return IC.replaceOperand(Shuf, 0, X);
2226 }
2227 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2228 // Offset the index constant by the vector width because we are checking for
2229 // accesses to the 2nd vector input of the shuffle.
2230 IdxC += NumElts;
2231 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2232 if (!is_contained(Mask, (int)IdxC))
2233 return IC.replaceOperand(Shuf, 1, X);
2234 }
2235
2236 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2237 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2238 // We need an insertelement with a constant index.
2239 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2240 m_ConstantInt(IndexC))))
2241 return false;
2242
2243 // Test the shuffle mask to see if it splices the inserted scalar into the
2244 // operand 1 vector of the shuffle.
2245 int NewInsIndex = -1;
2246 for (int i = 0; i != NumElts; ++i) {
2247 // Ignore undef mask elements.
2248 if (Mask[i] == -1)
2249 continue;
2250
2251 // The shuffle takes elements of operand 1 without lane changes.
2252 if (Mask[i] == NumElts + i)
2253 continue;
2254
2255 // The shuffle must choose the inserted scalar exactly once.
2256 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2257 return false;
2258
2259 // The shuffle is placing the inserted scalar into element i.
2260 NewInsIndex = i;
2261 }
2262
2263 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?")(static_cast<void> (0));
2264
2265 // Index is updated to the potentially translated insertion lane.
2266 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2267 return true;
2268 };
2269
2270 // If the shuffle is unnecessary, insert the scalar operand directly into
2271 // operand 1 of the shuffle. Example:
2272 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2273 Value *Scalar;
2274 ConstantInt *IndexC;
2275 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2276 return InsertElementInst::Create(V1, Scalar, IndexC);
2277
2278 // Try again after commuting shuffle. Example:
2279 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2280 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2281 std::swap(V0, V1);
2282 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2283 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2284 return InsertElementInst::Create(V1, Scalar, IndexC);
2285
2286 return nullptr;
2287}
2288
2289static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2290 // Match the operands as identity with padding (also known as concatenation
2291 // with undef) shuffles of the same source type. The backend is expected to
2292 // recreate these concatenations from a shuffle of narrow operands.
2293 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2294 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2295 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2296 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2297 return nullptr;
2298
2299 // We limit this transform to power-of-2 types because we expect that the
2300 // backend can convert the simplified IR patterns to identical nodes as the
2301 // original IR.
2302 // TODO: If we can verify the same behavior for arbitrary types, the
2303 // power-of-2 checks can be removed.
2304 Value *X = Shuffle0->getOperand(0);
2305 Value *Y = Shuffle1->getOperand(0);
2306 if (X->getType() != Y->getType() ||
2307 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2308 !isPowerOf2_32(
2309 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2310 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2311 match(X, m_Undef()) || match(Y, m_Undef()))
2312 return nullptr;
2313 assert(match(Shuffle0->getOperand(1), m_Undef()) &&(static_cast<void> (0))
2314 match(Shuffle1->getOperand(1), m_Undef()) &&(static_cast<void> (0))
2315 "Unexpected operand for identity shuffle")(static_cast<void> (0));
2316
2317 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2318 // operands directly by adjusting the shuffle mask to account for the narrower
2319 // types:
2320 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2321 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2322 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2323 assert(WideElts > NarrowElts && "Unexpected types for identity with padding")(static_cast<void> (0));
2324
2325 ArrayRef<int> Mask = Shuf.getShuffleMask();
2326 SmallVector<int, 16> NewMask(Mask.size(), -1);
2327 for (int i = 0, e = Mask.size(); i != e; ++i) {
2328 if (Mask[i] == -1)
2329 continue;
2330
2331 // If this shuffle is choosing an undef element from 1 of the sources, that
2332 // element is undef.
2333 if (Mask[i] < WideElts) {
2334 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2335 continue;
2336 } else {
2337 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2338 continue;
2339 }
2340
2341 // If this shuffle is choosing from the 1st narrow op, the mask element is
2342 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2343 // element is offset down to adjust for the narrow vector widths.
2344 if (Mask[i] < WideElts) {
2345 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask")(static_cast<void> (0));
2346 NewMask[i] = Mask[i];
2347 } else {
2348 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask")(static_cast<void> (0));
2349 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2350 }
2351 }
2352 return new ShuffleVectorInst(X, Y, NewMask);
2353}
2354
2355Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2356 Value *LHS = SVI.getOperand(0);
2357 Value *RHS = SVI.getOperand(1);
2358 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2359 if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2360 SVI.getType(), ShufQuery))
2361 return replaceInstUsesWith(SVI, V);
2362
2363 // Bail out for scalable vectors
2364 if (isa<ScalableVectorType>(LHS->getType()))
2365 return nullptr;
2366
2367 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2368 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2369
2370 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2371 //
2372 // if X and Y are of the same (vector) type, and the element size is not
2373 // changed by the bitcasts, we can distribute the bitcasts through the
2374 // shuffle, hopefully reducing the number of instructions. We make sure that
2375 // at least one bitcast only has one use, so we don't *increase* the number of
2376 // instructions here.
2377 Value *X, *Y;
2378 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2379 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2380 X->getType()->getScalarSizeInBits() ==
2381 SVI.getType()->getScalarSizeInBits() &&
2382 (LHS->hasOneUse() || RHS->hasOneUse())) {
2383 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2384 SVI.getName() + ".uncasted");
2385 return new BitCastInst(V, SVI.getType());
2386 }
2387
2388 ArrayRef<int> Mask = SVI.getShuffleMask();
2389 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
2390
2391 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2392 // simulated shuffle can simplify, then this shuffle is unnecessary:
2393 // shuf (bitcast X), undef, Mask --> bitcast X'
2394 // TODO: This could be extended to allow length-changing shuffles.
2395 // The transform might also be obsoleted if we allowed canonicalization
2396 // of bitcasted shuffles.
2397 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2398 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2399 // Try to create a scaled mask constant.
2400 auto *XType = cast<FixedVectorType>(X->getType());
2401 unsigned XNumElts = XType->getNumElements();
2402 SmallVector<int, 16> ScaledMask;
2403 if (XNumElts >= VWidth) {
2404 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast")(static_cast<void> (0));
2405 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2406 } else {
2407 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast")(static_cast<void> (0));
2408 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2409 ScaledMask.clear();
2410 }
2411 if (!ScaledMask.empty()) {
2412 // If the shuffled source vector simplifies, cast that value to this
2413 // shuffle's type.
2414 if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
2415 ScaledMask, XType, ShufQuery))
2416 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2417 }
2418 }
2419
2420 // shuffle x, x, mask --> shuffle x, undef, mask'
2421 if (LHS == RHS) {
2422 assert(!match(RHS, m_Undef()) &&(static_cast<void> (0))
2423 "Shuffle with 2 undef ops not simplified?")(static_cast<void> (0));
2424 // Remap any references to RHS to use LHS.
2425 SmallVector<int, 16> Elts;
2426 for (unsigned i = 0; i != VWidth; ++i) {
2427 // Propagate undef elements or force mask to LHS.
2428 if (Mask[i] < 0)
2429 Elts.push_back(UndefMaskElem);
2430 else
2431 Elts.push_back(Mask[i] % LHSWidth);
2432 }
2433 return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
2434 }
2435
2436 // shuffle undef, x, mask --> shuffle x, undef, mask'
2437 if (match(LHS, m_Undef())) {
2438 SVI.commute();
2439 return &SVI;
2440 }
2441
2442 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2443 return I;
2444
2445 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
2446 return I;
2447
2448 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2449 return I;
2450
2451 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2452 return I;
2453
2454 APInt UndefElts(VWidth, 0);
2455 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2456 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2457 if (V != &SVI)
2458 return replaceInstUsesWith(SVI, V);
2459 return &SVI;
2460 }
2461
2462 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2463 return I;
2464
2465 // These transforms have the potential to lose undef knowledge, so they are
2466 // intentionally placed after SimplifyDemandedVectorElts().
2467 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2468 return I;
2469 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2470 return I;
2471
2472 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2473 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
2474 return replaceInstUsesWith(SVI, V);
2475 }
2476
2477 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2478 // a non-vector type. We can instead bitcast the original vector followed by
2479 // an extract of the desired element:
2480 //
2481 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2482 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2483 // %1 = bitcast <4 x i8> %sroa to i32
2484 // Becomes:
2485 // %bc = bitcast <16 x i8> %in to <4 x i32>
2486 // %ext = extractelement <4 x i32> %bc, i32 0
2487 //
2488 // If the shuffle is extracting a contiguous range of values from the input
2489 // vector then each use which is a bitcast of the extracted size can be
2490 // replaced. This will work if the vector types are compatible, and the begin
2491 // index is aligned to a value in the casted vector type. If the begin index
2492 // isn't aligned then we can shuffle the original vector (keeping the same
2493 // vector type) before extracting.
2494 //
2495 // This code will bail out if the target type is fundamentally incompatible
2496 // with vectors of the source type.
2497 //
2498 // Example of <16 x i8>, target type i32:
2499 // Index range [4,8): v-----------v Will work.
2500 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2501 // <16 x i8>: | | | | | | | | | | | | | | | | |
2502 // <4 x i32>: | | | | |
2503 // +-----------+-----------+-----------+-----------+
2504 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2505 // Target type i40: ^--------------^ Won't work, bail.
2506 bool MadeChange = false;
2507 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2508 Value *V = LHS;
2509 unsigned MaskElems = Mask.size();
2510 auto *SrcTy = cast<FixedVectorType>(V->getType());
2511 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2512 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2513 assert(SrcElemBitWidth && "vector elements must have a bitwidth")(static_cast<void> (0));
2514 unsigned SrcNumElems = SrcTy->getNumElements();
2515 SmallVector<BitCastInst *, 8> BCs;
2516 DenseMap<Type *, Value *> NewBCs;
2517 for (User *U : SVI.users())
2518 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2519 if (!BC->use_empty())
2520 // Only visit bitcasts that weren't previously handled.
2521 BCs.push_back(BC);
2522 for (BitCastInst *BC : BCs) {
2523 unsigned BegIdx = Mask.front();
2524 Type *TgtTy = BC->getDestTy();
2525 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2526 if (!TgtElemBitWidth)
2527 continue;
2528 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2529 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2530 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2531 if (!VecBitWidthsEqual)
2532 continue;
2533 if (!VectorType::isValidElementType(TgtTy))
2534 continue;
2535 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2536 if (!BegIsAligned) {
2537 // Shuffle the input so [0,NumElements) contains the output, and
2538 // [NumElems,SrcNumElems) is undef.
2539 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2540 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2541 ShuffleMask[I] = Idx;
2542 V = Builder.CreateShuffleVector(V, ShuffleMask,
2543 SVI.getName() + ".extract");
2544 BegIdx = 0;
2545 }
2546 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2547 assert(SrcElemsPerTgtElem)(static_cast<void> (0));
2548 BegIdx /= SrcElemsPerTgtElem;
2549 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2550 auto *NewBC =
2551 BCAlreadyExists
2552 ? NewBCs[CastSrcTy]
2553 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2554 if (!BCAlreadyExists)
2555 NewBCs[CastSrcTy] = NewBC;
2556 auto *Ext = Builder.CreateExtractElement(
2557 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2558 // The shufflevector isn't being replaced: the bitcast that used it
2559 // is. InstCombine will visit the newly-created instructions.
2560 replaceInstUsesWith(*BC, Ext);
2561 MadeChange = true;
2562 }
2563 }
2564
2565 // If the LHS is a shufflevector itself, see if we can combine it with this
2566 // one without producing an unusual shuffle.
2567 // Cases that might be simplified:
2568 // 1.
2569 // x1=shuffle(v1,v2,mask1)
2570 // x=shuffle(x1,undef,mask)
2571 // ==>
2572 // x=shuffle(v1,undef,newMask)
2573 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2574 // 2.
2575 // x1=shuffle(v1,undef,mask1)
2576 // x=shuffle(x1,x2,mask)
2577 // where v1.size() == mask1.size()
2578 // ==>
2579 // x=shuffle(v1,x2,newMask)
2580 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2581 // 3.
2582 // x2=shuffle(v2,undef,mask2)
2583 // x=shuffle(x1,x2,mask)
2584 // where v2.size() == mask2.size()
2585 // ==>
2586 // x=shuffle(x1,v2,newMask)
2587 // newMask[i] = (mask[i] < x1.size())
2588 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2589 // 4.
2590 // x1=shuffle(v1,undef,mask1)
2591 // x2=shuffle(v2,undef,mask2)
2592 // x=shuffle(x1,x2,mask)
2593 // where v1.size() == v2.size()
2594 // ==>
2595 // x=shuffle(v1,v2,newMask)
2596 // newMask[i] = (mask[i] < x1.size())
2597 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2598 //
2599 // Here we are really conservative:
2600 // we are absolutely afraid of producing a shuffle mask not in the input
2601 // program, because the code gen may not be smart enough to turn a merged
2602 // shuffle into two specific shuffles: it may produce worse code. As such,
2603 // we only merge two shuffles if the result is either a splat or one of the
2604 // input shuffle masks. In this case, merging the shuffles just removes
2605 // one instruction, which we know is safe. This is good for things like
2606 // turning: (splat(splat)) -> splat, or
2607 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2608 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2609 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2610 if (LHSShuffle)
2611 if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2612 LHSShuffle = nullptr;
2613 if (RHSShuffle)
2614 if (!match(RHSShuffle->getOperand(1), m_Undef()))
2615 RHSShuffle = nullptr;
2616 if (!LHSShuffle && !RHSShuffle)
2617 return MadeChange ? &SVI : nullptr;
2618
2619 Value* LHSOp0 = nullptr;
2620 Value* LHSOp1 = nullptr;
2621 Value* RHSOp0 = nullptr;
2622 unsigned LHSOp0Width = 0;
2623 unsigned RHSOp0Width = 0;
2624 if (LHSShuffle) {
2625 LHSOp0 = LHSShuffle->getOperand(0);
2626 LHSOp1 = LHSShuffle->getOperand(1);
2627 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2628 }
2629 if (RHSShuffle) {
2630 RHSOp0 = RHSShuffle->getOperand(0);
2631 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2632 }
2633 Value* newLHS = LHS;
2634 Value* newRHS = RHS;
2635 if (LHSShuffle) {
2636 // case 1
2637 if (match(RHS, m_Undef())) {
2638 newLHS = LHSOp0;
2639 newRHS = LHSOp1;
2640 }
2641 // case 2 or 4
2642 else if (LHSOp0Width == LHSWidth) {
2643 newLHS = LHSOp0;
2644 }
2645 }
2646 // case 3 or 4
2647 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2648 newRHS = RHSOp0;
2649 }
2650 // case 4
2651 if (LHSOp0 == RHSOp0) {
2652 newLHS = LHSOp0;
2653 newRHS = nullptr;
2654 }
2655
2656 if (newLHS == LHS && newRHS == RHS)
2657 return MadeChange ? &SVI : nullptr;
2658
2659 ArrayRef<int> LHSMask;
2660 ArrayRef<int> RHSMask;
2661 if (newLHS != LHS)
2662 LHSMask = LHSShuffle->getShuffleMask();
2663 if (RHSShuffle && newRHS != RHS)
2664 RHSMask = RHSShuffle->getShuffleMask();
2665
2666 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2667 SmallVector<int, 16> newMask;
2668 bool isSplat = true;
2669 int SplatElt = -1;
2670 // Create a new mask for the new ShuffleVectorInst so that the new
2671 // ShuffleVectorInst is equivalent to the original one.
2672 for (unsigned i = 0; i < VWidth; ++i) {
2673 int eltMask;
2674 if (Mask[i] < 0) {
2675 // This element is an undef value.
2676 eltMask = -1;
2677 } else if (Mask[i] < (int)LHSWidth) {
2678 // This element is from left hand side vector operand.
2679 //
2680 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2681 // new mask value for the element.
2682 if (newLHS != LHS) {
2683 eltMask = LHSMask[Mask[i]];
2684 // If the value selected is an undef value, explicitly specify it
2685 // with a -1 mask value.
2686 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2687 eltMask = -1;
2688 } else
2689 eltMask = Mask[i];
2690 } else {
2691 // This element is from right hand side vector operand
2692 //
2693 // If the value selected is an undef value, explicitly specify it
2694 // with a -1 mask value. (case 1)
2695 if (match(RHS, m_Undef()))
2696 eltMask = -1;
2697 // If RHS is going to be replaced (case 3 or 4), calculate the
2698 // new mask value for the element.
2699 else if (newRHS != RHS) {
2700 eltMask = RHSMask[Mask[i]-LHSWidth];
2701 // If the value selected is an undef value, explicitly specify it
2702 // with a -1 mask value.
2703 if (eltMask >= (int)RHSOp0Width) {
2704 assert(match(RHSShuffle->getOperand(1), m_Undef()) &&(static_cast<void> (0))
2705 "should have been check above")(static_cast<void> (0));
2706 eltMask = -1;
2707 }
2708 } else
2709 eltMask = Mask[i]-LHSWidth;
2710
2711 // If LHS's width is changed, shift the mask value accordingly.
2712 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2713 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2714 // If newRHS == newLHS, we want to remap any references from newRHS to
2715 // newLHS so that we can properly identify splats that may occur due to
2716 // obfuscation across the two vectors.
2717 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2718 eltMask += newLHSWidth;
2719 }
2720
2721 // Check if this could still be a splat.
2722 if (eltMask >= 0) {
2723 if (SplatElt >= 0 && SplatElt != eltMask)
2724 isSplat = false;
2725 SplatElt = eltMask;
2726 }
2727
2728 newMask.push_back(eltMask);
2729 }
2730
2731 // If the result mask is equal to one of the original shuffle masks,
2732 // or is a splat, do the replacement.
2733 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2734 if (!newRHS)
2735 newRHS = UndefValue::get(newLHS->getType());
2736 return new ShuffleVectorInst(newLHS, newRHS, newMask);
2737 }
2738
2739 return MadeChange ? &SVI : nullptr;
2740}

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/IR/DerivedTypes.h

1//===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declarations of classes that represent "derived
10// types". These are things like "arrays of x" or "structure of x, y, z" or
11// "function returning x taking (y,z) as parameters", etc...
12//
13// The implementations of these classes live in the Type.cpp file.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_IR_DERIVEDTYPES_H
18#define LLVM_IR_DERIVEDTYPES_H
19
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Type.h"
24#include "llvm/Support/Casting.h"
25#include "llvm/Support/Compiler.h"
26#include "llvm/Support/TypeSize.h"
27#include <cassert>
28#include <cstdint>
29
30namespace llvm {
31
32class Value;
33class APInt;
34class LLVMContext;
35
36/// Class to represent integer types. Note that this class is also used to
37/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
38/// Int64Ty.
39/// Integer representation type
40class IntegerType : public Type {
41 friend class LLVMContextImpl;
42
43protected:
44 explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){
45 setSubclassData(NumBits);
46 }
47
48public:
49 /// This enum is just used to hold constants we need for IntegerType.
50 enum {
51 MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
52 MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified
53 ///< Note that bit width is stored in the Type classes SubclassData field
54 ///< which has 24 bits. This yields a maximum bit width of 16,777,215
55 ///< bits.
56 };
57
58 /// This static method is the primary way of constructing an IntegerType.
59 /// If an IntegerType with the same NumBits value was previously instantiated,
60 /// that instance will be returned. Otherwise a new one will be created. Only
61 /// one instance with a given NumBits value is ever created.
62 /// Get or create an IntegerType instance.
63 static IntegerType *get(LLVMContext &C, unsigned NumBits);
64
65 /// Returns type twice as wide the input type.
66 IntegerType *getExtendedType() const {
67 return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits());
68 }
69
70 /// Get the number of bits in this IntegerType
71 unsigned getBitWidth() const { return getSubclassData(); }
72
73 /// Return a bitmask with ones set for all of the bits that can be set by an
74 /// unsigned version of this type. This is 0xFF for i8, 0xFFFF for i16, etc.
75 uint64_t getBitMask() const {
76 return ~uint64_t(0UL) >> (64-getBitWidth());
77 }
78
79 /// Return a uint64_t with just the most significant bit set (the sign bit, if
80 /// the value is treated as a signed number).
81 uint64_t getSignBit() const {
82 return 1ULL << (getBitWidth()-1);
83 }
84
85 /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc.
86 /// @returns a bit mask with ones set for all the bits of this type.
87 /// Get a bit mask for this type.
88 APInt getMask() const;
89
90 /// Methods for support type inquiry through isa, cast, and dyn_cast.
91 static bool classof(const Type *T) {
92 return T->getTypeID() == IntegerTyID;
93 }
94};
95
96unsigned Type::getIntegerBitWidth() const {
97 return cast<IntegerType>(this)->getBitWidth();
98}
99
100/// Class to represent function types
101///
102class FunctionType : public Type {
103 FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs);
104
105public:
106 FunctionType(const FunctionType &) = delete;
107 FunctionType &operator=(const FunctionType &) = delete;
108
109 /// This static method is the primary way of constructing a FunctionType.
110 static FunctionType *get(Type *Result,
111 ArrayRef<Type*> Params, bool isVarArg);
112
113 /// Create a FunctionType taking no parameters.
114 static FunctionType *get(Type *Result, bool isVarArg);
115
116 /// Return true if the specified type is valid as a return type.
117 static bool isValidReturnType(Type *RetTy);
118
119 /// Return true if the specified type is valid as an argument type.
120 static bool isValidArgumentType(Type *ArgTy);
121
122 bool isVarArg() const { return getSubclassData()!=0; }
123 Type *getReturnType() const { return ContainedTys[0]; }
124
125 using param_iterator = Type::subtype_iterator;
126
127 param_iterator param_begin() const { return ContainedTys + 1; }
128 param_iterator param_end() const { return &ContainedTys[NumContainedTys]; }
129 ArrayRef<Type *> params() const {
130 return makeArrayRef(param_begin(), param_end());
131 }
132
133 /// Parameter type accessors.
134 Type *getParamType(unsigned i) const { return ContainedTys[i+1]; }
135
136 /// Return the number of fixed parameters this function type requires.
137 /// This does not consider varargs.
138 unsigned getNumParams() const { return NumContainedTys - 1; }
139
140 /// Methods for support type inquiry through isa, cast, and dyn_cast.
141 static bool classof(const Type *T) {
142 return T->getTypeID() == FunctionTyID;
143 }
144};
145static_assert(alignof(FunctionType) >= alignof(Type *),
146 "Alignment sufficient for objects appended to FunctionType");
147
148bool Type::isFunctionVarArg() const {
149 return cast<FunctionType>(this)->isVarArg();
150}
151
152Type *Type::getFunctionParamType(unsigned i) const {
153 return cast<FunctionType>(this)->getParamType(i);
154}
155
156unsigned Type::getFunctionNumParams() const {
157 return cast<FunctionType>(this)->getNumParams();
158}
159
160/// A handy container for a FunctionType+Callee-pointer pair, which can be
161/// passed around as a single entity. This assists in replacing the use of
162/// PointerType::getElementType() to access the function's type, since that's
163/// slated for removal as part of the [opaque pointer types] project.
164class FunctionCallee {
165public:
166 // Allow implicit conversion from types which have a getFunctionType member
167 // (e.g. Function and InlineAsm).
168 template <typename T, typename U = decltype(&T::getFunctionType)>
169 FunctionCallee(T *Fn)
170 : FnTy(Fn ? Fn->getFunctionType() : nullptr), Callee(Fn) {}
171
172 FunctionCallee(FunctionType *FnTy, Value *Callee)
173 : FnTy(FnTy), Callee(Callee) {
174 assert((FnTy == nullptr) == (Callee == nullptr))(static_cast<void> (0));
175 }
176
177 FunctionCallee(std::nullptr_t) {}
178
179 FunctionCallee() = default;
180
181 FunctionType *getFunctionType() { return FnTy; }
182
183 Value *getCallee() { return Callee; }
184
185 explicit operator bool() { return Callee; }
186
187private:
188 FunctionType *FnTy = nullptr;
189 Value *Callee = nullptr;
190};
191
192/// Class to represent struct types. There are two different kinds of struct
193/// types: Literal structs and Identified structs.
194///
195/// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must
196/// always have a body when created. You can get one of these by using one of
197/// the StructType::get() forms.
198///
199/// Identified structs (e.g. %foo or %42) may optionally have a name and are not
200/// uniqued. The names for identified structs are managed at the LLVMContext
201/// level, so there can only be a single identified struct with a given name in
202/// a particular LLVMContext. Identified structs may also optionally be opaque
203/// (have no body specified). You get one of these by using one of the
204/// StructType::create() forms.
205///
206/// Independent of what kind of struct you have, the body of a struct type are
207/// laid out in memory consecutively with the elements directly one after the
208/// other (if the struct is packed) or (if not packed) with padding between the
209/// elements as defined by DataLayout (which is required to match what the code
210/// generator for a target expects).
211///
212class StructType : public Type {
213 StructType(LLVMContext &C) : Type(C, StructTyID) {}
214
215 enum {
216 /// This is the contents of the SubClassData field.
217 SCDB_HasBody = 1,
218 SCDB_Packed = 2,
219 SCDB_IsLiteral = 4,
220 SCDB_IsSized = 8
221 };
222
223 /// For a named struct that actually has a name, this is a pointer to the
224 /// symbol table entry (maintained by LLVMContext) for the struct.
225 /// This is null if the type is an literal struct or if it is a identified
226 /// type that has an empty name.
227 void *SymbolTableEntry = nullptr;
228
229public:
230 StructType(const StructType &) = delete;
231 StructType &operator=(const StructType &) = delete;
232
233 /// This creates an identified struct.
234 static StructType *create(LLVMContext &Context, StringRef Name);
235 static StructType *create(LLVMContext &Context);
236
237 static StructType *create(ArrayRef<Type *> Elements, StringRef Name,
238 bool isPacked = false);
239 static StructType *create(ArrayRef<Type *> Elements);
240 static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements,
241 StringRef Name, bool isPacked = false);
242 static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements);
243 template <class... Tys>
244 static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *>
245 create(StringRef Name, Type *elt1, Tys *... elts) {
246 assert(elt1 && "Cannot create a struct type with no elements with this")(static_cast<void> (0));
247 return create(ArrayRef<Type *>({elt1, elts...}), Name);
248 }
249
250 /// This static method is the primary way to create a literal StructType.
251 static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements,
252 bool isPacked = false);
253
254 /// Create an empty structure type.
255 static StructType *get(LLVMContext &Context, bool isPacked = false);
256
257 /// This static method is a convenience method for creating structure types by
258 /// specifying the elements as arguments. Note that this method always returns
259 /// a non-packed struct, and requires at least one element type.
260 template <class... Tys>
261 static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *>
262 get(Type *elt1, Tys *... elts) {
263 assert(elt1 && "Cannot create a struct type with no elements with this")(static_cast<void> (0));
264 LLVMContext &Ctx = elt1->getContext();
265 return StructType::get(Ctx, ArrayRef<Type *>({elt1, elts...}));
266 }
267
268 /// Return the type with the specified name, or null if there is none by that
269 /// name.
270 static StructType *getTypeByName(LLVMContext &C, StringRef Name);
271
272 bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
273
274 /// Return true if this type is uniqued by structural equivalence, false if it
275 /// is a struct definition.
276 bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; }
277
278 /// Return true if this is a type with an identity that has no body specified
279 /// yet. These prints as 'opaque' in .ll files.
280 bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
281
282 /// isSized - Return true if this is a sized type.
283 bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
284
285 /// Returns true if this struct contains a scalable vector.
286 bool containsScalableVectorType() const;
287
288 /// Return true if this is a named struct that has a non-empty name.
289 bool hasName() const { return SymbolTableEntry != nullptr; }
290
291 /// Return the name for this struct type if it has an identity.
292 /// This may return an empty string for an unnamed struct type. Do not call
293 /// this on an literal type.
294 StringRef getName() const;
295
296 /// Change the name of this type to the specified name, or to a name with a
297 /// suffix if there is a collision. Do not call this on an literal type.
298 void setName(StringRef Name);
299
300 /// Specify a body for an opaque identified type.
301 void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
302
303 template <typename... Tys>
304 std::enable_if_t<are_base_of<Type, Tys...>::value, void>
305 setBody(Type *elt1, Tys *... elts) {
306 assert(elt1 && "Cannot create a struct type with no elements with this")(static_cast<void> (0));
307 setBody(ArrayRef<Type *>({elt1, elts...}));
308 }
309
310 /// Return true if the specified type is valid as a element type.
311 static bool isValidElementType(Type *ElemTy);
312
313 // Iterator access to the elements.
314 using element_iterator = Type::subtype_iterator;
315
316 element_iterator element_begin() const { return ContainedTys; }
317 element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
318 ArrayRef<Type *> elements() const {
319 return makeArrayRef(element_begin(), element_end());
320 }
321
322 /// Return true if this is layout identical to the specified struct.
323 bool isLayoutIdentical(StructType *Other) const;
324
325 /// Random access to the elements
326 unsigned getNumElements() const { return NumContainedTys; }
327 Type *getElementType(unsigned N) const {
328 assert(N < NumContainedTys && "Element number out of range!")(static_cast<void> (0));
329 return ContainedTys[N];
330 }
331 /// Given an index value into the type, return the type of the element.
332 Type *getTypeAtIndex(const Value *V) const;
333 Type *getTypeAtIndex(unsigned N) const { return getElementType(N); }
334 bool indexValid(const Value *V) const;
335 bool indexValid(unsigned Idx) const { return Idx < getNumElements(); }
336
337 /// Methods for support type inquiry through isa, cast, and dyn_cast.
338 static bool classof(const Type *T) {
339 return T->getTypeID() == StructTyID;
340 }
341};
342
343StringRef Type::getStructName() const {
344 return cast<StructType>(this)->getName();
345}
346
347unsigned Type::getStructNumElements() const {
348 return cast<StructType>(this)->getNumElements();
349}
350
351Type *Type::getStructElementType(unsigned N) const {
352 return cast<StructType>(this)->getElementType(N);
353}
354
355/// Class to represent array types.
356class ArrayType : public Type {
357 /// The element type of the array.
358 Type *ContainedType;
359 /// Number of elements in the array.
360 uint64_t NumElements;
361
362 ArrayType(Type *ElType, uint64_t NumEl);
363
364public:
365 ArrayType(const ArrayType &) = delete;
366 ArrayType &operator=(const ArrayType &) = delete;
367
368 uint64_t getNumElements() const { return NumElements; }
369 Type *getElementType() const { return ContainedType; }
370
371 /// This static method is the primary way to construct an ArrayType
372 static ArrayType *get(Type *ElementType, uint64_t NumElements);
373
374 /// Return true if the specified type is valid as a element type.
375 static bool isValidElementType(Type *ElemTy);
376
377 /// Methods for support type inquiry through isa, cast, and dyn_cast.
378 static bool classof(const Type *T) {
379 return T->getTypeID() == ArrayTyID;
380 }
381};
382
383uint64_t Type::getArrayNumElements() const {
384 return cast<ArrayType>(this)->getNumElements();
385}
386
387/// Base class of all SIMD vector types
388class VectorType : public Type {
389 /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the
390 /// minimum number of elements of type Ty contained within the vector, and
391 /// 'vscale x' indicates that the total element count is an integer multiple
392 /// of 'n', where the multiple is either guaranteed to be one, or is
393 /// statically unknown at compile time.
394 ///
395 /// If the multiple is known to be 1, then the extra term is discarded in
396 /// textual IR:
397 ///
398 /// <4 x i32> - a vector containing 4 i32s
399 /// <vscale x 4 x i32> - a vector containing an unknown integer multiple
400 /// of 4 i32s
401
402 /// The element type of the vector.
403 Type *ContainedType;
404
405protected:
406 /// The element quantity of this vector. The meaning of this value depends
407 /// on the type of vector:
408 /// - For FixedVectorType = <ElementQuantity x ty>, there are
409 /// exactly ElementQuantity elements in this vector.
410 /// - For ScalableVectorType = <vscale x ElementQuantity x ty>,
411 /// there are vscale * ElementQuantity elements in this vector, where
412 /// vscale is a runtime-constant integer greater than 0.
413 const unsigned ElementQuantity;
414
415 VectorType(Type *ElType, unsigned EQ, Type::TypeID TID);
416
417public:
418 VectorType(const VectorType &) = delete;
419 VectorType &operator=(const VectorType &) = delete;
420
421 Type *getElementType() const { return ContainedType; }
422
423 /// This static method is the primary way to construct an VectorType.
424 static VectorType *get(Type *ElementType, ElementCount EC);
425
426 static VectorType *get(Type *ElementType, unsigned NumElements,
427 bool Scalable) {
428 return VectorType::get(ElementType,
429 ElementCount::get(NumElements, Scalable));
430 }
431
432 static VectorType *get(Type *ElementType, const VectorType *Other) {
433 return VectorType::get(ElementType, Other->getElementCount());
434 }
435
436 /// This static method gets a VectorType with the same number of elements as
437 /// the input type, and the element type is an integer type of the same width
438 /// as the input element type.
439 static VectorType *getInteger(VectorType *VTy) {
440 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
441 assert(EltBits && "Element size must be of a non-zero size")(static_cast<void> (0));
442 Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
443 return VectorType::get(EltTy, VTy->getElementCount());
444 }
445
446 /// This static method is like getInteger except that the element types are
447 /// twice as wide as the elements in the input type.
448 static VectorType *getExtendedElementVectorType(VectorType *VTy) {
449 assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints.")(static_cast<void> (0));
450 auto *EltTy = cast<IntegerType>(VTy->getElementType());
451 return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount());
452 }
453
454 // This static method gets a VectorType with the same number of elements as
455 // the input type, and the element type is an integer or float type which
456 // is half as wide as the elements in the input type.
457 static VectorType *getTruncatedElementVectorType(VectorType *VTy) {
458 Type *EltTy;
459 if (VTy->getElementType()->isFloatingPointTy()) {
460 switch(VTy->getElementType()->getTypeID()) {
461 case DoubleTyID:
462 EltTy = Type::getFloatTy(VTy->getContext());
463 break;
464 case FloatTyID:
465 EltTy = Type::getHalfTy(VTy->getContext());
466 break;
467 default:
468 llvm_unreachable("Cannot create narrower fp vector element type")__builtin_unreachable();
469 }
470 } else {
471 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
472 assert((EltBits & 1) == 0 &&(static_cast<void> (0))
473 "Cannot truncate vector element with odd bit-width")(static_cast<void> (0));
474 EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
475 }
476 return VectorType::get(EltTy, VTy->getElementCount());
477 }
478
479 // This static method returns a VectorType with a smaller number of elements
480 // of a larger type than the input element type. For example, a <16 x i8>
481 // subdivided twice would return <4 x i32>
482 static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) {
483 for (int i = 0; i < NumSubdivs; ++i) {
484 VTy = VectorType::getDoubleElementsVectorType(VTy);
485 VTy = VectorType::getTruncatedElementVectorType(VTy);
486 }
487 return VTy;
488 }
489
490 /// This static method returns a VectorType with half as many elements as the
491 /// input type and the same element type.
492 static VectorType *getHalfElementsVectorType(VectorType *VTy) {
493 auto EltCnt = VTy->getElementCount();
494 assert(EltCnt.isKnownEven() &&(static_cast<void> (0))
495 "Cannot halve vector with odd number of elements.")(static_cast<void> (0));
496 return VectorType::get(VTy->getElementType(),
497 EltCnt.divideCoefficientBy(2));
498 }
499
500 /// This static method returns a VectorType with twice as many elements as the
501 /// input type and the same element type.
502 static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
503 auto EltCnt = VTy->getElementCount();
504 assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX &&(static_cast<void> (0))
505 "Too many elements in vector")(static_cast<void> (0));
506 return VectorType::get(VTy->getElementType(), EltCnt * 2);
507 }
508
509 /// Return true if the specified type is valid as a element type.
510 static bool isValidElementType(Type *ElemTy);
511
512 /// Return an ElementCount instance to represent the (possibly scalable)
513 /// number of elements in the vector.
514 inline ElementCount getElementCount() const;
515
516 /// Methods for support type inquiry through isa, cast, and dyn_cast.
517 static bool classof(const Type *T) {
518 return T->getTypeID() == FixedVectorTyID ||
519 T->getTypeID() == ScalableVectorTyID;
520 }
521};
522
523/// Class to represent fixed width SIMD vectors
524class FixedVectorType : public VectorType {
525protected:
526 FixedVectorType(Type *ElTy, unsigned NumElts)
527 : VectorType(ElTy, NumElts, FixedVectorTyID) {}
528
529public:
530 static FixedVectorType *get(Type *ElementType, unsigned NumElts);
531
532 static FixedVectorType *get(Type *ElementType, const FixedVectorType *FVTy) {
533 return get(ElementType, FVTy->getNumElements());
534 }
535
536 static FixedVectorType *getInteger(FixedVectorType *VTy) {
537 return cast<FixedVectorType>(VectorType::getInteger(VTy));
538 }
539
540 static FixedVectorType *getExtendedElementVectorType(FixedVectorType *VTy) {
541 return cast<FixedVectorType>(VectorType::getExtendedElementVectorType(VTy));
542 }
543
544 static FixedVectorType *getTruncatedElementVectorType(FixedVectorType *VTy) {
545 return cast<FixedVectorType>(
546 VectorType::getTruncatedElementVectorType(VTy));
547 }
548
549 static FixedVectorType *getSubdividedVectorType(FixedVectorType *VTy,
550 int NumSubdivs) {
551 return cast<FixedVectorType>(
552 VectorType::getSubdividedVectorType(VTy, NumSubdivs));
553 }
554
555 static FixedVectorType *getHalfElementsVectorType(FixedVectorType *VTy) {
556 return cast<FixedVectorType>(VectorType::getHalfElementsVectorType(VTy));
557 }
558
559 static FixedVectorType *getDoubleElementsVectorType(FixedVectorType *VTy) {
560 return cast<FixedVectorType>(VectorType::getDoubleElementsVectorType(VTy));
561 }
562
563 static bool classof(const Type *T) {
564 return T->getTypeID() == FixedVectorTyID;
565 }
566
567 unsigned getNumElements() const { return ElementQuantity; }
568};
569
570/// Class to represent scalable SIMD vectors
571class ScalableVectorType : public VectorType {
572protected:
573 ScalableVectorType(Type *ElTy, unsigned MinNumElts)
574 : VectorType(ElTy, MinNumElts, ScalableVectorTyID) {}
575
576public:
577 static ScalableVectorType *get(Type *ElementType, unsigned MinNumElts);
578
579 static ScalableVectorType *get(Type *ElementType,
580 const ScalableVectorType *SVTy) {
581 return get(ElementType, SVTy->getMinNumElements());
582 }
583
584 static ScalableVectorType *getInteger(ScalableVectorType *VTy) {
585 return cast<ScalableVectorType>(VectorType::getInteger(VTy));
586 }
587
588 static ScalableVectorType *
589 getExtendedElementVectorType(ScalableVectorType *VTy) {
590 return cast<ScalableVectorType>(
591 VectorType::getExtendedElementVectorType(VTy));
592 }
593
594 static ScalableVectorType *
595 getTruncatedElementVectorType(ScalableVectorType *VTy) {
596 return cast<ScalableVectorType>(
597 VectorType::getTruncatedElementVectorType(VTy));
598 }
599
600 static ScalableVectorType *getSubdividedVectorType(ScalableVectorType *VTy,
601 int NumSubdivs) {
602 return cast<ScalableVectorType>(
603 VectorType::getSubdividedVectorType(VTy, NumSubdivs));
604 }
605
606 static ScalableVectorType *
607 getHalfElementsVectorType(ScalableVectorType *VTy) {
608 return cast<ScalableVectorType>(VectorType::getHalfElementsVectorType(VTy));
609 }
610
611 static ScalableVectorType *
612 getDoubleElementsVectorType(ScalableVectorType *VTy) {
613 return cast<ScalableVectorType>(
614 VectorType::getDoubleElementsVectorType(VTy));
615 }
616
617 /// Get the minimum number of elements in this vector. The actual number of
618 /// elements in the vector is an integer multiple of this value.
619 uint64_t getMinNumElements() const { return ElementQuantity; }
620
621 static bool classof(const Type *T) {
622 return T->getTypeID() == ScalableVectorTyID;
623 }
624};
625
626inline ElementCount VectorType::getElementCount() const {
627 return ElementCount::get(ElementQuantity, isa<ScalableVectorType>(this));
12
Assuming the object is not a 'ScalableVectorType'
13
Calling 'LinearPolySize::get'
16
Returning from 'LinearPolySize::get'
628}
629
630/// Class to represent pointers.
631class PointerType : public Type {
632 explicit PointerType(Type *ElType, unsigned AddrSpace);
633 explicit PointerType(LLVMContext &C, unsigned AddrSpace);
634
635 Type *PointeeTy;
636
637public:
638 PointerType(const PointerType &) = delete;
639 PointerType &operator=(const PointerType &) = delete;
640
641 /// This constructs a pointer to an object of the specified type in a numbered
642 /// address space.
643 static PointerType *get(Type *ElementType, unsigned AddressSpace);
644 /// This constructs an opaque pointer to an object in a numbered address
645 /// space.
646 static PointerType *get(LLVMContext &C, unsigned AddressSpace);
647
648 /// This constructs a pointer to an object of the specified type in the
649 /// default address space (address space zero).
650 static PointerType *getUnqual(Type *ElementType) {
651 return PointerType::get(ElementType, 0);
652 }
653
654 /// This constructs an opaque pointer to an object in the
655 /// default address space (address space zero).
656 static PointerType *getUnqual(LLVMContext &C) {
657 return PointerType::get(C, 0);
658 }
659
660 /// This constructs a pointer type with the same pointee type as input
661 /// PointerType (or opaque pointer is the input PointerType is opaque) and the
662 /// given address space. This is only useful during the opaque pointer
663 /// transition.
664 /// TODO: remove after opaque pointer transition is complete.
665 static PointerType *getWithSamePointeeType(PointerType *PT,
666 unsigned AddressSpace) {
667 if (PT->isOpaque())
668 return get(PT->getContext(), AddressSpace);
669 return get(PT->getElementType(), AddressSpace);
670 }
671
672 Type *getElementType() const {
673 assert(!isOpaque() && "Attempting to get element type of opaque pointer")(static_cast<void> (0));
674 return PointeeTy;
675 }
676
677 bool isOpaque() const { return !PointeeTy; }
678
679 /// Return true if the specified type is valid as a element type.
680 static bool isValidElementType(Type *ElemTy);
681
682 /// Return true if we can load or store from a pointer to this type.
683 static bool isLoadableOrStorableType(Type *ElemTy);
684
685 /// Return the address space of the Pointer type.
686 inline unsigned getAddressSpace() const { return getSubclassData(); }
687
688 /// Return true if either this is an opaque pointer type or if this pointee
689 /// type matches Ty. Primarily used for checking if an instruction's pointer
690 /// operands are valid types. Will be useless after non-opaque pointers are
691 /// removed.
692 bool isOpaqueOrPointeeTypeMatches(Type *Ty) {
693 return isOpaque() || PointeeTy == Ty;
694 }
695
696 /// Return true if both pointer types have the same element type. Two opaque
697 /// pointers are considered to have the same element type, while an opaque
698 /// and a non-opaque pointer have different element types.
699 /// TODO: Remove after opaque pointer transition is complete.
700 bool hasSameElementTypeAs(PointerType *Other) {
701 return PointeeTy == Other->PointeeTy;
702 }
703
704 /// Implement support type inquiry through isa, cast, and dyn_cast.
705 static bool classof(const Type *T) {
706 return T->getTypeID() == PointerTyID;
707 }
708};
709
710Type *Type::getExtendedType() const {
711 assert((static_cast<void> (0))
712 isIntOrIntVectorTy() &&(static_cast<void> (0))
713 "Original type expected to be a vector of integers or a scalar integer.")(static_cast<void> (0));
714 if (auto *VTy = dyn_cast<VectorType>(this))
715 return VectorType::getExtendedElementVectorType(
716 const_cast<VectorType *>(VTy));
717 return cast<IntegerType>(this)->getExtendedType();
718}
719
720Type *Type::getWithNewType(Type *EltTy) const {
721 if (auto *VTy = dyn_cast<VectorType>(this))
722 return VectorType::get(EltTy, VTy->getElementCount());
723 return EltTy;
724}
725
726Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const {
727 assert((static_cast<void> (0))
728 isIntOrIntVectorTy() &&(static_cast<void> (0))
729 "Original type expected to be a vector of integers or a scalar integer.")(static_cast<void> (0));
730 return getWithNewType(getIntNTy(getContext(), NewBitWidth));
731}
732
733unsigned Type::getPointerAddressSpace() const {
734 return cast<PointerType>(getScalarType())->getAddressSpace();
735}
736
737} // end namespace llvm
738
739#endif // LLVM_IR_DERIVEDTYPES_H

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/Support/TypeSize.h

1//===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a struct that can be used to query the size of IR types
10// which may be scalable vectors. It provides convenience operators so that
11// it can be used in much the same way as a single scalar value.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_SUPPORT_TYPESIZE_H
16#define LLVM_SUPPORT_TYPESIZE_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/Support/MathExtras.h"
20#include "llvm/Support/WithColor.h"
21
22#include <algorithm>
23#include <array>
24#include <cassert>
25#include <cstdint>
26#include <type_traits>
27
28namespace llvm {
29
30/// Reports a diagnostic message to indicate an invalid size request has been
31/// done on a scalable vector. This function may not return.
32void reportInvalidSizeRequest(const char *Msg);
33
34template <typename LeafTy> struct LinearPolyBaseTypeTraits {};
35
36//===----------------------------------------------------------------------===//
37// LinearPolyBase - a base class for linear polynomials with multiple
38// dimensions. This can e.g. be used to describe offsets that are have both a
39// fixed and scalable component.
40//===----------------------------------------------------------------------===//
41
42/// LinearPolyBase describes a linear polynomial:
43/// c0 * scale0 + c1 * scale1 + ... + cK * scaleK
44/// where the scale is implicit, so only the coefficients are encoded.
45template <typename LeafTy>
46class LinearPolyBase {
47public:
48 using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy;
49 static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions;
50 static_assert(Dimensions != std::numeric_limits<unsigned>::max(),
51 "Dimensions out of range");
52
53private:
54 std::array<ScalarTy, Dimensions> Coefficients;
55
56protected:
57 LinearPolyBase(ArrayRef<ScalarTy> Values) {
58 std::copy(Values.begin(), Values.end(), Coefficients.begin());
59 }
60
61public:
62 friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) {
63 for (unsigned I=0; I<Dimensions; ++I)
64 LHS.Coefficients[I] += RHS.Coefficients[I];
65 return LHS;
66 }
67
68 friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) {
69 for (unsigned I=0; I<Dimensions; ++I)
70 LHS.Coefficients[I] -= RHS.Coefficients[I];
71 return LHS;
72 }
73
74 friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) {
75 for (auto &C : LHS.Coefficients)
76 C *= RHS;
77 return LHS;
78 }
79
80 friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) {
81 LeafTy Copy = LHS;
82 return Copy += RHS;
83 }
84
85 friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) {
86 LeafTy Copy = LHS;
87 return Copy -= RHS;
88 }
89
90 friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) {
91 LeafTy Copy = LHS;
92 return Copy *= RHS;
93 }
94
95 template <typename U = ScalarTy>
96 friend typename std::enable_if_t<std::is_signed<U>::value, LeafTy>
97 operator-(const LeafTy &LHS) {
98 LeafTy Copy = LHS;
99 return Copy *= -1;
100 }
101
102 bool operator==(const LinearPolyBase &RHS) const {
103 return std::equal(Coefficients.begin(), Coefficients.end(),
104 RHS.Coefficients.begin());
105 }
106
107 bool operator!=(const LinearPolyBase &RHS) const {
108 return !(*this == RHS);
109 }
110
111 bool isZero() const {
112 return all_of(Coefficients, [](const ScalarTy &C) { return C == 0; });
113 }
114 bool isNonZero() const { return !isZero(); }
115 explicit operator bool() const { return isNonZero(); }
116
117 ScalarTy getValue(unsigned Dim) const { return Coefficients[Dim]; }
118};
119
120//===----------------------------------------------------------------------===//
121// StackOffset - Represent an offset with named fixed and scalable components.
122//===----------------------------------------------------------------------===//
123
124class StackOffset;
125template <> struct LinearPolyBaseTypeTraits<StackOffset> {
126 using ScalarTy = int64_t;
127 static constexpr unsigned Dimensions = 2;
128};
129
130/// StackOffset is a class to represent an offset with 2 dimensions,
131/// named fixed and scalable, respectively. This class allows a value for both
132/// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed
133/// to represent stack offsets.
134class StackOffset : public LinearPolyBase<StackOffset> {
135protected:
136 StackOffset(ScalarTy Fixed, ScalarTy Scalable)
137 : LinearPolyBase<StackOffset>({Fixed, Scalable}) {}
138
139public:
140 StackOffset() : StackOffset({0, 0}) {}
141 StackOffset(const LinearPolyBase<StackOffset> &Other)
142 : LinearPolyBase<StackOffset>(Other) {}
143 static StackOffset getFixed(ScalarTy Fixed) { return {Fixed, 0}; }
144 static StackOffset getScalable(ScalarTy Scalable) { return {0, Scalable}; }
145 static StackOffset get(ScalarTy Fixed, ScalarTy Scalable) {
146 return {Fixed, Scalable};
147 }
148
149 ScalarTy getFixed() const { return this->getValue(0); }
150 ScalarTy getScalable() const { return this->getValue(1); }
151};
152
153//===----------------------------------------------------------------------===//
154// UnivariateLinearPolyBase - a base class for linear polynomials with multiple
155// dimensions, but where only one dimension can be set at any time.
156// This can e.g. be used to describe sizes that are either fixed or scalable.
157//===----------------------------------------------------------------------===//
158
159/// UnivariateLinearPolyBase is a base class for ElementCount and TypeSize.
160/// Like LinearPolyBase it tries to represent a linear polynomial
161/// where only one dimension can be set at any time, e.g.
162/// 0 * scale0 + 0 * scale1 + ... + cJ * scaleJ + ... + 0 * scaleK
163/// The dimension that is set is the univariate dimension.
164template <typename LeafTy>
165class UnivariateLinearPolyBase {
166public:
167 using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy;
168 static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions;
169 static_assert(Dimensions != std::numeric_limits<unsigned>::max(),
170 "Dimensions out of range");
171
172protected:
173 ScalarTy Value; // The value at the univeriate dimension.
174 unsigned UnivariateDim; // The univeriate dimension.
175
176 UnivariateLinearPolyBase(ScalarTy Val, unsigned UnivariateDim)
177 : Value(Val), UnivariateDim(UnivariateDim) {
178 assert(UnivariateDim < Dimensions && "Dimension out of range")(static_cast<void> (0));
179 }
180
181 friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) {
182 assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")(static_cast<void> (0));
183 LHS.Value += RHS.Value;
184 return LHS;
185 }
186
187 friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) {
188 assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")(static_cast<void> (0));
189 LHS.Value -= RHS.Value;
190 return LHS;
191 }
192
193 friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) {
194 LHS.Value *= RHS;
195 return LHS;
196 }
197
198 friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) {
199 LeafTy Copy = LHS;
200 return Copy += RHS;
201 }
202
203 friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) {
204 LeafTy Copy = LHS;
205 return Copy -= RHS;
206 }
207
208 friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) {
209 LeafTy Copy = LHS;
210 return Copy *= RHS;
211 }
212
213 template <typename U = ScalarTy>
214 friend typename std::enable_if<std::is_signed<U>::value, LeafTy>::type
215 operator-(const LeafTy &LHS) {
216 LeafTy Copy = LHS;
217 return Copy *= -1;
218 }
219
220public:
221 bool operator==(const UnivariateLinearPolyBase &RHS) const {
222 return Value == RHS.Value && UnivariateDim == RHS.UnivariateDim;
20
Assuming 'Value' is not equal to 'RHS.Value'
21
Returning zero, which participates in a condition later
223 }
224
225 bool operator!=(const UnivariateLinearPolyBase &RHS) const {
226 return !(*this == RHS);
227 }
228
229 bool isZero() const { return !Value; }
230 bool isNonZero() const { return !isZero(); }
231 explicit operator bool() const { return isNonZero(); }
232 ScalarTy getValue() const { return Value; }
38
Returning zero
233 ScalarTy getValue(unsigned Dim) const {
234 return Dim == UnivariateDim ? Value : 0;
235 }
236
237 /// Add \p RHS to the value at the univariate dimension.
238 LeafTy getWithIncrement(ScalarTy RHS) const {
239 return static_cast<LeafTy>(
240 UnivariateLinearPolyBase(Value + RHS, UnivariateDim));
241 }
242
243 /// Subtract \p RHS from the value at the univariate dimension.
244 LeafTy getWithDecrement(ScalarTy RHS) const {
245 return static_cast<LeafTy>(
246 UnivariateLinearPolyBase(Value - RHS, UnivariateDim));
247 }
248};
249
250
251//===----------------------------------------------------------------------===//
252// LinearPolySize - base class for fixed- or scalable sizes.
253// ^ ^
254// | |
255// | +----- ElementCount - Leaf class to represent an element count
256// | (vscale x unsigned)
257// |
258// +-------- TypeSize - Leaf class to represent a type size
259// (vscale x uint64_t)
260//===----------------------------------------------------------------------===//
261
262/// LinearPolySize is a base class to represent sizes. It is either
263/// fixed-sized or it is scalable-sized, but it cannot be both.
264template <typename LeafTy>
265class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> {
266 // Make the parent class a friend, so that it can access the protected
267 // conversion/copy-constructor for UnivariatePolyBase<LeafTy> ->
268 // LinearPolySize<LeafTy>.
269 friend class UnivariateLinearPolyBase<LeafTy>;
270
271public:
272 using ScalarTy = typename UnivariateLinearPolyBase<LeafTy>::ScalarTy;
273 enum Dims : unsigned { FixedDim = 0, ScalableDim = 1 };
274
275protected:
276 LinearPolySize(ScalarTy MinVal, Dims D)
277 : UnivariateLinearPolyBase<LeafTy>(MinVal, D) {}
278
279 LinearPolySize(const UnivariateLinearPolyBase<LeafTy> &V)
280 : UnivariateLinearPolyBase<LeafTy>(V) {}
281
282public:
283
284 static LeafTy getFixed(ScalarTy MinVal) {
285 return static_cast<LeafTy>(LinearPolySize(MinVal, FixedDim));
286 }
287 static LeafTy getScalable(ScalarTy MinVal) {
288 return static_cast<LeafTy>(LinearPolySize(MinVal, ScalableDim));
289 }
290 static LeafTy get(ScalarTy MinVal, bool Scalable) {
291 return static_cast<LeafTy>(
15
Value assigned to 'NumSrcElts.Value'
292 LinearPolySize(MinVal, Scalable
13.1
'Scalable' is false
13.1
'Scalable' is false
13.1
'Scalable' is false
13.1
'Scalable' is false
? ScalableDim : FixedDim))
;
14
'?' condition is false
293 }
294 static LeafTy getNull() { return get(0, false); }
295
296 /// Returns the minimum value this size can represent.
297 ScalarTy getKnownMinValue() const { return this->getValue(); }
37
Calling 'UnivariateLinearPolyBase::getValue'
39
Returning from 'UnivariateLinearPolyBase::getValue'
40
Returning zero
298 /// Returns whether the size is scaled by a runtime quantity (vscale).
299 bool isScalable() const { return this->UnivariateDim == ScalableDim; }
300 /// A return value of true indicates we know at compile time that the number
301 /// of elements (vscale * Min) is definitely even. However, returning false
302 /// does not guarantee that the total number of elements is odd.
303 bool isKnownEven() const { return (getKnownMinValue() & 0x1) == 0; }
304 /// This function tells the caller whether the element count is known at
305 /// compile time to be a multiple of the scalar value RHS.
306 bool isKnownMultipleOf(ScalarTy RHS) const {
307 return getKnownMinValue() % RHS == 0;
308 }
309
310 // Return the minimum value with the assumption that the count is exact.
311 // Use in places where a scalable count doesn't make sense (e.g. non-vector
312 // types, or vectors in backends which don't support scalable vectors).
313 ScalarTy getFixedValue() const {
314 assert(!isScalable() &&(static_cast<void> (0))
315 "Request for a fixed element count on a scalable object")(static_cast<void> (0));
316 return getKnownMinValue();
317 }
318
319 // For some cases, size ordering between scalable and fixed size types cannot
320 // be determined at compile time, so such comparisons aren't allowed.
321 //
322 // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime
323 // vscale >= 5, equal sized with a vscale of 4, and smaller with
324 // a vscale <= 3.
325 //
326 // All the functions below make use of the fact vscale is always >= 1, which
327 // means that <vscale x 4 x i32> is guaranteed to be >= <4 x i32>, etc.
328
329 static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS) {
330 if (!LHS.isScalable() || RHS.isScalable())
331 return LHS.getKnownMinValue() < RHS.getKnownMinValue();
332 return false;
333 }
334
335 static bool isKnownGT(const LinearPolySize &LHS, const LinearPolySize &RHS) {
336 if (LHS.isScalable() || !RHS.isScalable())
337 return LHS.getKnownMinValue() > RHS.getKnownMinValue();
338 return false;
339 }
340
341 static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS) {
342 if (!LHS.isScalable() || RHS.isScalable())
343 return LHS.getKnownMinValue() <= RHS.getKnownMinValue();
344 return false;
345 }
346
347 static bool isKnownGE(const LinearPolySize &LHS, const LinearPolySize &RHS) {
348 if (LHS.isScalable() || !RHS.isScalable())
349 return LHS.getKnownMinValue() >= RHS.getKnownMinValue();
350 return false;
351 }
352
353 /// We do not provide the '/' operator here because division for polynomial
354 /// types does not work in the same way as for normal integer types. We can
355 /// only divide the minimum value (or coefficient) by RHS, which is not the
356 /// same as
357 /// (Min * Vscale) / RHS
358 /// The caller is recommended to use this function in combination with
359 /// isKnownMultipleOf(RHS), which lets the caller know if it's possible to
360 /// perform a lossless divide by RHS.
361 LeafTy divideCoefficientBy(ScalarTy RHS) const {
362 return static_cast<LeafTy>(
363 LinearPolySize::get(getKnownMinValue() / RHS, isScalable()));
364 }
365
366 LeafTy coefficientNextPowerOf2() const {
367 return static_cast<LeafTy>(LinearPolySize::get(
368 static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())),
369 isScalable()));
370 }
371
372 /// Printing function.
373 void print(raw_ostream &OS) const {
374 if (isScalable())
375 OS << "vscale x ";
376 OS << getKnownMinValue();
377 }
378};
379
380class ElementCount;
381template <> struct LinearPolyBaseTypeTraits<ElementCount> {
382 using ScalarTy = unsigned;
383 static constexpr unsigned Dimensions = 2;
384};
385
386class ElementCount : public LinearPolySize<ElementCount> {
387public:
388 ElementCount() : LinearPolySize(LinearPolySize::getNull()) {}
389
390 ElementCount(const LinearPolySize<ElementCount> &V) : LinearPolySize(V) {}
391
392 /// Counting predicates.
393 ///
394 ///@{ Number of elements..
395 /// Exactly one element.
396 bool isScalar() const { return !isScalable() && getKnownMinValue() == 1; }
397 /// One or more elements.
398 bool isVector() const {
399 return (isScalable() && getKnownMinValue() != 0) || getKnownMinValue() > 1;
400 }
401 ///@}
402};
403
404// This class is used to represent the size of types. If the type is of fixed
405class TypeSize;
406template <> struct LinearPolyBaseTypeTraits<TypeSize> {
407 using ScalarTy = uint64_t;
408 static constexpr unsigned Dimensions = 2;
409};
410
411// TODO: Most functionality in this class will gradually be phased out
412// so it will resemble LinearPolySize as much as possible.
413//
414// TypeSize is used to represent the size of types. If the type is of fixed
415// size, it will represent the exact size. If the type is a scalable vector,
416// it will represent the known minimum size.
417class TypeSize : public LinearPolySize<TypeSize> {
418public:
419 TypeSize(const LinearPolySize<TypeSize> &V) : LinearPolySize(V) {}
420 TypeSize(ScalarTy MinVal, bool IsScalable)
421 : LinearPolySize(LinearPolySize::get(MinVal, IsScalable)) {}
422
423 static TypeSize Fixed(ScalarTy MinVal) { return TypeSize(MinVal, false); }
424 static TypeSize Scalable(ScalarTy MinVal) { return TypeSize(MinVal, true); }
425
426 ScalarTy getFixedSize() const { return getFixedValue(); }
427 ScalarTy getKnownMinSize() const { return getKnownMinValue(); }
428
429 // All code for this class below this point is needed because of the
430 // temporary implicit conversion to uint64_t. The operator overloads are
431 // needed because otherwise the conversion of the parent class
432 // UnivariateLinearPolyBase -> TypeSize is ambiguous.
433 // TODO: Remove the implicit conversion.
434
435 // Casts to a uint64_t if this is a fixed-width size.
436 //
437 // This interface is deprecated and will be removed in a future version
438 // of LLVM in favour of upgrading uses that rely on this implicit conversion
439 // to uint64_t. Calls to functions that return a TypeSize should use the
440 // proper interfaces to TypeSize.
441 // In practice this is mostly calls to MVT/EVT::getSizeInBits().
442 //
443 // To determine how to upgrade the code:
444 //
445 // if (<algorithm works for both scalable and fixed-width vectors>)
446 // use getKnownMinValue()
447 // else if (<algorithm works only for fixed-width vectors>) {
448 // if <algorithm can be adapted for both scalable and fixed-width vectors>
449 // update the algorithm and use getKnownMinValue()
450 // else
451 // bail out early for scalable vectors and use getFixedValue()
452 // }
453 operator ScalarTy() const;
454
455 // Additional operators needed to avoid ambiguous parses
456 // because of the implicit conversion hack.
457 friend TypeSize operator*(const TypeSize &LHS, const int RHS) {
458 return LHS * (ScalarTy)RHS;
459 }
460 friend TypeSize operator*(const TypeSize &LHS, const unsigned RHS) {
461 return LHS * (ScalarTy)RHS;
462 }
463 friend TypeSize operator*(const TypeSize &LHS, const int64_t RHS) {
464 return LHS * (ScalarTy)RHS;
465 }
466 friend TypeSize operator*(const int LHS, const TypeSize &RHS) {
467 return RHS * LHS;
468 }
469 friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) {
470 return RHS * LHS;
471 }
472 friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) {
473 return RHS * LHS;
474 }
475 friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) {
476 return RHS * LHS;
477 }
478};
479
480//===----------------------------------------------------------------------===//
481// Utilities
482//===----------------------------------------------------------------------===//
483
484/// Returns a TypeSize with a known minimum size that is the next integer
485/// (mod 2**64) that is greater than or equal to \p Value and is a multiple
486/// of \p Align. \p Align must be non-zero.
487///
488/// Similar to the alignTo functions in MathExtras.h
489inline TypeSize alignTo(TypeSize Size, uint64_t Align) {
490 assert(Align != 0u && "Align must be non-zero")(static_cast<void> (0));
491 return {(Size.getKnownMinValue() + Align - 1) / Align * Align,
492 Size.isScalable()};
493}
494
495/// Stream operator function for `LinearPolySize`.
496template <typename LeafTy>
497inline raw_ostream &operator<<(raw_ostream &OS,
498 const LinearPolySize<LeafTy> &PS) {
499 PS.print(OS);
500 return OS;
501}
502
503template <typename T> struct DenseMapInfo;
504template <> struct DenseMapInfo<ElementCount> {
505 static inline ElementCount getEmptyKey() {
506 return ElementCount::getScalable(~0U);
507 }
508 static inline ElementCount getTombstoneKey() {
509 return ElementCount::getFixed(~0U - 1);
510 }
511 static unsigned getHashValue(const ElementCount &EltCnt) {
512 unsigned HashVal = EltCnt.getKnownMinValue() * 37U;
513 if (EltCnt.isScalable())
514 return (HashVal - 1U);
515
516 return HashVal;
517 }
518
519 static bool isEqual(const ElementCount &LHS, const ElementCount &RHS) {
520 return LHS == RHS;
521 }
522};
523
524} // end namespace llvm
525
526#endif // LLVM_SUPPORT_TYPESIZE_H

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45
46namespace llvm {
47namespace PatternMatch {
48
49template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
50 return const_cast<Pattern &>(P).match(V);
27
Calling 'ThreeOps_match::match'
32
Returning from 'ThreeOps_match::match'
33
Returning the value 1, which participates in a condition later
51}
52
53template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) {
54 return const_cast<Pattern &>(P).match(Mask);
55}
56
57template <typename SubPattern_t> struct OneUse_match {
58 SubPattern_t SubPattern;
59
60 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
61
62 template <typename OpTy> bool match(OpTy *V) {
63 return V->hasOneUse() && SubPattern.match(V);
64 }
65};
66
67template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
68 return SubPattern;
69}
70
71template <typename Class> struct class_match {
72 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
73};
74
75/// Match an arbitrary value and ignore it.
76inline class_match<Value> m_Value() { return class_match<Value>(); }
77
78/// Match an arbitrary unary operation and ignore it.
79inline class_match<UnaryOperator> m_UnOp() {
80 return class_match<UnaryOperator>();
81}
82
83/// Match an arbitrary binary operation and ignore it.
84inline class_match<BinaryOperator> m_BinOp() {
85 return class_match<BinaryOperator>();
86}
87
88/// Matches any compare instruction and ignore it.
89inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
90
91struct undef_match {
92 static bool check(const Value *V) {
93 if (isa<UndefValue>(V))
94 return true;
95
96 const auto *CA = dyn_cast<ConstantAggregate>(V);
97 if (!CA)
98 return false;
99
100 SmallPtrSet<const ConstantAggregate *, 8> Seen;
101 SmallVector<const ConstantAggregate *, 8> Worklist;
102
103 // Either UndefValue, PoisonValue, or an aggregate that only contains
104 // these is accepted by matcher.
105 // CheckValue returns false if CA cannot satisfy this constraint.
106 auto CheckValue = [&](const ConstantAggregate *CA) {
107 for (const Value *Op : CA->operand_values()) {
108 if (isa<UndefValue>(Op))
109 continue;
110
111 const auto *CA = dyn_cast<ConstantAggregate>(Op);
112 if (!CA)
113 return false;
114 if (Seen.insert(CA).second)
115 Worklist.emplace_back(CA);
116 }
117
118 return true;
119 };
120
121 if (!CheckValue(CA))
122 return false;
123
124 while (!Worklist.empty()) {
125 if (!CheckValue(Worklist.pop_back_val()))
126 return false;
127 }
128 return true;
129 }
130 template <typename ITy> bool match(ITy *V) { return check(V); }
131};
132
133/// Match an arbitrary undef constant. This matches poison as well.
134/// If this is an aggregate and contains a non-aggregate element that is
135/// neither undef nor poison, the aggregate is not matched.
136inline auto m_Undef() { return undef_match(); }
137
138/// Match an arbitrary poison constant.
139inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
140
141/// Match an arbitrary Constant and ignore it.
142inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
143
144/// Match an arbitrary ConstantInt and ignore it.
145inline class_match<ConstantInt> m_ConstantInt() {
146 return class_match<ConstantInt>();
147}
148
149/// Match an arbitrary ConstantFP and ignore it.
150inline class_match<ConstantFP> m_ConstantFP() {
151 return class_match<ConstantFP>();
152}
153
154/// Match an arbitrary ConstantExpr and ignore it.
155inline class_match<ConstantExpr> m_ConstantExpr() {
156 return class_match<ConstantExpr>();
157}
158
159/// Match an arbitrary basic block value and ignore it.
160inline class_match<BasicBlock> m_BasicBlock() {
161 return class_match<BasicBlock>();
162}
163
164/// Inverting matcher
165template <typename Ty> struct match_unless {
166 Ty M;
167
168 match_unless(const Ty &Matcher) : M(Matcher) {}
169
170 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
171};
172
173/// Match if the inner matcher does *NOT* match.
174template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
175 return match_unless<Ty>(M);
176}
177
178/// Matching combinators
179template <typename LTy, typename RTy> struct match_combine_or {
180 LTy L;
181 RTy R;
182
183 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
184
185 template <typename ITy> bool match(ITy *V) {
186 if (L.match(V))
187 return true;
188 if (R.match(V))
189 return true;
190 return false;
191 }
192};
193
194template <typename LTy, typename RTy> struct match_combine_and {
195 LTy L;
196 RTy R;
197
198 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
199
200 template <typename ITy> bool match(ITy *V) {
201 if (L.match(V))
202 if (R.match(V))
203 return true;
204 return false;
205 }
206};
207
208/// Combine two pattern matchers matching L || R
209template <typename LTy, typename RTy>
210inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
211 return match_combine_or<LTy, RTy>(L, R);
212}
213
214/// Combine two pattern matchers matching L && R
215template <typename LTy, typename RTy>
216inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
217 return match_combine_and<LTy, RTy>(L, R);
218}
219
220struct apint_match {
221 const APInt *&Res;
222 bool AllowUndef;
223
224 apint_match(const APInt *&Res, bool AllowUndef)
225 : Res(Res), AllowUndef(AllowUndef) {}
226
227 template <typename ITy> bool match(ITy *V) {
228 if (auto *CI = dyn_cast<ConstantInt>(V)) {
229 Res = &CI->getValue();
230 return true;
231 }
232 if (V->getType()->isVectorTy())
233 if (const auto *C = dyn_cast<Constant>(V))
234 if (auto *CI = dyn_cast_or_null<ConstantInt>(
235 C->getSplatValue(AllowUndef))) {
236 Res = &CI->getValue();
237 return true;
238 }
239 return false;
240 }
241};
242// Either constexpr if or renaming ConstantFP::getValueAPF to
243// ConstantFP::getValue is needed to do it via single template
244// function for both apint/apfloat.
245struct apfloat_match {
246 const APFloat *&Res;
247 bool AllowUndef;
248
249 apfloat_match(const APFloat *&Res, bool AllowUndef)
250 : Res(Res), AllowUndef(AllowUndef) {}
251
252 template <typename ITy> bool match(ITy *V) {
253 if (auto *CI = dyn_cast<ConstantFP>(V)) {
254 Res = &CI->getValueAPF();
255 return true;
256 }
257 if (V->getType()->isVectorTy())
258 if (const auto *C = dyn_cast<Constant>(V))
259 if (auto *CI = dyn_cast_or_null<ConstantFP>(
260 C->getSplatValue(AllowUndef))) {
261 Res = &CI->getValueAPF();
262 return true;
263 }
264 return false;
265 }
266};
267
268/// Match a ConstantInt or splatted ConstantVector, binding the
269/// specified pointer to the contained APInt.
270inline apint_match m_APInt(const APInt *&Res) {
271 // Forbid undefs by default to maintain previous behavior.
272 return apint_match(Res, /* AllowUndef */ false);
273}
274
275/// Match APInt while allowing undefs in splat vector constants.
276inline apint_match m_APIntAllowUndef(const APInt *&Res) {
277 return apint_match(Res, /* AllowUndef */ true);
278}
279
280/// Match APInt while forbidding undefs in splat vector constants.
281inline apint_match m_APIntForbidUndef(const APInt *&Res) {
282 return apint_match(Res, /* AllowUndef */ false);
283}
284
285/// Match a ConstantFP or splatted ConstantVector, binding the
286/// specified pointer to the contained APFloat.
287inline apfloat_match m_APFloat(const APFloat *&Res) {
288 // Forbid undefs by default to maintain previous behavior.
289 return apfloat_match(Res, /* AllowUndef */ false);
290}
291
292/// Match APFloat while allowing undefs in splat vector constants.
293inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) {
294 return apfloat_match(Res, /* AllowUndef */ true);
295}
296
297/// Match APFloat while forbidding undefs in splat vector constants.
298inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) {
299 return apfloat_match(Res, /* AllowUndef */ false);
300}
301
302template <int64_t Val> struct constantint_match {
303 template <typename ITy> bool match(ITy *V) {
304 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
305 const APInt &CIV = CI->getValue();
306 if (Val >= 0)
307 return CIV == static_cast<uint64_t>(Val);
308 // If Val is negative, and CI is shorter than it, truncate to the right
309 // number of bits. If it is larger, then we have to sign extend. Just
310 // compare their negated values.
311 return -CIV == -Val;
312 }
313 return false;
314 }
315};
316
317/// Match a ConstantInt with a specific value.
318template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
319 return constantint_match<Val>();
320}
321
322/// This helper class is used to match constant scalars, vector splats,
323/// and fixed width vectors that satisfy a specified predicate.
324/// For fixed width vector constants, undefined elements are ignored.
325template <typename Predicate, typename ConstantVal>
326struct cstval_pred_ty : public Predicate {
327 template <typename ITy> bool match(ITy *V) {
328 if (const auto *CV = dyn_cast<ConstantVal>(V))
329 return this->isValue(CV->getValue());
330 if (const auto *VTy = dyn_cast<VectorType>(V->getType())) {
331 if (const auto *C = dyn_cast<Constant>(V)) {
332 if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue()))
333 return this->isValue(CV->getValue());
334
335 // Number of elements of a scalable vector unknown at compile time
336 auto *FVTy = dyn_cast<FixedVectorType>(VTy);
337 if (!FVTy)
338 return false;
339
340 // Non-splat vector constant: check each element for a match.
341 unsigned NumElts = FVTy->getNumElements();
342 assert(NumElts != 0 && "Constant vector with no elements?")(static_cast<void> (0));
343 bool HasNonUndefElements = false;
344 for (unsigned i = 0; i != NumElts; ++i) {
345 Constant *Elt = C->getAggregateElement(i);
346 if (!Elt)
347 return false;
348 if (isa<UndefValue>(Elt))
349 continue;
350 auto *CV = dyn_cast<ConstantVal>(Elt);
351 if (!CV || !this->isValue(CV->getValue()))
352 return false;
353 HasNonUndefElements = true;
354 }
355 return HasNonUndefElements;
356 }
357 }
358 return false;
359 }
360};
361
362/// specialization of cstval_pred_ty for ConstantInt
363template <typename Predicate>
364using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>;
365
366/// specialization of cstval_pred_ty for ConstantFP
367template <typename Predicate>
368using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>;
369
370/// This helper class is used to match scalar and vector constants that
371/// satisfy a specified predicate, and bind them to an APInt.
372template <typename Predicate> struct api_pred_ty : public Predicate {
373 const APInt *&Res;
374
375 api_pred_ty(const APInt *&R) : Res(R) {}
376
377 template <typename ITy> bool match(ITy *V) {
378 if (const auto *CI = dyn_cast<ConstantInt>(V))
379 if (this->isValue(CI->getValue())) {
380 Res = &CI->getValue();
381 return true;
382 }
383 if (V->getType()->isVectorTy())
384 if (const auto *C = dyn_cast<Constant>(V))
385 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
386 if (this->isValue(CI->getValue())) {
387 Res = &CI->getValue();
388 return true;
389 }
390
391 return false;
392 }
393};
394
395/// This helper class is used to match scalar and vector constants that
396/// satisfy a specified predicate, and bind them to an APFloat.
397/// Undefs are allowed in splat vector constants.
398template <typename Predicate> struct apf_pred_ty : public Predicate {
399 const APFloat *&Res;
400
401 apf_pred_ty(const APFloat *&R) : Res(R) {}
402
403 template <typename ITy> bool match(ITy *V) {
404 if (const auto *CI = dyn_cast<ConstantFP>(V))
405 if (this->isValue(CI->getValue())) {
406 Res = &CI->getValue();
407 return true;
408 }
409 if (V->getType()->isVectorTy())
410 if (const auto *C = dyn_cast<Constant>(V))
411 if (auto *CI = dyn_cast_or_null<ConstantFP>(
412 C->getSplatValue(/* AllowUndef */ true)))
413 if (this->isValue(CI->getValue())) {
414 Res = &CI->getValue();
415 return true;
416 }
417
418 return false;
419 }
420};
421
422///////////////////////////////////////////////////////////////////////////////
423//
424// Encapsulate constant value queries for use in templated predicate matchers.
425// This allows checking if constants match using compound predicates and works
426// with vector constants, possibly with relaxed constraints. For example, ignore
427// undef values.
428//
429///////////////////////////////////////////////////////////////////////////////
430
431struct is_any_apint {
432 bool isValue(const APInt &C) { return true; }
433};
434/// Match an integer or vector with any integral constant.
435/// For vectors, this includes constants with undefined elements.
436inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
437 return cst_pred_ty<is_any_apint>();
438}
439
440struct is_all_ones {
441 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
442};
443/// Match an integer or vector with all bits set.
444/// For vectors, this includes constants with undefined elements.
445inline cst_pred_ty<is_all_ones> m_AllOnes() {
446 return cst_pred_ty<is_all_ones>();
447}
448
449struct is_maxsignedvalue {
450 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
451};
452/// Match an integer or vector with values having all bits except for the high
453/// bit set (0x7f...).
454/// For vectors, this includes constants with undefined elements.
455inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
456 return cst_pred_ty<is_maxsignedvalue>();
457}
458inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
459 return V;
460}
461
462struct is_negative {
463 bool isValue(const APInt &C) { return C.isNegative(); }
464};
465/// Match an integer or vector of negative values.
466/// For vectors, this includes constants with undefined elements.
467inline cst_pred_ty<is_negative> m_Negative() {
468 return cst_pred_ty<is_negative>();
469}
470inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
471 return V;
472}
473
474struct is_nonnegative {
475 bool isValue(const APInt &C) { return C.isNonNegative(); }
476};
477/// Match an integer or vector of non-negative values.
478/// For vectors, this includes constants with undefined elements.
479inline cst_pred_ty<is_nonnegative> m_NonNegative() {
480 return cst_pred_ty<is_nonnegative>();
481}
482inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
483 return V;
484}
485
486struct is_strictlypositive {
487 bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
488};
489/// Match an integer or vector of strictly positive values.
490/// For vectors, this includes constants with undefined elements.
491inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
492 return cst_pred_ty<is_strictlypositive>();
493}
494inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
495 return V;
496}
497
498struct is_nonpositive {
499 bool isValue(const APInt &C) { return C.isNonPositive(); }
500};
501/// Match an integer or vector of non-positive values.
502/// For vectors, this includes constants with undefined elements.
503inline cst_pred_ty<is_nonpositive> m_NonPositive() {
504 return cst_pred_ty<is_nonpositive>();
505}
506inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
507
508struct is_one {
509 bool isValue(const APInt &C) { return C.isOneValue(); }
510};
511/// Match an integer 1 or a vector with all elements equal to 1.
512/// For vectors, this includes constants with undefined elements.
513inline cst_pred_ty<is_one> m_One() {
514 return cst_pred_ty<is_one>();
515}
516
517struct is_zero_int {
518 bool isValue(const APInt &C) { return C.isNullValue(); }
519};
520/// Match an integer 0 or a vector with all elements equal to 0.
521/// For vectors, this includes constants with undefined elements.
522inline cst_pred_ty<is_zero_int> m_ZeroInt() {
523 return cst_pred_ty<is_zero_int>();
524}
525
526struct is_zero {
527 template <typename ITy> bool match(ITy *V) {
528 auto *C = dyn_cast<Constant>(V);
529 // FIXME: this should be able to do something for scalable vectors
530 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
531 }
532};
533/// Match any null constant or a vector with all elements equal to 0.
534/// For vectors, this includes constants with undefined elements.
535inline is_zero m_Zero() {
536 return is_zero();
537}
538
539struct is_power2 {
540 bool isValue(const APInt &C) { return C.isPowerOf2(); }
541};
542/// Match an integer or vector power-of-2.
543/// For vectors, this includes constants with undefined elements.
544inline cst_pred_ty<is_power2> m_Power2() {
545 return cst_pred_ty<is_power2>();
546}
547inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
548 return V;
549}
550
551struct is_negated_power2 {
552 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
553};
554/// Match a integer or vector negated power-of-2.
555/// For vectors, this includes constants with undefined elements.
556inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
557 return cst_pred_ty<is_negated_power2>();
558}
559inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
560 return V;
561}
562
563struct is_power2_or_zero {
564 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
565};
566/// Match an integer or vector of 0 or power-of-2 values.
567/// For vectors, this includes constants with undefined elements.
568inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
569 return cst_pred_ty<is_power2_or_zero>();
570}
571inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
572 return V;
573}
574
575struct is_sign_mask {
576 bool isValue(const APInt &C) { return C.isSignMask(); }
577};
578/// Match an integer or vector with only the sign bit(s) set.
579/// For vectors, this includes constants with undefined elements.
580inline cst_pred_ty<is_sign_mask> m_SignMask() {
581 return cst_pred_ty<is_sign_mask>();
582}
583
584struct is_lowbit_mask {
585 bool isValue(const APInt &C) { return C.isMask(); }
586};
587/// Match an integer or vector with only the low bit(s) set.
588/// For vectors, this includes constants with undefined elements.
589inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
590 return cst_pred_ty<is_lowbit_mask>();
591}
592
593struct icmp_pred_with_threshold {
594 ICmpInst::Predicate Pred;
595 const APInt *Thr;
596 bool isValue(const APInt &C) {
597 switch (Pred) {
598 case ICmpInst::Predicate::ICMP_EQ:
599 return C.eq(*Thr);
600 case ICmpInst::Predicate::ICMP_NE:
601 return C.ne(*Thr);
602 case ICmpInst::Predicate::ICMP_UGT:
603 return C.ugt(*Thr);
604 case ICmpInst::Predicate::ICMP_UGE:
605 return C.uge(*Thr);
606 case ICmpInst::Predicate::ICMP_ULT:
607 return C.ult(*Thr);
608 case ICmpInst::Predicate::ICMP_ULE:
609 return C.ule(*Thr);
610 case ICmpInst::Predicate::ICMP_SGT:
611 return C.sgt(*Thr);
612 case ICmpInst::Predicate::ICMP_SGE:
613 return C.sge(*Thr);
614 case ICmpInst::Predicate::ICMP_SLT:
615 return C.slt(*Thr);
616 case ICmpInst::Predicate::ICMP_SLE:
617 return C.sle(*Thr);
618 default:
619 llvm_unreachable("Unhandled ICmp predicate")__builtin_unreachable();
620 }
621 }
622};
623/// Match an integer or vector with every element comparing 'pred' (eg/ne/...)
624/// to Threshold. For vectors, this includes constants with undefined elements.
625inline cst_pred_ty<icmp_pred_with_threshold>
626m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
627 cst_pred_ty<icmp_pred_with_threshold> P;
628 P.Pred = Predicate;
629 P.Thr = &Threshold;
630 return P;
631}
632
633struct is_nan {
634 bool isValue(const APFloat &C) { return C.isNaN(); }
635};
636/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
637/// For vectors, this includes constants with undefined elements.
638inline cstfp_pred_ty<is_nan> m_NaN() {
639 return cstfp_pred_ty<is_nan>();
640}
641
642struct is_nonnan {
643 bool isValue(const APFloat &C) { return !C.isNaN(); }
644};
645/// Match a non-NaN FP constant.
646/// For vectors, this includes constants with undefined elements.
647inline cstfp_pred_ty<is_nonnan> m_NonNaN() {
648 return cstfp_pred_ty<is_nonnan>();
649}
650
651struct is_inf {
652 bool isValue(const APFloat &C) { return C.isInfinity(); }
653};
654/// Match a positive or negative infinity FP constant.
655/// For vectors, this includes constants with undefined elements.
656inline cstfp_pred_ty<is_inf> m_Inf() {
657 return cstfp_pred_ty<is_inf>();
658}
659
660struct is_noninf {
661 bool isValue(const APFloat &C) { return !C.isInfinity(); }
662};
663/// Match a non-infinity FP constant, i.e. finite or NaN.
664/// For vectors, this includes constants with undefined elements.
665inline cstfp_pred_ty<is_noninf> m_NonInf() {
666 return cstfp_pred_ty<is_noninf>();
667}
668
669struct is_finite {
670 bool isValue(const APFloat &C) { return C.isFinite(); }
671};
672/// Match a finite FP constant, i.e. not infinity or NaN.
673/// For vectors, this includes constants with undefined elements.
674inline cstfp_pred_ty<is_finite> m_Finite() {
675 return cstfp_pred_ty<is_finite>();
676}
677inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
678
679struct is_finitenonzero {
680 bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
681};
682/// Match a finite non-zero FP constant.
683/// For vectors, this includes constants with undefined elements.
684inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
685 return cstfp_pred_ty<is_finitenonzero>();
686}
687inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
688 return V;
689}
690
691struct is_any_zero_fp {
692 bool isValue(const APFloat &C) { return C.isZero(); }
693};
694/// Match a floating-point negative zero or positive zero.
695/// For vectors, this includes constants with undefined elements.
696inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
697 return cstfp_pred_ty<is_any_zero_fp>();
698}
699
700struct is_pos_zero_fp {
701 bool isValue(const APFloat &C) { return C.isPosZero(); }
702};
703/// Match a floating-point positive zero.
704/// For vectors, this includes constants with undefined elements.
705inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
706 return cstfp_pred_ty<is_pos_zero_fp>();
707}
708
709struct is_neg_zero_fp {
710 bool isValue(const APFloat &C) { return C.isNegZero(); }
711};
712/// Match a floating-point negative zero.
713/// For vectors, this includes constants with undefined elements.
714inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
715 return cstfp_pred_ty<is_neg_zero_fp>();
716}
717
718struct is_non_zero_fp {
719 bool isValue(const APFloat &C) { return C.isNonZero(); }
720};
721/// Match a floating-point non-zero.
722/// For vectors, this includes constants with undefined elements.
723inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() {
724 return cstfp_pred_ty<is_non_zero_fp>();
725}
726
727///////////////////////////////////////////////////////////////////////////////
728
729template <typename Class> struct bind_ty {
730 Class *&VR;
731
732 bind_ty(Class *&V) : VR(V) {}
733
734 template <typename ITy> bool match(ITy *V) {
735 if (auto *CV = dyn_cast<Class>(V)) {
736 VR = CV;
737 return true;
738 }
739 return false;
740 }
741};
742
743/// Match a value, capturing it if we match.
744inline bind_ty<Value> m_Value(Value *&V) { return V; }
745inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
746
747/// Match an instruction, capturing it if we match.
748inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
749/// Match a unary operator, capturing it if we match.
750inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
751/// Match a binary operator, capturing it if we match.
752inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
753/// Match a with overflow intrinsic, capturing it if we match.
754inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
755inline bind_ty<const WithOverflowInst>
756m_WithOverflowInst(const WithOverflowInst *&I) {
757 return I;
758}
759
760/// Match a Constant, capturing the value if we match.
761inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
762
763/// Match a ConstantInt, capturing the value if we match.
764inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
765
766/// Match a ConstantFP, capturing the value if we match.
767inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
768
769/// Match a ConstantExpr, capturing the value if we match.
770inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; }
771
772/// Match a basic block value, capturing it if we match.
773inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
774inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
775 return V;
776}
777
778/// Match an arbitrary immediate Constant and ignore it.
779inline match_combine_and<class_match<Constant>,
780 match_unless<class_match<ConstantExpr>>>
781m_ImmConstant() {
782 return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr()));
783}
784
785/// Match an immediate Constant, capturing the value if we match.
786inline match_combine_and<bind_ty<Constant>,
787 match_unless<class_match<ConstantExpr>>>
788m_ImmConstant(Constant *&C) {
789 return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr()));
790}
791
792/// Match a specified Value*.
793struct specificval_ty {
794 const Value *Val;
795
796 specificval_ty(const Value *V) : Val(V) {}
797
798 template <typename ITy> bool match(ITy *V) { return V == Val; }
799};
800
801/// Match if we have a specific specified value.
802inline specificval_ty m_Specific(const Value *V) { return V; }
803
804/// Stores a reference to the Value *, not the Value * itself,
805/// thus can be used in commutative matchers.
806template <typename Class> struct deferredval_ty {
807 Class *const &Val;
808
809 deferredval_ty(Class *const &V) : Val(V) {}
810
811 template <typename ITy> bool match(ITy *const V) { return V == Val; }
812};
813
814/// Like m_Specific(), but works if the specific value to match is determined
815/// as part of the same match() expression. For example:
816/// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will
817/// bind X before the pattern match starts.
818/// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against
819/// whichever value m_Value(X) populated.
820inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
821inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
822 return V;
823}
824
825/// Match a specified floating point value or vector of all elements of
826/// that value.
827struct specific_fpval {
828 double Val;
829
830 specific_fpval(double V) : Val(V) {}
831
832 template <typename ITy> bool match(ITy *V) {
833 if (const auto *CFP = dyn_cast<ConstantFP>(V))
834 return CFP->isExactlyValue(Val);
835 if (V->getType()->isVectorTy())
836 if (const auto *C = dyn_cast<Constant>(V))
837 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
838 return CFP->isExactlyValue(Val);
839 return false;
840 }
841};
842
843/// Match a specific floating point value or vector with all elements
844/// equal to the value.
845inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
846
847/// Match a float 1.0 or vector with all elements equal to 1.0.
848inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
849
850struct bind_const_intval_ty {
851 uint64_t &VR;
852
853 bind_const_intval_ty(uint64_t &V) : VR(V) {}
854
855 template <typename ITy> bool match(ITy *V) {
856 if (const auto *CV = dyn_cast<ConstantInt>(V))
857 if (CV->getValue().ule(UINT64_MAX(18446744073709551615UL))) {
858 VR = CV->getZExtValue();
859 return true;
860 }
861 return false;
862 }
863};
864
865/// Match a specified integer value or vector of all elements of that
866/// value.
867template <bool AllowUndefs>
868struct specific_intval {
869 APInt Val;
870
871 specific_intval(APInt V) : Val(std::move(V)) {}
872
873 template <typename ITy> bool match(ITy *V) {
874 const auto *CI = dyn_cast<ConstantInt>(V);
875 if (!CI && V->getType()->isVectorTy())
876 if (const auto *C = dyn_cast<Constant>(V))
877 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs));
878
879 return CI && APInt::isSameValue(CI->getValue(), Val);
880 }
881};
882
883/// Match a specific integer value or vector with all elements equal to
884/// the value.
885inline specific_intval<false> m_SpecificInt(APInt V) {
886 return specific_intval<false>(std::move(V));
887}
888
889inline specific_intval<false> m_SpecificInt(uint64_t V) {
890 return m_SpecificInt(APInt(64, V));
891}
892
893inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) {
894 return specific_intval<true>(std::move(V));
895}
896
897inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) {
898 return m_SpecificIntAllowUndef(APInt(64, V));
899}
900
901/// Match a ConstantInt and bind to its value. This does not match
902/// ConstantInts wider than 64-bits.
903inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
904
905/// Match a specified basic block value.
906struct specific_bbval {
907 BasicBlock *Val;
908
909 specific_bbval(BasicBlock *Val) : Val(Val) {}
910
911 template <typename ITy> bool match(ITy *V) {
912 const auto *BB = dyn_cast<BasicBlock>(V);
913 return BB && BB == Val;
914 }
915};
916
917/// Match a specific basic block value.
918inline specific_bbval m_SpecificBB(BasicBlock *BB) {
919 return specific_bbval(BB);
920}
921
922/// A commutative-friendly version of m_Specific().
923inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
924 return BB;
925}
926inline deferredval_ty<const BasicBlock>
927m_Deferred(const BasicBlock *const &BB) {
928 return BB;
929}
930
931//===----------------------------------------------------------------------===//
932// Matcher for any binary operator.
933//
934template <typename LHS_t, typename RHS_t, bool Commutable = false>
935struct AnyBinaryOp_match {
936 LHS_t L;
937 RHS_t R;
938
939 // The evaluation order is always stable, regardless of Commutability.
940 // The LHS is always matched first.
941 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
942
943 template <typename OpTy> bool match(OpTy *V) {
944 if (auto *I = dyn_cast<BinaryOperator>(V))
945 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
946 (Commutable && L.match(I->getOperand(1)) &&
947 R.match(I->getOperand(0)));
948 return false;
949 }
950};
951
952template <typename LHS, typename RHS>
953inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
954 return AnyBinaryOp_match<LHS, RHS>(L, R);
955}
956
957//===----------------------------------------------------------------------===//
958// Matcher for any unary operator.
959// TODO fuse unary, binary matcher into n-ary matcher
960//
961template <typename OP_t> struct AnyUnaryOp_match {
962 OP_t X;
963
964 AnyUnaryOp_match(const OP_t &X) : X(X) {}
965
966 template <typename OpTy> bool match(OpTy *V) {
967 if (auto *I = dyn_cast<UnaryOperator>(V))
968 return X.match(I->getOperand(0));
969 return false;
970 }
971};
972
973template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) {
974 return AnyUnaryOp_match<OP_t>(X);
975}
976
977//===----------------------------------------------------------------------===//
978// Matchers for specific binary operators.
979//
980
981template <typename LHS_t, typename RHS_t, unsigned Opcode,
982 bool Commutable = false>
983struct BinaryOp_match {
984 LHS_t L;
985 RHS_t R;
986
987 // The evaluation order is always stable, regardless of Commutability.
988 // The LHS is always matched first.
989 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
990
991 template <typename OpTy> bool match(OpTy *V) {
992 if (V->getValueID() == Value::InstructionVal + Opcode) {
993 auto *I = cast<BinaryOperator>(V);
994 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
995 (Commutable && L.match(I->getOperand(1)) &&
996 R.match(I->getOperand(0)));
997 }
998 if (auto *CE = dyn_cast<ConstantExpr>(V))
999 return CE->getOpcode() == Opcode &&
1000 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
1001 (Commutable && L.match(CE->getOperand(1)) &&
1002 R.match(CE->getOperand(0))));
1003 return false;
1004 }
1005};
1006
1007template <typename LHS, typename RHS>
1008inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
1009 const RHS &R) {
1010 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
1011}
1012
1013template <typename LHS, typename RHS>
1014inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
1015 const RHS &R) {
1016 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
1017}
1018
1019template <typename LHS, typename RHS>
1020inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
1021 const RHS &R) {
1022 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
1023}
1024
1025template <typename LHS, typename RHS>
1026inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
1027 const RHS &R) {
1028 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
1029}
1030
1031template <typename Op_t> struct FNeg_match {
1032 Op_t X;
1033
1034 FNeg_match(const Op_t &Op) : X(Op) {}
1035 template <typename OpTy> bool match(OpTy *V) {
1036 auto *FPMO = dyn_cast<FPMathOperator>(V);
1037 if (!FPMO) return false;
1038
1039 if (FPMO->getOpcode() == Instruction::FNeg)
1040 return X.match(FPMO->getOperand(0));
1041
1042 if (FPMO->getOpcode() == Instruction::FSub) {
1043 if (FPMO->hasNoSignedZeros()) {
1044 // With 'nsz', any zero goes.
1045 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
1046 return false;
1047 } else {
1048 // Without 'nsz', we need fsub -0.0, X exactly.
1049 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
1050 return false;
1051 }
1052
1053 return X.match(FPMO->getOperand(1));
1054 }
1055
1056 return false;
1057 }
1058};
1059
1060/// Match 'fneg X' as 'fsub -0.0, X'.
1061template <typename OpTy>
1062inline FNeg_match<OpTy>
1063m_FNeg(const OpTy &X) {
1064 return FNeg_match<OpTy>(X);
1065}
1066
1067/// Match 'fneg X' as 'fsub +-0.0, X'.
1068template <typename RHS>
1069inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
1070m_FNegNSZ(const RHS &X) {
1071 return m_FSub(m_AnyZeroFP(), X);
1072}
1073
1074template <typename LHS, typename RHS>
1075inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
1076 const RHS &R) {
1077 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
1078}
1079
1080template <typename LHS, typename RHS>
1081inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
1082 const RHS &R) {
1083 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
1084}
1085
1086template <typename LHS, typename RHS>
1087inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
1088 const RHS &R) {
1089 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
1090}
1091
1092template <typename LHS, typename RHS>
1093inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
1094 const RHS &R) {
1095 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
1096}
1097
1098template <typename LHS, typename RHS>
1099inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
1100 const RHS &R) {
1101 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
1102}
1103
1104template <typename LHS, typename RHS>
1105inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
1106 const RHS &R) {
1107 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
1108}
1109
1110template <typename LHS, typename RHS>
1111inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
1112 const RHS &R) {
1113 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
1114}
1115
1116template <typename LHS, typename RHS>
1117inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
1118 const RHS &R) {
1119 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
1120}
1121
1122template <typename LHS, typename RHS>
1123inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
1124 const RHS &R) {
1125 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
1126}
1127
1128template <typename LHS, typename RHS>
1129inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
1130 const RHS &R) {
1131 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
1132}
1133
1134template <typename LHS, typename RHS>
1135inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
1136 const RHS &R) {
1137 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
1138}
1139
1140template <typename LHS, typename RHS>
1141inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
1142 const RHS &R) {
1143 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
1144}
1145
1146template <typename LHS, typename RHS>
1147inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
1148 const RHS &R) {
1149 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
1150}
1151
1152template <typename LHS, typename RHS>
1153inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
1154 const RHS &R) {
1155 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
1156}
1157
1158template <typename LHS_t, typename RHS_t, unsigned Opcode,
1159 unsigned WrapFlags = 0>
1160struct OverflowingBinaryOp_match {
1161 LHS_t L;
1162 RHS_t R;
1163
1164 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
1165 : L(LHS), R(RHS) {}
1166
1167 template <typename OpTy> bool match(OpTy *V) {
1168 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
1169 if (Op->getOpcode() != Opcode)
1170 return false;
1171 if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) &&
1172 !Op->hasNoUnsignedWrap())
1173 return false;
1174 if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) &&
1175 !Op->hasNoSignedWrap())
1176 return false;
1177 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
1178 }
1179 return false;
1180 }
1181};
1182
1183template <typename LHS, typename RHS>
1184inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1185 OverflowingBinaryOperator::NoSignedWrap>
1186m_NSWAdd(const LHS &L, const RHS &R) {
1187 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1188 OverflowingBinaryOperator::NoSignedWrap>(
1189 L, R);
1190}
1191template <typename LHS, typename RHS>
1192inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1193 OverflowingBinaryOperator::NoSignedWrap>
1194m_NSWSub(const LHS &L, const RHS &R) {
1195 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1196 OverflowingBinaryOperator::NoSignedWrap>(
1197 L, R);
1198}
1199template <typename LHS, typename RHS>
1200inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1201 OverflowingBinaryOperator::NoSignedWrap>
1202m_NSWMul(const LHS &L, const RHS &R) {
1203 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1204 OverflowingBinaryOperator::NoSignedWrap>(
1205 L, R);
1206}
1207template <typename LHS, typename RHS>
1208inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1209 OverflowingBinaryOperator::NoSignedWrap>
1210m_NSWShl(const LHS &L, const RHS &R) {
1211 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1212 OverflowingBinaryOperator::NoSignedWrap>(
1213 L, R);
1214}
1215
1216template <typename LHS, typename RHS>
1217inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1218 OverflowingBinaryOperator::NoUnsignedWrap>
1219m_NUWAdd(const LHS &L, const RHS &R) {
1220 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1221 OverflowingBinaryOperator::NoUnsignedWrap>(
1222 L, R);
1223}
1224template <typename LHS, typename RHS>
1225inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1226 OverflowingBinaryOperator::NoUnsignedWrap>
1227m_NUWSub(const LHS &L, const RHS &R) {
1228 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1229 OverflowingBinaryOperator::NoUnsignedWrap>(
1230 L, R);
1231}
1232template <typename LHS, typename RHS>
1233inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1234 OverflowingBinaryOperator::NoUnsignedWrap>
1235m_NUWMul(const LHS &L, const RHS &R) {
1236 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1237 OverflowingBinaryOperator::NoUnsignedWrap>(
1238 L, R);
1239}
1240template <typename LHS, typename RHS>
1241inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1242 OverflowingBinaryOperator::NoUnsignedWrap>
1243m_NUWShl(const LHS &L, const RHS &R) {
1244 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1245 OverflowingBinaryOperator::NoUnsignedWrap>(
1246 L, R);
1247}
1248
1249//===----------------------------------------------------------------------===//
1250// Class that matches a group of binary opcodes.
1251//
1252template <typename LHS_t, typename RHS_t, typename Predicate>
1253struct BinOpPred_match : Predicate {
1254 LHS_t L;
1255 RHS_t R;
1256
1257 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1258
1259 template <typename OpTy> bool match(OpTy *V) {
1260 if (auto *I = dyn_cast<Instruction>(V))
1261 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1262 R.match(I->getOperand(1));
1263 if (auto *CE = dyn_cast<ConstantExpr>(V))
1264 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1265 R.match(CE->getOperand(1));
1266 return false;
1267 }
1268};
1269
1270struct is_shift_op {
1271 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1272};
1273
1274struct is_right_shift_op {
1275 bool isOpType(unsigned Opcode) {
1276 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1277 }
1278};
1279
1280struct is_logical_shift_op {
1281 bool isOpType(unsigned Opcode) {
1282 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1283 }
1284};
1285
1286struct is_bitwiselogic_op {
1287 bool isOpType(unsigned Opcode) {
1288 return Instruction::isBitwiseLogicOp(Opcode);
1289 }
1290};
1291
1292struct is_idiv_op {
1293 bool isOpType(unsigned Opcode) {
1294 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1295 }
1296};
1297
1298struct is_irem_op {
1299 bool isOpType(unsigned Opcode) {
1300 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1301 }
1302};
1303
1304/// Matches shift operations.
1305template <typename LHS, typename RHS>
1306inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1307 const RHS &R) {
1308 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1309}
1310
1311/// Matches logical shift operations.
1312template <typename LHS, typename RHS>
1313inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1314 const RHS &R) {
1315 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1316}
1317
1318/// Matches logical shift operations.
1319template <typename LHS, typename RHS>
1320inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1321m_LogicalShift(const LHS &L, const RHS &R) {
1322 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1323}
1324
1325/// Matches bitwise logic operations.
1326template <typename LHS, typename RHS>
1327inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1328m_BitwiseLogic(const LHS &L, const RHS &R) {
1329 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1330}
1331
1332/// Matches integer division operations.
1333template <typename LHS, typename RHS>
1334inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1335 const RHS &R) {
1336 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1337}
1338
1339/// Matches integer remainder operations.
1340template <typename LHS, typename RHS>
1341inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1342 const RHS &R) {
1343 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1344}
1345
1346//===----------------------------------------------------------------------===//
1347// Class that matches exact binary ops.
1348//
1349template <typename SubPattern_t> struct Exact_match {
1350 SubPattern_t SubPattern;
1351
1352 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1353
1354 template <typename OpTy> bool match(OpTy *V) {
1355 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1356 return PEO->isExact() && SubPattern.match(V);
1357 return false;
1358 }
1359};
1360
1361template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1362 return SubPattern;
1363}
1364
1365//===----------------------------------------------------------------------===//
1366// Matchers for CmpInst classes
1367//
1368
1369template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1370 bool Commutable = false>
1371struct CmpClass_match {
1372 PredicateTy &Predicate;
1373 LHS_t L;
1374 RHS_t R;
1375
1376 // The evaluation order is always stable, regardless of Commutability.
1377 // The LHS is always matched first.
1378 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1379 : Predicate(Pred), L(LHS), R(RHS) {}
1380
1381 template <typename OpTy> bool match(OpTy *V) {
1382 if (auto *I = dyn_cast<Class>(V)) {
1383 if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
1384 Predicate = I->getPredicate();
1385 return true;
1386 } else if (Commutable && L.match(I->getOperand(1)) &&
1387 R.match(I->getOperand(0))) {
1388 Predicate = I->getSwappedPredicate();
1389 return true;
1390 }
1391 }
1392 return false;
1393 }
1394};
1395
1396template <typename LHS, typename RHS>
1397inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1398m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1399 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1400}
1401
1402template <typename LHS, typename RHS>
1403inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1404m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1405 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1406}
1407
1408template <typename LHS, typename RHS>
1409inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1410m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1411 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1412}
1413
1414//===----------------------------------------------------------------------===//
1415// Matchers for instructions with a given opcode and number of operands.
1416//
1417
1418/// Matches instructions with Opcode and three operands.
1419template <typename T0, unsigned Opcode> struct OneOps_match {
1420 T0 Op1;
1421
1422 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1423
1424 template <typename OpTy> bool match(OpTy *V) {
1425 if (V->getValueID() == Value::InstructionVal + Opcode) {
1426 auto *I = cast<Instruction>(V);
1427 return Op1.match(I->getOperand(0));
1428 }
1429 return false;
1430 }
1431};
1432
1433/// Matches instructions with Opcode and three operands.
1434template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1435 T0 Op1;
1436 T1 Op2;
1437
1438 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1439
1440 template <typename OpTy> bool match(OpTy *V) {
1441 if (V->getValueID() == Value::InstructionVal + Opcode) {
1442 auto *I = cast<Instruction>(V);
1443 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1444 }
1445 return false;
1446 }
1447};
1448
1449/// Matches instructions with Opcode and three operands.
1450template <typename T0, typename T1, typename T2, unsigned Opcode>
1451struct ThreeOps_match {
1452 T0 Op1;
1453 T1 Op2;
1454 T2 Op3;
1455
1456 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1457 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1458
1459 template <typename OpTy> bool match(OpTy *V) {
1460 if (V->getValueID() == Value::InstructionVal + Opcode) {
28
Assuming the condition is true
29
Taking true branch
1461 auto *I = cast<Instruction>(V);
30
'V' is a 'Instruction'
1462 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
31
Returning the value 1, which participates in a condition later
1463 Op3.match(I->getOperand(2));
1464 }
1465 return false;
1466 }
1467};
1468
1469/// Matches SelectInst.
1470template <typename Cond, typename LHS, typename RHS>
1471inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1472m_Select(const Cond &C, const LHS &L, const RHS &R) {
1473 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1474}
1475
1476/// This matches a select of two constants, e.g.:
1477/// m_SelectCst<-1, 0>(m_Value(V))
1478template <int64_t L, int64_t R, typename Cond>
1479inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1480 Instruction::Select>
1481m_SelectCst(const Cond &C) {
1482 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1483}
1484
1485/// Matches FreezeInst.
1486template <typename OpTy>
1487inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1488 return OneOps_match<OpTy, Instruction::Freeze>(Op);
1489}
1490
1491/// Matches InsertElementInst.
1492template <typename Val_t, typename Elt_t, typename Idx_t>
1493inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1494m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1495 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1496 Val, Elt, Idx);
1497}
1498
1499/// Matches ExtractElementInst.
1500template <typename Val_t, typename Idx_t>
1501inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1502m_ExtractElt(const Val_t &Val, const Idx_t &Idx) {
1503 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1504}
1505
1506/// Matches shuffle.
1507template <typename T0, typename T1, typename T2> struct Shuffle_match {
1508 T0 Op1;
1509 T1 Op2;
1510 T2 Mask;
1511
1512 Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask)
1513 : Op1(Op1), Op2(Op2), Mask(Mask) {}
1514
1515 template <typename OpTy> bool match(OpTy *V) {
1516 if (auto *I = dyn_cast<ShuffleVectorInst>(V)) {
1517 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1518 Mask.match(I->getShuffleMask());
1519 }
1520 return false;
1521 }
1522};
1523
1524struct m_Mask {
1525 ArrayRef<int> &MaskRef;
1526 m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1527 bool match(ArrayRef<int> Mask) {
1528 MaskRef = Mask;
1529 return true;
1530 }
1531};
1532
1533struct m_ZeroMask {
1534 bool match(ArrayRef<int> Mask) {
1535 return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; });
1536 }
1537};
1538
1539struct m_SpecificMask {
1540 ArrayRef<int> &MaskRef;
1541 m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1542 bool match(ArrayRef<int> Mask) { return MaskRef == Mask; }
1543};
1544
1545struct m_SplatOrUndefMask {
1546 int &SplatIndex;
1547 m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {}
1548 bool match(ArrayRef<int> Mask) {
1549 auto First = find_if(Mask, [](int Elem) { return Elem != -1; });
1550 if (First == Mask.end())
1551 return false;
1552 SplatIndex = *First;
1553 return all_of(Mask,
1554 [First](int Elem) { return Elem == *First || Elem == -1; });
1555 }
1556};
1557
1558/// Matches ShuffleVectorInst independently of mask value.
1559template <typename V1_t, typename V2_t>
1560inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>
1561m_Shuffle(const V1_t &v1, const V2_t &v2) {
1562 return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2);
1563}
1564
1565template <typename V1_t, typename V2_t, typename Mask_t>
1566inline Shuffle_match<V1_t, V2_t, Mask_t>
1567m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) {
1568 return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask);
1569}
1570
1571/// Matches LoadInst.
1572template <typename OpTy>
1573inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1574 return OneOps_match<OpTy, Instruction::Load>(Op);
1575}
1576
1577/// Matches StoreInst.
1578template <typename ValueOpTy, typename PointerOpTy>
1579inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1580m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1581 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1582 PointerOp);
1583}
1584
1585//===----------------------------------------------------------------------===//
1586// Matchers for CastInst classes
1587//
1588
1589template <typename Op_t, unsigned Opcode> struct CastClass_match {
1590 Op_t Op;
1591
1592 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1593
1594 template <typename OpTy> bool match(OpTy *V) {
1595 if (auto *O = dyn_cast<Operator>(V))
1596 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1597 return false;
1598 }
1599};
1600
1601/// Matches BitCast.
1602template <typename OpTy>
1603inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1604 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1605}
1606
1607/// Matches PtrToInt.
1608template <typename OpTy>
1609inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1610 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1611}
1612
1613/// Matches IntToPtr.
1614template <typename OpTy>
1615inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
1616 return CastClass_match<OpTy, Instruction::IntToPtr>(Op);
1617}
1618
1619/// Matches Trunc.
1620template <typename OpTy>
1621inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1622 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1623}
1624
1625template <typename OpTy>
1626inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1627m_TruncOrSelf(const OpTy &Op) {
1628 return m_CombineOr(m_Trunc(Op), Op);
1629}
1630
1631/// Matches SExt.
1632template <typename OpTy>
1633inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1634 return CastClass_match<OpTy, Instruction::SExt>(Op);
1635}
1636
1637/// Matches ZExt.
1638template <typename OpTy>
1639inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1640 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1641}
1642
1643template <typename OpTy>
1644inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1645m_ZExtOrSelf(const OpTy &Op) {
1646 return m_CombineOr(m_ZExt(Op), Op);
1647}
1648
1649template <typename OpTy>
1650inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1651m_SExtOrSelf(const OpTy &Op) {
1652 return m_CombineOr(m_SExt(Op), Op);
1653}
1654
1655template <typename OpTy>
1656inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1657 CastClass_match<OpTy, Instruction::SExt>>
1658m_ZExtOrSExt(const OpTy &Op) {
1659 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1660}
1661
1662template <typename OpTy>
1663inline match_combine_or<
1664 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1665 CastClass_match<OpTy, Instruction::SExt>>,
1666 OpTy>
1667m_ZExtOrSExtOrSelf(const OpTy &Op) {
1668 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1669}
1670
1671template <typename OpTy>
1672inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1673 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1674}
1675
1676template <typename OpTy>
1677inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1678 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1679}
1680
1681template <typename OpTy>
1682inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
1683 return CastClass_match<OpTy, Instruction::FPToUI>(Op);
1684}
1685
1686template <typename OpTy>
1687inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
1688 return CastClass_match<OpTy, Instruction::FPToSI>(Op);
1689}
1690
1691template <typename OpTy>
1692inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1693 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1694}
1695
1696template <typename OpTy>
1697inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1698 return CastClass_match<OpTy, Instruction::FPExt>(Op);