File: | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp |
Warning: | line 196, column 39 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- InstCombineVectorOps.cpp -------------------------------------------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file implements instcombine for ExtractElement, InsertElement and | ||||
10 | // ShuffleVector. | ||||
11 | // | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "InstCombineInternal.h" | ||||
15 | #include "llvm/ADT/APInt.h" | ||||
16 | #include "llvm/ADT/ArrayRef.h" | ||||
17 | #include "llvm/ADT/DenseMap.h" | ||||
18 | #include "llvm/ADT/STLExtras.h" | ||||
19 | #include "llvm/ADT/SmallVector.h" | ||||
20 | #include "llvm/Analysis/InstructionSimplify.h" | ||||
21 | #include "llvm/Analysis/VectorUtils.h" | ||||
22 | #include "llvm/IR/BasicBlock.h" | ||||
23 | #include "llvm/IR/Constant.h" | ||||
24 | #include "llvm/IR/Constants.h" | ||||
25 | #include "llvm/IR/DerivedTypes.h" | ||||
26 | #include "llvm/IR/InstrTypes.h" | ||||
27 | #include "llvm/IR/Instruction.h" | ||||
28 | #include "llvm/IR/Instructions.h" | ||||
29 | #include "llvm/IR/Operator.h" | ||||
30 | #include "llvm/IR/PatternMatch.h" | ||||
31 | #include "llvm/IR/Type.h" | ||||
32 | #include "llvm/IR/User.h" | ||||
33 | #include "llvm/IR/Value.h" | ||||
34 | #include "llvm/Support/Casting.h" | ||||
35 | #include "llvm/Support/ErrorHandling.h" | ||||
36 | #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" | ||||
37 | #include <cassert> | ||||
38 | #include <cstdint> | ||||
39 | #include <iterator> | ||||
40 | #include <utility> | ||||
41 | |||||
42 | using namespace llvm; | ||||
43 | using namespace PatternMatch; | ||||
44 | |||||
45 | #define DEBUG_TYPE"instcombine" "instcombine" | ||||
46 | |||||
47 | /// Return true if the value is cheaper to scalarize than it is to leave as a | ||||
48 | /// vector operation. IsConstantExtractIndex indicates whether we are extracting | ||||
49 | /// one known element from a vector constant. | ||||
50 | /// | ||||
51 | /// FIXME: It's possible to create more instructions than previously existed. | ||||
52 | static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { | ||||
53 | // If we can pick a scalar constant value out of a vector, that is free. | ||||
54 | if (auto *C = dyn_cast<Constant>(V)) | ||||
55 | return IsConstantExtractIndex || C->getSplatValue(); | ||||
56 | |||||
57 | // An insertelement to the same constant index as our extract will simplify | ||||
58 | // to the scalar inserted element. An insertelement to a different constant | ||||
59 | // index is irrelevant to our extract. | ||||
60 | if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt()))) | ||||
61 | return IsConstantExtractIndex; | ||||
62 | |||||
63 | if (match(V, m_OneUse(m_Load(m_Value())))) | ||||
64 | return true; | ||||
65 | |||||
66 | Value *V0, *V1; | ||||
67 | if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1))))) | ||||
68 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||
69 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||
70 | return true; | ||||
71 | |||||
72 | CmpInst::Predicate UnusedPred; | ||||
73 | if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1))))) | ||||
74 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||
75 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||
76 | return true; | ||||
77 | |||||
78 | return false; | ||||
79 | } | ||||
80 | |||||
81 | // If we have a PHI node with a vector type that is only used to feed | ||||
82 | // itself and be an operand of extractelement at a constant location, | ||||
83 | // try to replace the PHI of the vector type with a PHI of a scalar type. | ||||
84 | Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { | ||||
85 | SmallVector<Instruction *, 2> Extracts; | ||||
86 | // The users we want the PHI to have are: | ||||
87 | // 1) The EI ExtractElement (we already know this) | ||||
88 | // 2) Possibly more ExtractElements with the same index. | ||||
89 | // 3) Another operand, which will feed back into the PHI. | ||||
90 | Instruction *PHIUser = nullptr; | ||||
91 | for (auto U : PN->users()) { | ||||
92 | if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) { | ||||
93 | if (EI.getIndexOperand() == EU->getIndexOperand()) | ||||
94 | Extracts.push_back(EU); | ||||
95 | else | ||||
96 | return nullptr; | ||||
97 | } else if (!PHIUser) { | ||||
98 | PHIUser = cast<Instruction>(U); | ||||
99 | } else { | ||||
100 | return nullptr; | ||||
101 | } | ||||
102 | } | ||||
103 | |||||
104 | if (!PHIUser) | ||||
105 | return nullptr; | ||||
106 | |||||
107 | // Verify that this PHI user has one use, which is the PHI itself, | ||||
108 | // and that it is a binary operation which is cheap to scalarize. | ||||
109 | // otherwise return nullptr. | ||||
110 | if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || | ||||
111 | !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) | ||||
112 | return nullptr; | ||||
113 | |||||
114 | // Create a scalar PHI node that will replace the vector PHI node | ||||
115 | // just before the current PHI node. | ||||
116 | PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( | ||||
117 | PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); | ||||
118 | // Scalarize each PHI operand. | ||||
119 | for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { | ||||
120 | Value *PHIInVal = PN->getIncomingValue(i); | ||||
121 | BasicBlock *inBB = PN->getIncomingBlock(i); | ||||
122 | Value *Elt = EI.getIndexOperand(); | ||||
123 | // If the operand is the PHI induction variable: | ||||
124 | if (PHIInVal == PHIUser) { | ||||
125 | // Scalarize the binary operation. Its first operand is the | ||||
126 | // scalar PHI, and the second operand is extracted from the other | ||||
127 | // vector operand. | ||||
128 | BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); | ||||
129 | unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; | ||||
130 | Value *Op = InsertNewInstWith( | ||||
131 | ExtractElementInst::Create(B0->getOperand(opId), Elt, | ||||
132 | B0->getOperand(opId)->getName() + ".Elt"), | ||||
133 | *B0); | ||||
134 | Value *newPHIUser = InsertNewInstWith( | ||||
135 | BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), | ||||
136 | scalarPHI, Op, B0), *B0); | ||||
137 | scalarPHI->addIncoming(newPHIUser, inBB); | ||||
138 | } else { | ||||
139 | // Scalarize PHI input: | ||||
140 | Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); | ||||
141 | // Insert the new instruction into the predecessor basic block. | ||||
142 | Instruction *pos = dyn_cast<Instruction>(PHIInVal); | ||||
143 | BasicBlock::iterator InsertPos; | ||||
144 | if (pos && !isa<PHINode>(pos)) { | ||||
145 | InsertPos = ++pos->getIterator(); | ||||
146 | } else { | ||||
147 | InsertPos = inBB->getFirstInsertionPt(); | ||||
148 | } | ||||
149 | |||||
150 | InsertNewInstWith(newEI, *InsertPos); | ||||
151 | |||||
152 | scalarPHI->addIncoming(newEI, inBB); | ||||
153 | } | ||||
154 | } | ||||
155 | |||||
156 | for (auto E : Extracts) | ||||
157 | replaceInstUsesWith(*E, scalarPHI); | ||||
158 | |||||
159 | return &EI; | ||||
160 | } | ||||
161 | |||||
162 | static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, | ||||
163 | InstCombiner::BuilderTy &Builder, | ||||
164 | bool IsBigEndian) { | ||||
165 | Value *X; | ||||
166 | uint64_t ExtIndexC; | ||||
167 | if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || | ||||
168 | !X->getType()->isVectorTy() || | ||||
169 | !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) | ||||
170 | return nullptr; | ||||
171 | |||||
172 | // If this extractelement is using a bitcast from a vector of the same number | ||||
173 | // of elements, see if we can find the source element from the source vector: | ||||
174 | // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] | ||||
175 | Type *SrcTy = X->getType(); | ||||
176 | Type *DestTy = Ext.getType(); | ||||
177 | unsigned NumSrcElts = SrcTy->getVectorNumElements(); | ||||
178 | unsigned NumElts = Ext.getVectorOperandType()->getNumElements(); | ||||
179 | if (NumSrcElts == NumElts) | ||||
180 | if (Value *Elt = findScalarElement(X, ExtIndexC)) | ||||
181 | return new BitCastInst(Elt, DestTy); | ||||
182 | |||||
183 | // If the source elements are wider than the destination, try to shift and | ||||
184 | // truncate a subset of scalar bits of an insert op. | ||||
185 | if (NumSrcElts < NumElts) { | ||||
186 | Value *Scalar; | ||||
187 | uint64_t InsIndexC; | ||||
188 | if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar), | ||||
189 | m_ConstantInt(InsIndexC)))) | ||||
190 | return nullptr; | ||||
191 | |||||
192 | // The extract must be from the subset of vector elements that we inserted | ||||
193 | // into. Example: if we inserted element 1 of a <2 x i64> and we are | ||||
194 | // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 | ||||
195 | // of elements 4-7 of the bitcasted vector. | ||||
196 | unsigned NarrowingRatio = NumElts / NumSrcElts; | ||||
| |||||
197 | if (ExtIndexC / NarrowingRatio != InsIndexC) | ||||
198 | return nullptr; | ||||
199 | |||||
200 | // We are extracting part of the original scalar. How that scalar is | ||||
201 | // inserted into the vector depends on the endian-ness. Example: | ||||
202 | // Vector Byte Elt Index: 0 1 2 3 4 5 6 7 | ||||
203 | // +--+--+--+--+--+--+--+--+ | ||||
204 | // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3| | ||||
205 | // extelt <4 x i16> V', 3: | |S2|S3| | ||||
206 | // +--+--+--+--+--+--+--+--+ | ||||
207 | // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value. | ||||
208 | // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value. | ||||
209 | // In this example, we must right-shift little-endian. Big-endian is just a | ||||
210 | // truncate. | ||||
211 | unsigned Chunk = ExtIndexC % NarrowingRatio; | ||||
212 | if (IsBigEndian) | ||||
213 | Chunk = NarrowingRatio - 1 - Chunk; | ||||
214 | |||||
215 | // Bail out if this is an FP vector to FP vector sequence. That would take | ||||
216 | // more instructions than we started with unless there is no shift, and it | ||||
217 | // may not be handled as well in the backend. | ||||
218 | bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); | ||||
219 | bool NeedDestBitcast = DestTy->isFloatingPointTy(); | ||||
220 | if (NeedSrcBitcast && NeedDestBitcast) | ||||
221 | return nullptr; | ||||
222 | |||||
223 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); | ||||
224 | unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); | ||||
225 | unsigned ShAmt = Chunk * DestWidth; | ||||
226 | |||||
227 | // TODO: This limitation is more strict than necessary. We could sum the | ||||
228 | // number of new instructions and subtract the number eliminated to know if | ||||
229 | // we can proceed. | ||||
230 | if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) | ||||
231 | if (NeedSrcBitcast || NeedDestBitcast) | ||||
232 | return nullptr; | ||||
233 | |||||
234 | if (NeedSrcBitcast) { | ||||
235 | Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth); | ||||
236 | Scalar = Builder.CreateBitCast(Scalar, SrcIntTy); | ||||
237 | } | ||||
238 | |||||
239 | if (ShAmt) { | ||||
240 | // Bail out if we could end with more instructions than we started with. | ||||
241 | if (!Ext.getVectorOperand()->hasOneUse()) | ||||
242 | return nullptr; | ||||
243 | Scalar = Builder.CreateLShr(Scalar, ShAmt); | ||||
244 | } | ||||
245 | |||||
246 | if (NeedDestBitcast) { | ||||
247 | Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth); | ||||
248 | return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy); | ||||
249 | } | ||||
250 | return new TruncInst(Scalar, DestTy); | ||||
251 | } | ||||
252 | |||||
253 | return nullptr; | ||||
254 | } | ||||
255 | |||||
256 | /// Find elements of V demanded by UserInstr. | ||||
257 | static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { | ||||
258 | unsigned VWidth = V->getType()->getVectorNumElements(); | ||||
259 | |||||
260 | // Conservatively assume that all elements are needed. | ||||
261 | APInt UsedElts(APInt::getAllOnesValue(VWidth)); | ||||
262 | |||||
263 | switch (UserInstr->getOpcode()) { | ||||
264 | case Instruction::ExtractElement: { | ||||
265 | ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr); | ||||
266 | assert(EEI->getVectorOperand() == V)((EEI->getVectorOperand() == V) ? static_cast<void> ( 0) : __assert_fail ("EEI->getVectorOperand() == V", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 266, __PRETTY_FUNCTION__)); | ||||
267 | ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand()); | ||||
268 | if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { | ||||
269 | UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); | ||||
270 | } | ||||
271 | break; | ||||
272 | } | ||||
273 | case Instruction::ShuffleVector: { | ||||
274 | ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr); | ||||
275 | unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements(); | ||||
276 | |||||
277 | UsedElts = APInt(VWidth, 0); | ||||
278 | for (unsigned i = 0; i < MaskNumElts; i++) { | ||||
279 | unsigned MaskVal = Shuffle->getMaskValue(i); | ||||
280 | if (MaskVal == -1u || MaskVal >= 2 * VWidth) | ||||
281 | continue; | ||||
282 | if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) | ||||
283 | UsedElts.setBit(MaskVal); | ||||
284 | if (Shuffle->getOperand(1) == V && | ||||
285 | ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) | ||||
286 | UsedElts.setBit(MaskVal - VWidth); | ||||
287 | } | ||||
288 | break; | ||||
289 | } | ||||
290 | default: | ||||
291 | break; | ||||
292 | } | ||||
293 | return UsedElts; | ||||
294 | } | ||||
295 | |||||
296 | /// Find union of elements of V demanded by all its users. | ||||
297 | /// If it is known by querying findDemandedEltsBySingleUser that | ||||
298 | /// no user demands an element of V, then the corresponding bit | ||||
299 | /// remains unset in the returned value. | ||||
300 | static APInt findDemandedEltsByAllUsers(Value *V) { | ||||
301 | unsigned VWidth = V->getType()->getVectorNumElements(); | ||||
302 | |||||
303 | APInt UnionUsedElts(VWidth, 0); | ||||
304 | for (const Use &U : V->uses()) { | ||||
305 | if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { | ||||
306 | UnionUsedElts |= findDemandedEltsBySingleUser(V, I); | ||||
307 | } else { | ||||
308 | UnionUsedElts = APInt::getAllOnesValue(VWidth); | ||||
309 | break; | ||||
310 | } | ||||
311 | |||||
312 | if (UnionUsedElts.isAllOnesValue()) | ||||
313 | break; | ||||
314 | } | ||||
315 | |||||
316 | return UnionUsedElts; | ||||
317 | } | ||||
318 | |||||
319 | Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { | ||||
320 | Value *SrcVec = EI.getVectorOperand(); | ||||
321 | Value *Index = EI.getIndexOperand(); | ||||
322 | if (Value *V = SimplifyExtractElementInst(SrcVec, Index, | ||||
| |||||
323 | SQ.getWithInstruction(&EI))) | ||||
324 | return replaceInstUsesWith(EI, V); | ||||
325 | |||||
326 | // If extracting a specified index from the vector, see if we can recursively | ||||
327 | // find a previously computed scalar that was inserted into the vector. | ||||
328 | auto *IndexC = dyn_cast<ConstantInt>(Index); | ||||
329 | if (IndexC
| ||||
330 | unsigned NumElts = EI.getVectorOperandType()->getNumElements(); | ||||
331 | |||||
332 | // InstSimplify should handle cases where the index is invalid. | ||||
333 | if (!IndexC->getValue().ule(NumElts)) | ||||
334 | return nullptr; | ||||
335 | |||||
336 | // This instruction only demands the single element from the input vector. | ||||
337 | if (NumElts != 1) { | ||||
338 | // If the input vector has a single use, simplify it based on this use | ||||
339 | // property. | ||||
340 | if (SrcVec->hasOneUse()) { | ||||
341 | APInt UndefElts(NumElts, 0); | ||||
342 | APInt DemandedElts(NumElts, 0); | ||||
343 | DemandedElts.setBit(IndexC->getZExtValue()); | ||||
344 | if (Value *V = | ||||
345 | SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) { | ||||
346 | EI.setOperand(0, V); | ||||
347 | return &EI; | ||||
348 | } | ||||
349 | } else { | ||||
350 | // If the input vector has multiple uses, simplify it based on a union | ||||
351 | // of all elements used. | ||||
352 | APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); | ||||
353 | if (!DemandedElts.isAllOnesValue()) { | ||||
354 | APInt UndefElts(NumElts, 0); | ||||
355 | if (Value *V = SimplifyDemandedVectorElts( | ||||
356 | SrcVec, DemandedElts, UndefElts, 0 /* Depth */, | ||||
357 | true /* AllowMultipleUsers */)) { | ||||
358 | if (V != SrcVec) { | ||||
359 | SrcVec->replaceAllUsesWith(V); | ||||
360 | return &EI; | ||||
361 | } | ||||
362 | } | ||||
363 | } | ||||
364 | } | ||||
365 | } | ||||
366 | if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) | ||||
367 | return I; | ||||
368 | |||||
369 | // If there's a vector PHI feeding a scalar use through this extractelement | ||||
370 | // instruction, try to scalarize the PHI. | ||||
371 | if (auto *Phi = dyn_cast<PHINode>(SrcVec)) | ||||
372 | if (Instruction *ScalarPHI = scalarizePHI(EI, Phi)) | ||||
373 | return ScalarPHI; | ||||
374 | } | ||||
375 | |||||
376 | BinaryOperator *BO; | ||||
377 | if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) { | ||||
378 | // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) | ||||
379 | Value *X = BO->getOperand(0), *Y = BO->getOperand(1); | ||||
380 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||
381 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||
382 | return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO); | ||||
383 | } | ||||
384 | |||||
385 | Value *X, *Y; | ||||
386 | CmpInst::Predicate Pred; | ||||
387 | if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) && | ||||
388 | cheapToScalarize(SrcVec, IndexC)) { | ||||
389 | // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) | ||||
390 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||
391 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||
392 | return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1); | ||||
393 | } | ||||
394 | |||||
395 | if (auto *I = dyn_cast<Instruction>(SrcVec)) { | ||||
396 | if (auto *IE = dyn_cast<InsertElementInst>(I)) { | ||||
397 | // Extracting the inserted element? | ||||
398 | if (IE->getOperand(2) == Index) | ||||
399 | return replaceInstUsesWith(EI, IE->getOperand(1)); | ||||
400 | // If the inserted and extracted elements are constants, they must not | ||||
401 | // be the same value, extract from the pre-inserted value instead. | ||||
402 | if (isa<Constant>(IE->getOperand(2)) && IndexC) | ||||
403 | return replaceOperand(EI, 0, IE->getOperand(0)); | ||||
404 | } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { | ||||
405 | // If this is extracting an element from a shufflevector, figure out where | ||||
406 | // it came from and extract from the appropriate input element instead. | ||||
407 | if (auto *Elt = dyn_cast<ConstantInt>(Index)) { | ||||
408 | int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); | ||||
409 | Value *Src; | ||||
410 | unsigned LHSWidth = | ||||
411 | SVI->getOperand(0)->getType()->getVectorNumElements(); | ||||
412 | |||||
413 | if (SrcIdx < 0) | ||||
414 | return replaceInstUsesWith(EI, UndefValue::get(EI.getType())); | ||||
415 | if (SrcIdx < (int)LHSWidth) | ||||
416 | Src = SVI->getOperand(0); | ||||
417 | else { | ||||
418 | SrcIdx -= LHSWidth; | ||||
419 | Src = SVI->getOperand(1); | ||||
420 | } | ||||
421 | Type *Int32Ty = Type::getInt32Ty(EI.getContext()); | ||||
422 | return ExtractElementInst::Create(Src, | ||||
423 | ConstantInt::get(Int32Ty, | ||||
424 | SrcIdx, false)); | ||||
425 | } | ||||
426 | } else if (auto *CI = dyn_cast<CastInst>(I)) { | ||||
427 | // Canonicalize extractelement(cast) -> cast(extractelement). | ||||
428 | // Bitcasts can change the number of vector elements, and they cost | ||||
429 | // nothing. | ||||
430 | if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { | ||||
431 | Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); | ||||
432 | return CastInst::Create(CI->getOpcode(), EE, EI.getType()); | ||||
433 | } | ||||
434 | } | ||||
435 | } | ||||
436 | return nullptr; | ||||
437 | } | ||||
438 | |||||
439 | /// If V is a shuffle of values that ONLY returns elements from either LHS or | ||||
440 | /// RHS, return the shuffle mask and true. Otherwise, return false. | ||||
441 | static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, | ||||
442 | SmallVectorImpl<Constant*> &Mask) { | ||||
443 | assert(LHS->getType() == RHS->getType() &&((LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements" ) ? static_cast<void> (0) : __assert_fail ("LHS->getType() == RHS->getType() && \"Invalid CollectSingleShuffleElements\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 444, __PRETTY_FUNCTION__)) | ||||
444 | "Invalid CollectSingleShuffleElements")((LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements" ) ? static_cast<void> (0) : __assert_fail ("LHS->getType() == RHS->getType() && \"Invalid CollectSingleShuffleElements\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 444, __PRETTY_FUNCTION__)); | ||||
445 | unsigned NumElts = V->getType()->getVectorNumElements(); | ||||
446 | |||||
447 | if (isa<UndefValue>(V)) { | ||||
448 | Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); | ||||
449 | return true; | ||||
450 | } | ||||
451 | |||||
452 | if (V == LHS) { | ||||
453 | for (unsigned i = 0; i != NumElts; ++i) | ||||
454 | Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); | ||||
455 | return true; | ||||
456 | } | ||||
457 | |||||
458 | if (V == RHS) { | ||||
459 | for (unsigned i = 0; i != NumElts; ++i) | ||||
460 | Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), | ||||
461 | i+NumElts)); | ||||
462 | return true; | ||||
463 | } | ||||
464 | |||||
465 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||
466 | // If this is an insert of an extract from some other vector, include it. | ||||
467 | Value *VecOp = IEI->getOperand(0); | ||||
468 | Value *ScalarOp = IEI->getOperand(1); | ||||
469 | Value *IdxOp = IEI->getOperand(2); | ||||
470 | |||||
471 | if (!isa<ConstantInt>(IdxOp)) | ||||
472 | return false; | ||||
473 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||
474 | |||||
475 | if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. | ||||
476 | // We can handle this if the vector we are inserting into is | ||||
477 | // transitively ok. | ||||
478 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||
479 | // If so, update the mask to reflect the inserted undef. | ||||
480 | Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); | ||||
481 | return true; | ||||
482 | } | ||||
483 | } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ | ||||
484 | if (isa<ConstantInt>(EI->getOperand(1))) { | ||||
485 | unsigned ExtractedIdx = | ||||
486 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||
487 | unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); | ||||
488 | |||||
489 | // This must be extracting from either LHS or RHS. | ||||
490 | if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { | ||||
491 | // We can handle this if the vector we are inserting into is | ||||
492 | // transitively ok. | ||||
493 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||
494 | // If so, update the mask to reflect the inserted value. | ||||
495 | if (EI->getOperand(0) == LHS) { | ||||
496 | Mask[InsertedIdx % NumElts] = | ||||
497 | ConstantInt::get(Type::getInt32Ty(V->getContext()), | ||||
498 | ExtractedIdx); | ||||
499 | } else { | ||||
500 | assert(EI->getOperand(0) == RHS)((EI->getOperand(0) == RHS) ? static_cast<void> (0) : __assert_fail ("EI->getOperand(0) == RHS", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 500, __PRETTY_FUNCTION__)); | ||||
501 | Mask[InsertedIdx % NumElts] = | ||||
502 | ConstantInt::get(Type::getInt32Ty(V->getContext()), | ||||
503 | ExtractedIdx + NumLHSElts); | ||||
504 | } | ||||
505 | return true; | ||||
506 | } | ||||
507 | } | ||||
508 | } | ||||
509 | } | ||||
510 | } | ||||
511 | |||||
512 | return false; | ||||
513 | } | ||||
514 | |||||
515 | /// If we have insertion into a vector that is wider than the vector that we | ||||
516 | /// are extracting from, try to widen the source vector to allow a single | ||||
517 | /// shufflevector to replace one or more insert/extract pairs. | ||||
518 | static void replaceExtractElements(InsertElementInst *InsElt, | ||||
519 | ExtractElementInst *ExtElt, | ||||
520 | InstCombiner &IC) { | ||||
521 | VectorType *InsVecType = InsElt->getType(); | ||||
522 | VectorType *ExtVecType = ExtElt->getVectorOperandType(); | ||||
523 | unsigned NumInsElts = InsVecType->getVectorNumElements(); | ||||
524 | unsigned NumExtElts = ExtVecType->getVectorNumElements(); | ||||
525 | |||||
526 | // The inserted-to vector must be wider than the extracted-from vector. | ||||
527 | if (InsVecType->getElementType() != ExtVecType->getElementType() || | ||||
528 | NumExtElts >= NumInsElts) | ||||
529 | return; | ||||
530 | |||||
531 | // Create a shuffle mask to widen the extended-from vector using undefined | ||||
532 | // values. The mask selects all of the values of the original vector followed | ||||
533 | // by as many undefined values as needed to create a vector of the same length | ||||
534 | // as the inserted-to vector. | ||||
535 | SmallVector<Constant *, 16> ExtendMask; | ||||
536 | IntegerType *IntType = Type::getInt32Ty(InsElt->getContext()); | ||||
537 | for (unsigned i = 0; i < NumExtElts; ++i) | ||||
538 | ExtendMask.push_back(ConstantInt::get(IntType, i)); | ||||
539 | for (unsigned i = NumExtElts; i < NumInsElts; ++i) | ||||
540 | ExtendMask.push_back(UndefValue::get(IntType)); | ||||
541 | |||||
542 | Value *ExtVecOp = ExtElt->getVectorOperand(); | ||||
543 | auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); | ||||
544 | BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||
545 | ? ExtVecOpInst->getParent() | ||||
546 | : ExtElt->getParent(); | ||||
547 | |||||
548 | // TODO: This restriction matches the basic block check below when creating | ||||
549 | // new extractelement instructions. If that limitation is removed, this one | ||||
550 | // could also be removed. But for now, we just bail out to ensure that we | ||||
551 | // will replace the extractelement instruction that is feeding our | ||||
552 | // insertelement instruction. This allows the insertelement to then be | ||||
553 | // replaced by a shufflevector. If the insertelement is not replaced, we can | ||||
554 | // induce infinite looping because there's an optimization for extractelement | ||||
555 | // that will delete our widening shuffle. This would trigger another attempt | ||||
556 | // here to create that shuffle, and we spin forever. | ||||
557 | if (InsertionBlock != InsElt->getParent()) | ||||
558 | return; | ||||
559 | |||||
560 | // TODO: This restriction matches the check in visitInsertElementInst() and | ||||
561 | // prevents an infinite loop caused by not turning the extract/insert pair | ||||
562 | // into a shuffle. We really should not need either check, but we're lacking | ||||
563 | // folds for shufflevectors because we're afraid to generate shuffle masks | ||||
564 | // that the backend can't handle. | ||||
565 | if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back())) | ||||
566 | return; | ||||
567 | |||||
568 | auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), | ||||
569 | ConstantVector::get(ExtendMask)); | ||||
570 | |||||
571 | // Insert the new shuffle after the vector operand of the extract is defined | ||||
572 | // (as long as it's not a PHI) or at the start of the basic block of the | ||||
573 | // extract, so any subsequent extracts in the same basic block can use it. | ||||
574 | // TODO: Insert before the earliest ExtractElementInst that is replaced. | ||||
575 | if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||
576 | WideVec->insertAfter(ExtVecOpInst); | ||||
577 | else | ||||
578 | IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); | ||||
579 | |||||
580 | // Replace extracts from the original narrow vector with extracts from the new | ||||
581 | // wide vector. | ||||
582 | for (User *U : ExtVecOp->users()) { | ||||
583 | ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); | ||||
584 | if (!OldExt || OldExt->getParent() != WideVec->getParent()) | ||||
585 | continue; | ||||
586 | auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); | ||||
587 | NewExt->insertAfter(OldExt); | ||||
588 | IC.replaceInstUsesWith(*OldExt, NewExt); | ||||
589 | } | ||||
590 | } | ||||
591 | |||||
592 | /// We are building a shuffle to create V, which is a sequence of insertelement, | ||||
593 | /// extractelement pairs. If PermittedRHS is set, then we must either use it or | ||||
594 | /// not rely on the second vector source. Return a std::pair containing the | ||||
595 | /// left and right vectors of the proposed shuffle (or 0), and set the Mask | ||||
596 | /// parameter as required. | ||||
597 | /// | ||||
598 | /// Note: we intentionally don't try to fold earlier shuffles since they have | ||||
599 | /// often been chosen carefully to be efficiently implementable on the target. | ||||
600 | using ShuffleOps = std::pair<Value *, Value *>; | ||||
601 | |||||
602 | static ShuffleOps collectShuffleElements(Value *V, | ||||
603 | SmallVectorImpl<Constant *> &Mask, | ||||
604 | Value *PermittedRHS, | ||||
605 | InstCombiner &IC) { | ||||
606 | assert(V->getType()->isVectorTy() && "Invalid shuffle!")((V->getType()->isVectorTy() && "Invalid shuffle!" ) ? static_cast<void> (0) : __assert_fail ("V->getType()->isVectorTy() && \"Invalid shuffle!\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 606, __PRETTY_FUNCTION__)); | ||||
607 | unsigned NumElts = V->getType()->getVectorNumElements(); | ||||
608 | |||||
609 | if (isa<UndefValue>(V)) { | ||||
610 | Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); | ||||
611 | return std::make_pair( | ||||
612 | PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); | ||||
613 | } | ||||
614 | |||||
615 | if (isa<ConstantAggregateZero>(V)) { | ||||
616 | Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); | ||||
617 | return std::make_pair(V, nullptr); | ||||
618 | } | ||||
619 | |||||
620 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||
621 | // If this is an insert of an extract from some other vector, include it. | ||||
622 | Value *VecOp = IEI->getOperand(0); | ||||
623 | Value *ScalarOp = IEI->getOperand(1); | ||||
624 | Value *IdxOp = IEI->getOperand(2); | ||||
625 | |||||
626 | if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { | ||||
627 | if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { | ||||
628 | unsigned ExtractedIdx = | ||||
629 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||
630 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||
631 | |||||
632 | // Either the extracted from or inserted into vector must be RHSVec, | ||||
633 | // otherwise we'd end up with a shuffle of three inputs. | ||||
634 | if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { | ||||
635 | Value *RHS = EI->getOperand(0); | ||||
636 | ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); | ||||
637 | assert(LR.second == nullptr || LR.second == RHS)((LR.second == nullptr || LR.second == RHS) ? static_cast< void> (0) : __assert_fail ("LR.second == nullptr || LR.second == RHS" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 637, __PRETTY_FUNCTION__)); | ||||
638 | |||||
639 | if (LR.first->getType() != RHS->getType()) { | ||||
640 | // Although we are giving up for now, see if we can create extracts | ||||
641 | // that match the inserts for another round of combining. | ||||
642 | replaceExtractElements(IEI, EI, IC); | ||||
643 | |||||
644 | // We tried our best, but we can't find anything compatible with RHS | ||||
645 | // further up the chain. Return a trivial shuffle. | ||||
646 | for (unsigned i = 0; i < NumElts; ++i) | ||||
647 | Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); | ||||
648 | return std::make_pair(V, nullptr); | ||||
649 | } | ||||
650 | |||||
651 | unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); | ||||
652 | Mask[InsertedIdx % NumElts] = | ||||
653 | ConstantInt::get(Type::getInt32Ty(V->getContext()), | ||||
654 | NumLHSElts+ExtractedIdx); | ||||
655 | return std::make_pair(LR.first, RHS); | ||||
656 | } | ||||
657 | |||||
658 | if (VecOp == PermittedRHS) { | ||||
659 | // We've gone as far as we can: anything on the other side of the | ||||
660 | // extractelement will already have been converted into a shuffle. | ||||
661 | unsigned NumLHSElts = | ||||
662 | EI->getOperand(0)->getType()->getVectorNumElements(); | ||||
663 | for (unsigned i = 0; i != NumElts; ++i) | ||||
664 | Mask.push_back(ConstantInt::get( | ||||
665 | Type::getInt32Ty(V->getContext()), | ||||
666 | i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); | ||||
667 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||
668 | } | ||||
669 | |||||
670 | // If this insertelement is a chain that comes from exactly these two | ||||
671 | // vectors, return the vector and the effective shuffle. | ||||
672 | if (EI->getOperand(0)->getType() == PermittedRHS->getType() && | ||||
673 | collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, | ||||
674 | Mask)) | ||||
675 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||
676 | } | ||||
677 | } | ||||
678 | } | ||||
679 | |||||
680 | // Otherwise, we can't do anything fancy. Return an identity vector. | ||||
681 | for (unsigned i = 0; i != NumElts; ++i) | ||||
682 | Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); | ||||
683 | return std::make_pair(V, nullptr); | ||||
684 | } | ||||
685 | |||||
686 | /// Try to find redundant insertvalue instructions, like the following ones: | ||||
687 | /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 | ||||
688 | /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 | ||||
689 | /// Here the second instruction inserts values at the same indices, as the | ||||
690 | /// first one, making the first one redundant. | ||||
691 | /// It should be transformed to: | ||||
692 | /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 | ||||
693 | Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { | ||||
694 | bool IsRedundant = false; | ||||
695 | ArrayRef<unsigned int> FirstIndices = I.getIndices(); | ||||
696 | |||||
697 | // If there is a chain of insertvalue instructions (each of them except the | ||||
698 | // last one has only one use and it's another insertvalue insn from this | ||||
699 | // chain), check if any of the 'children' uses the same indices as the first | ||||
700 | // instruction. In this case, the first one is redundant. | ||||
701 | Value *V = &I; | ||||
702 | unsigned Depth = 0; | ||||
703 | while (V->hasOneUse() && Depth < 10) { | ||||
704 | User *U = V->user_back(); | ||||
705 | auto UserInsInst = dyn_cast<InsertValueInst>(U); | ||||
706 | if (!UserInsInst || U->getOperand(0) != V) | ||||
707 | break; | ||||
708 | if (UserInsInst->getIndices() == FirstIndices) { | ||||
709 | IsRedundant = true; | ||||
710 | break; | ||||
711 | } | ||||
712 | V = UserInsInst; | ||||
713 | Depth++; | ||||
714 | } | ||||
715 | |||||
716 | if (IsRedundant) | ||||
717 | return replaceInstUsesWith(I, I.getOperand(0)); | ||||
718 | return nullptr; | ||||
719 | } | ||||
720 | |||||
721 | static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { | ||||
722 | int MaskSize = Shuf.getMask()->getType()->getVectorNumElements(); | ||||
723 | int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements(); | ||||
724 | |||||
725 | // A vector select does not change the size of the operands. | ||||
726 | if (MaskSize != VecSize) | ||||
727 | return false; | ||||
728 | |||||
729 | // Each mask element must be undefined or choose a vector element from one of | ||||
730 | // the source operands without crossing vector lanes. | ||||
731 | for (int i = 0; i != MaskSize; ++i) { | ||||
732 | int Elt = Shuf.getMaskValue(i); | ||||
733 | if (Elt != -1 && Elt != i && Elt != i + VecSize) | ||||
734 | return false; | ||||
735 | } | ||||
736 | |||||
737 | return true; | ||||
738 | } | ||||
739 | |||||
740 | /// Turn a chain of inserts that splats a value into an insert + shuffle: | ||||
741 | /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> | ||||
742 | /// shufflevector(insertelt(X, %k, 0), undef, zero) | ||||
743 | static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { | ||||
744 | // We are interested in the last insert in a chain. So if this insert has a | ||||
745 | // single user and that user is an insert, bail. | ||||
746 | if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back())) | ||||
747 | return nullptr; | ||||
748 | |||||
749 | auto *VecTy = cast<VectorType>(InsElt.getType()); | ||||
750 | unsigned NumElements = VecTy->getNumElements(); | ||||
751 | |||||
752 | // Do not try to do this for a one-element vector, since that's a nop, | ||||
753 | // and will cause an inf-loop. | ||||
754 | if (NumElements == 1) | ||||
755 | return nullptr; | ||||
756 | |||||
757 | Value *SplatVal = InsElt.getOperand(1); | ||||
758 | InsertElementInst *CurrIE = &InsElt; | ||||
759 | SmallVector<bool, 16> ElementPresent(NumElements, false); | ||||
760 | InsertElementInst *FirstIE = nullptr; | ||||
761 | |||||
762 | // Walk the chain backwards, keeping track of which indices we inserted into, | ||||
763 | // until we hit something that isn't an insert of the splatted value. | ||||
764 | while (CurrIE) { | ||||
765 | auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2)); | ||||
766 | if (!Idx || CurrIE->getOperand(1) != SplatVal) | ||||
767 | return nullptr; | ||||
768 | |||||
769 | auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); | ||||
770 | // Check none of the intermediate steps have any additional uses, except | ||||
771 | // for the root insertelement instruction, which can be re-used, if it | ||||
772 | // inserts at position 0. | ||||
773 | if (CurrIE != &InsElt && | ||||
774 | (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) | ||||
775 | return nullptr; | ||||
776 | |||||
777 | ElementPresent[Idx->getZExtValue()] = true; | ||||
778 | FirstIE = CurrIE; | ||||
779 | CurrIE = NextIE; | ||||
780 | } | ||||
781 | |||||
782 | // If this is just a single insertelement (not a sequence), we are done. | ||||
783 | if (FirstIE == &InsElt) | ||||
784 | return nullptr; | ||||
785 | |||||
786 | // If we are not inserting into an undef vector, make sure we've seen an | ||||
787 | // insert into every element. | ||||
788 | // TODO: If the base vector is not undef, it might be better to create a splat | ||||
789 | // and then a select-shuffle (blend) with the base vector. | ||||
790 | if (!isa<UndefValue>(FirstIE->getOperand(0))) | ||||
791 | if (any_of(ElementPresent, [](bool Present) { return !Present; })) | ||||
792 | return nullptr; | ||||
793 | |||||
794 | // Create the insert + shuffle. | ||||
795 | Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); | ||||
796 | UndefValue *UndefVec = UndefValue::get(VecTy); | ||||
797 | Constant *Zero = ConstantInt::get(Int32Ty, 0); | ||||
798 | if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) | ||||
799 | FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt); | ||||
800 | |||||
801 | // Splat from element 0, but replace absent elements with undef in the mask. | ||||
802 | SmallVector<Constant *, 16> Mask(NumElements, Zero); | ||||
803 | for (unsigned i = 0; i != NumElements; ++i) | ||||
804 | if (!ElementPresent[i]) | ||||
805 | Mask[i] = UndefValue::get(Int32Ty); | ||||
806 | |||||
807 | return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask)); | ||||
808 | } | ||||
809 | |||||
810 | /// Try to fold an insert element into an existing splat shuffle by changing | ||||
811 | /// the shuffle's mask to include the index of this insert element. | ||||
812 | static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { | ||||
813 | // Check if the vector operand of this insert is a canonical splat shuffle. | ||||
814 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||
815 | if (!Shuf || !Shuf->isZeroEltSplat()) | ||||
816 | return nullptr; | ||||
817 | |||||
818 | // Check for a constant insertion index. | ||||
819 | uint64_t IdxC; | ||||
820 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||
821 | return nullptr; | ||||
822 | |||||
823 | // Check if the splat shuffle's input is the same as this insert's scalar op. | ||||
824 | Value *X = InsElt.getOperand(1); | ||||
825 | Value *Op0 = Shuf->getOperand(0); | ||||
826 | if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt()))) | ||||
827 | return nullptr; | ||||
828 | |||||
829 | // Replace the shuffle mask element at the index of this insert with a zero. | ||||
830 | // For example: | ||||
831 | // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1 | ||||
832 | // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef> | ||||
833 | unsigned NumMaskElts = Shuf->getType()->getVectorNumElements(); | ||||
834 | SmallVector<Constant *, 16> NewMaskVec(NumMaskElts); | ||||
835 | Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext()); | ||||
836 | Constant *Zero = ConstantInt::getNullValue(I32Ty); | ||||
837 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||
838 | NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i); | ||||
839 | |||||
840 | Constant *NewMask = ConstantVector::get(NewMaskVec); | ||||
841 | return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); | ||||
842 | } | ||||
843 | |||||
844 | /// Try to fold an extract+insert element into an existing identity shuffle by | ||||
845 | /// changing the shuffle's mask to include the index of this insert element. | ||||
846 | static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { | ||||
847 | // Check if the vector operand of this insert is an identity shuffle. | ||||
848 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||
849 | if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) || | ||||
850 | !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) | ||||
851 | return nullptr; | ||||
852 | |||||
853 | // Check for a constant insertion index. | ||||
854 | uint64_t IdxC; | ||||
855 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||
856 | return nullptr; | ||||
857 | |||||
858 | // Check if this insert's scalar op is extracted from the identity shuffle's | ||||
859 | // input vector. | ||||
860 | Value *Scalar = InsElt.getOperand(1); | ||||
861 | Value *X = Shuf->getOperand(0); | ||||
862 | if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC)))) | ||||
863 | return nullptr; | ||||
864 | |||||
865 | // Replace the shuffle mask element at the index of this extract+insert with | ||||
866 | // that same index value. | ||||
867 | // For example: | ||||
868 | // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' | ||||
869 | unsigned NumMaskElts = Shuf->getType()->getVectorNumElements(); | ||||
870 | SmallVector<Constant *, 16> NewMaskVec(NumMaskElts); | ||||
871 | Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext()); | ||||
872 | Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC); | ||||
873 | Constant *OldMask = Shuf->getMask(); | ||||
874 | for (unsigned i = 0; i != NumMaskElts; ++i) { | ||||
875 | if (i != IdxC) { | ||||
876 | // All mask elements besides the inserted element remain the same. | ||||
877 | NewMaskVec[i] = OldMask->getAggregateElement(i); | ||||
878 | } else if (OldMask->getAggregateElement(i) == NewMaskEltC) { | ||||
879 | // If the mask element was already set, there's nothing to do | ||||
880 | // (demanded elements analysis may unset it later). | ||||
881 | return nullptr; | ||||
882 | } else { | ||||
883 | assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&((isa<UndefValue>(OldMask->getAggregateElement(i)) && "Unexpected shuffle mask element for identity shuffle") ? static_cast <void> (0) : __assert_fail ("isa<UndefValue>(OldMask->getAggregateElement(i)) && \"Unexpected shuffle mask element for identity shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 884, __PRETTY_FUNCTION__)) | ||||
884 | "Unexpected shuffle mask element for identity shuffle")((isa<UndefValue>(OldMask->getAggregateElement(i)) && "Unexpected shuffle mask element for identity shuffle") ? static_cast <void> (0) : __assert_fail ("isa<UndefValue>(OldMask->getAggregateElement(i)) && \"Unexpected shuffle mask element for identity shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 884, __PRETTY_FUNCTION__)); | ||||
885 | NewMaskVec[i] = NewMaskEltC; | ||||
886 | } | ||||
887 | } | ||||
888 | |||||
889 | Constant *NewMask = ConstantVector::get(NewMaskVec); | ||||
890 | return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); | ||||
891 | } | ||||
892 | |||||
893 | /// If we have an insertelement instruction feeding into another insertelement | ||||
894 | /// and the 2nd is inserting a constant into the vector, canonicalize that | ||||
895 | /// constant insertion before the insertion of a variable: | ||||
896 | /// | ||||
897 | /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> | ||||
898 | /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 | ||||
899 | /// | ||||
900 | /// This has the potential of eliminating the 2nd insertelement instruction | ||||
901 | /// via constant folding of the scalar constant into a vector constant. | ||||
902 | static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, | ||||
903 | InstCombiner::BuilderTy &Builder) { | ||||
904 | auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0)); | ||||
905 | if (!InsElt1 || !InsElt1->hasOneUse()) | ||||
906 | return nullptr; | ||||
907 | |||||
908 | Value *X, *Y; | ||||
909 | Constant *ScalarC; | ||||
910 | ConstantInt *IdxC1, *IdxC2; | ||||
911 | if (match(InsElt1->getOperand(0), m_Value(X)) && | ||||
912 | match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) && | ||||
913 | match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) && | ||||
914 | match(InsElt2.getOperand(1), m_Constant(ScalarC)) && | ||||
915 | match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) { | ||||
916 | Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2); | ||||
917 | return InsertElementInst::Create(NewInsElt1, Y, IdxC1); | ||||
918 | } | ||||
919 | |||||
920 | return nullptr; | ||||
921 | } | ||||
922 | |||||
923 | /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex | ||||
924 | /// --> shufflevector X, CVec', Mask' | ||||
925 | static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { | ||||
926 | auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0)); | ||||
927 | // Bail out if the parent has more than one use. In that case, we'd be | ||||
928 | // replacing the insertelt with a shuffle, and that's not a clear win. | ||||
929 | if (!Inst || !Inst->hasOneUse()) | ||||
930 | return nullptr; | ||||
931 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) { | ||||
932 | // The shuffle must have a constant vector operand. The insertelt must have | ||||
933 | // a constant scalar being inserted at a constant position in the vector. | ||||
934 | Constant *ShufConstVec, *InsEltScalar; | ||||
935 | uint64_t InsEltIndex; | ||||
936 | if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || | ||||
937 | !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || | ||||
938 | !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) | ||||
939 | return nullptr; | ||||
940 | |||||
941 | // Adding an element to an arbitrary shuffle could be expensive, but a | ||||
942 | // shuffle that selects elements from vectors without crossing lanes is | ||||
943 | // assumed cheap. | ||||
944 | // If we're just adding a constant into that shuffle, it will still be | ||||
945 | // cheap. | ||||
946 | if (!isShuffleEquivalentToSelect(*Shuf)) | ||||
947 | return nullptr; | ||||
948 | |||||
949 | // From the above 'select' check, we know that the mask has the same number | ||||
950 | // of elements as the vector input operands. We also know that each constant | ||||
951 | // input element is used in its lane and can not be used more than once by | ||||
952 | // the shuffle. Therefore, replace the constant in the shuffle's constant | ||||
953 | // vector with the insertelt constant. Replace the constant in the shuffle's | ||||
954 | // mask vector with the insertelt index plus the length of the vector | ||||
955 | // (because the constant vector operand of a shuffle is always the 2nd | ||||
956 | // operand). | ||||
957 | Constant *Mask = Shuf->getMask(); | ||||
958 | unsigned NumElts = Mask->getType()->getVectorNumElements(); | ||||
959 | SmallVector<Constant *, 16> NewShufElts(NumElts); | ||||
960 | SmallVector<Constant *, 16> NewMaskElts(NumElts); | ||||
961 | for (unsigned I = 0; I != NumElts; ++I) { | ||||
962 | if (I == InsEltIndex) { | ||||
963 | NewShufElts[I] = InsEltScalar; | ||||
964 | Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); | ||||
965 | NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); | ||||
966 | } else { | ||||
967 | // Copy over the existing values. | ||||
968 | NewShufElts[I] = ShufConstVec->getAggregateElement(I); | ||||
969 | NewMaskElts[I] = Mask->getAggregateElement(I); | ||||
970 | } | ||||
971 | } | ||||
972 | |||||
973 | // Create new operands for a shuffle that includes the constant of the | ||||
974 | // original insertelt. The old shuffle will be dead now. | ||||
975 | return new ShuffleVectorInst(Shuf->getOperand(0), | ||||
976 | ConstantVector::get(NewShufElts), | ||||
977 | ConstantVector::get(NewMaskElts)); | ||||
978 | } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) { | ||||
979 | // Transform sequences of insertelements ops with constant data/indexes into | ||||
980 | // a single shuffle op. | ||||
981 | unsigned NumElts = InsElt.getType()->getNumElements(); | ||||
982 | |||||
983 | uint64_t InsertIdx[2]; | ||||
984 | Constant *Val[2]; | ||||
985 | if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || | ||||
986 | !match(InsElt.getOperand(1), m_Constant(Val[0])) || | ||||
987 | !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || | ||||
988 | !match(IEI->getOperand(1), m_Constant(Val[1]))) | ||||
989 | return nullptr; | ||||
990 | SmallVector<Constant *, 16> Values(NumElts); | ||||
991 | SmallVector<Constant *, 16> Mask(NumElts); | ||||
992 | auto ValI = std::begin(Val); | ||||
993 | // Generate new constant vector and mask. | ||||
994 | // We have 2 values/masks from the insertelements instructions. Insert them | ||||
995 | // into new value/mask vectors. | ||||
996 | for (uint64_t I : InsertIdx) { | ||||
997 | if (!Values[I]) { | ||||
998 | assert(!Mask[I])((!Mask[I]) ? static_cast<void> (0) : __assert_fail ("!Mask[I]" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 998, __PRETTY_FUNCTION__)); | ||||
999 | Values[I] = *ValI; | ||||
1000 | Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), | ||||
1001 | NumElts + I); | ||||
1002 | } | ||||
1003 | ++ValI; | ||||
1004 | } | ||||
1005 | // Remaining values are filled with 'undef' values. | ||||
1006 | for (unsigned I = 0; I < NumElts; ++I) { | ||||
1007 | if (!Values[I]) { | ||||
1008 | assert(!Mask[I])((!Mask[I]) ? static_cast<void> (0) : __assert_fail ("!Mask[I]" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1008, __PRETTY_FUNCTION__)); | ||||
1009 | Values[I] = UndefValue::get(InsElt.getType()->getElementType()); | ||||
1010 | Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I); | ||||
1011 | } | ||||
1012 | } | ||||
1013 | // Create new operands for a shuffle that includes the constant of the | ||||
1014 | // original insertelt. | ||||
1015 | return new ShuffleVectorInst(IEI->getOperand(0), | ||||
1016 | ConstantVector::get(Values), | ||||
1017 | ConstantVector::get(Mask)); | ||||
1018 | } | ||||
1019 | return nullptr; | ||||
1020 | } | ||||
1021 | |||||
1022 | Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { | ||||
1023 | Value *VecOp = IE.getOperand(0); | ||||
1024 | Value *ScalarOp = IE.getOperand(1); | ||||
1025 | Value *IdxOp = IE.getOperand(2); | ||||
1026 | |||||
1027 | if (auto *V = SimplifyInsertElementInst( | ||||
1028 | VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) | ||||
1029 | return replaceInstUsesWith(IE, V); | ||||
1030 | |||||
1031 | // If the vector and scalar are both bitcast from the same element type, do | ||||
1032 | // the insert in that source type followed by bitcast. | ||||
1033 | Value *VecSrc, *ScalarSrc; | ||||
1034 | if (match(VecOp, m_BitCast(m_Value(VecSrc))) && | ||||
1035 | match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && | ||||
1036 | (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && | ||||
1037 | VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() && | ||||
1038 | VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) { | ||||
1039 | // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp --> | ||||
1040 | // bitcast (inselt VecSrc, ScalarSrc, IdxOp) | ||||
1041 | Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp); | ||||
1042 | return new BitCastInst(NewInsElt, IE.getType()); | ||||
1043 | } | ||||
1044 | |||||
1045 | // If the inserted element was extracted from some other vector and both | ||||
1046 | // indexes are valid constants, try to turn this into a shuffle. | ||||
1047 | uint64_t InsertedIdx, ExtractedIdx; | ||||
1048 | Value *ExtVecOp; | ||||
1049 | if (match(IdxOp, m_ConstantInt(InsertedIdx)) && | ||||
1050 | match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp), | ||||
1051 | m_ConstantInt(ExtractedIdx))) && | ||||
1052 | ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) { | ||||
1053 | // TODO: Looking at the user(s) to determine if this insert is a | ||||
1054 | // fold-to-shuffle opportunity does not match the usual instcombine | ||||
1055 | // constraints. We should decide if the transform is worthy based only | ||||
1056 | // on this instruction and its operands, but that may not work currently. | ||||
1057 | // | ||||
1058 | // Here, we are trying to avoid creating shuffles before reaching | ||||
1059 | // the end of a chain of extract-insert pairs. This is complicated because | ||||
1060 | // we do not generally form arbitrary shuffle masks in instcombine | ||||
1061 | // (because those may codegen poorly), but collectShuffleElements() does | ||||
1062 | // exactly that. | ||||
1063 | // | ||||
1064 | // The rules for determining what is an acceptable target-independent | ||||
1065 | // shuffle mask are fuzzy because they evolve based on the backend's | ||||
1066 | // capabilities and real-world impact. | ||||
1067 | auto isShuffleRootCandidate = [](InsertElementInst &Insert) { | ||||
1068 | if (!Insert.hasOneUse()) | ||||
1069 | return true; | ||||
1070 | auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back()); | ||||
1071 | if (!InsertUser) | ||||
1072 | return true; | ||||
1073 | return false; | ||||
1074 | }; | ||||
1075 | |||||
1076 | // Try to form a shuffle from a chain of extract-insert ops. | ||||
1077 | if (isShuffleRootCandidate(IE)) { | ||||
1078 | SmallVector<Constant*, 16> Mask; | ||||
1079 | ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); | ||||
1080 | |||||
1081 | // The proposed shuffle may be trivial, in which case we shouldn't | ||||
1082 | // perform the combine. | ||||
1083 | if (LR.first != &IE && LR.second != &IE) { | ||||
1084 | // We now have a shuffle of LHS, RHS, Mask. | ||||
1085 | if (LR.second == nullptr) | ||||
1086 | LR.second = UndefValue::get(LR.first->getType()); | ||||
1087 | return new ShuffleVectorInst(LR.first, LR.second, | ||||
1088 | ConstantVector::get(Mask)); | ||||
1089 | } | ||||
1090 | } | ||||
1091 | } | ||||
1092 | |||||
1093 | unsigned VWidth = VecOp->getType()->getVectorNumElements(); | ||||
1094 | APInt UndefElts(VWidth, 0); | ||||
1095 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||
1096 | if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { | ||||
1097 | if (V != &IE) | ||||
1098 | return replaceInstUsesWith(IE, V); | ||||
1099 | return &IE; | ||||
1100 | } | ||||
1101 | |||||
1102 | if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) | ||||
1103 | return Shuf; | ||||
1104 | |||||
1105 | if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) | ||||
1106 | return NewInsElt; | ||||
1107 | |||||
1108 | if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE)) | ||||
1109 | return Broadcast; | ||||
1110 | |||||
1111 | if (Instruction *Splat = foldInsEltIntoSplat(IE)) | ||||
1112 | return Splat; | ||||
1113 | |||||
1114 | if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) | ||||
1115 | return IdentityShuf; | ||||
1116 | |||||
1117 | return nullptr; | ||||
1118 | } | ||||
1119 | |||||
1120 | /// Return true if we can evaluate the specified expression tree if the vector | ||||
1121 | /// elements were shuffled in a different order. | ||||
1122 | static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, | ||||
1123 | unsigned Depth = 5) { | ||||
1124 | // We can always reorder the elements of a constant. | ||||
1125 | if (isa<Constant>(V)) | ||||
1126 | return true; | ||||
1127 | |||||
1128 | // We won't reorder vector arguments. No IPO here. | ||||
1129 | Instruction *I = dyn_cast<Instruction>(V); | ||||
1130 | if (!I) return false; | ||||
1131 | |||||
1132 | // Two users may expect different orders of the elements. Don't try it. | ||||
1133 | if (!I->hasOneUse()) | ||||
1134 | return false; | ||||
1135 | |||||
1136 | if (Depth == 0) return false; | ||||
1137 | |||||
1138 | switch (I->getOpcode()) { | ||||
1139 | case Instruction::UDiv: | ||||
1140 | case Instruction::SDiv: | ||||
1141 | case Instruction::URem: | ||||
1142 | case Instruction::SRem: | ||||
1143 | // Propagating an undefined shuffle mask element to integer div/rem is not | ||||
1144 | // allowed because those opcodes can create immediate undefined behavior | ||||
1145 | // from an undefined element in an operand. | ||||
1146 | if (llvm::any_of(Mask, [](int M){ return M == -1; })) | ||||
1147 | return false; | ||||
1148 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
1149 | case Instruction::Add: | ||||
1150 | case Instruction::FAdd: | ||||
1151 | case Instruction::Sub: | ||||
1152 | case Instruction::FSub: | ||||
1153 | case Instruction::Mul: | ||||
1154 | case Instruction::FMul: | ||||
1155 | case Instruction::FDiv: | ||||
1156 | case Instruction::FRem: | ||||
1157 | case Instruction::Shl: | ||||
1158 | case Instruction::LShr: | ||||
1159 | case Instruction::AShr: | ||||
1160 | case Instruction::And: | ||||
1161 | case Instruction::Or: | ||||
1162 | case Instruction::Xor: | ||||
1163 | case Instruction::ICmp: | ||||
1164 | case Instruction::FCmp: | ||||
1165 | case Instruction::Trunc: | ||||
1166 | case Instruction::ZExt: | ||||
1167 | case Instruction::SExt: | ||||
1168 | case Instruction::FPToUI: | ||||
1169 | case Instruction::FPToSI: | ||||
1170 | case Instruction::UIToFP: | ||||
1171 | case Instruction::SIToFP: | ||||
1172 | case Instruction::FPTrunc: | ||||
1173 | case Instruction::FPExt: | ||||
1174 | case Instruction::GetElementPtr: { | ||||
1175 | // Bail out if we would create longer vector ops. We could allow creating | ||||
1176 | // longer vector ops, but that may result in more expensive codegen. | ||||
1177 | Type *ITy = I->getType(); | ||||
1178 | if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) | ||||
1179 | return false; | ||||
1180 | for (Value *Operand : I->operands()) { | ||||
1181 | if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) | ||||
1182 | return false; | ||||
1183 | } | ||||
1184 | return true; | ||||
1185 | } | ||||
1186 | case Instruction::InsertElement: { | ||||
1187 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); | ||||
1188 | if (!CI) return false; | ||||
1189 | int ElementNumber = CI->getLimitedValue(); | ||||
1190 | |||||
1191 | // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' | ||||
1192 | // can't put an element into multiple indices. | ||||
1193 | bool SeenOnce = false; | ||||
1194 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||
1195 | if (Mask[i] == ElementNumber) { | ||||
1196 | if (SeenOnce) | ||||
1197 | return false; | ||||
1198 | SeenOnce = true; | ||||
1199 | } | ||||
1200 | } | ||||
1201 | return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1); | ||||
1202 | } | ||||
1203 | } | ||||
1204 | return false; | ||||
1205 | } | ||||
1206 | |||||
1207 | /// Rebuild a new instruction just like 'I' but with the new operands given. | ||||
1208 | /// In the event of type mismatch, the type of the operands is correct. | ||||
1209 | static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { | ||||
1210 | // We don't want to use the IRBuilder here because we want the replacement | ||||
1211 | // instructions to appear next to 'I', not the builder's insertion point. | ||||
1212 | switch (I->getOpcode()) { | ||||
1213 | case Instruction::Add: | ||||
1214 | case Instruction::FAdd: | ||||
1215 | case Instruction::Sub: | ||||
1216 | case Instruction::FSub: | ||||
1217 | case Instruction::Mul: | ||||
1218 | case Instruction::FMul: | ||||
1219 | case Instruction::UDiv: | ||||
1220 | case Instruction::SDiv: | ||||
1221 | case Instruction::FDiv: | ||||
1222 | case Instruction::URem: | ||||
1223 | case Instruction::SRem: | ||||
1224 | case Instruction::FRem: | ||||
1225 | case Instruction::Shl: | ||||
1226 | case Instruction::LShr: | ||||
1227 | case Instruction::AShr: | ||||
1228 | case Instruction::And: | ||||
1229 | case Instruction::Or: | ||||
1230 | case Instruction::Xor: { | ||||
1231 | BinaryOperator *BO = cast<BinaryOperator>(I); | ||||
1232 | assert(NewOps.size() == 2 && "binary operator with #ops != 2")((NewOps.size() == 2 && "binary operator with #ops != 2" ) ? static_cast<void> (0) : __assert_fail ("NewOps.size() == 2 && \"binary operator with #ops != 2\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1232, __PRETTY_FUNCTION__)); | ||||
1233 | BinaryOperator *New = | ||||
1234 | BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), | ||||
1235 | NewOps[0], NewOps[1], "", BO); | ||||
1236 | if (isa<OverflowingBinaryOperator>(BO)) { | ||||
1237 | New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); | ||||
1238 | New->setHasNoSignedWrap(BO->hasNoSignedWrap()); | ||||
1239 | } | ||||
1240 | if (isa<PossiblyExactOperator>(BO)) { | ||||
1241 | New->setIsExact(BO->isExact()); | ||||
1242 | } | ||||
1243 | if (isa<FPMathOperator>(BO)) | ||||
1244 | New->copyFastMathFlags(I); | ||||
1245 | return New; | ||||
1246 | } | ||||
1247 | case Instruction::ICmp: | ||||
1248 | assert(NewOps.size() == 2 && "icmp with #ops != 2")((NewOps.size() == 2 && "icmp with #ops != 2") ? static_cast <void> (0) : __assert_fail ("NewOps.size() == 2 && \"icmp with #ops != 2\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1248, __PRETTY_FUNCTION__)); | ||||
1249 | return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), | ||||
1250 | NewOps[0], NewOps[1]); | ||||
1251 | case Instruction::FCmp: | ||||
1252 | assert(NewOps.size() == 2 && "fcmp with #ops != 2")((NewOps.size() == 2 && "fcmp with #ops != 2") ? static_cast <void> (0) : __assert_fail ("NewOps.size() == 2 && \"fcmp with #ops != 2\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1252, __PRETTY_FUNCTION__)); | ||||
1253 | return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), | ||||
1254 | NewOps[0], NewOps[1]); | ||||
1255 | case Instruction::Trunc: | ||||
1256 | case Instruction::ZExt: | ||||
1257 | case Instruction::SExt: | ||||
1258 | case Instruction::FPToUI: | ||||
1259 | case Instruction::FPToSI: | ||||
1260 | case Instruction::UIToFP: | ||||
1261 | case Instruction::SIToFP: | ||||
1262 | case Instruction::FPTrunc: | ||||
1263 | case Instruction::FPExt: { | ||||
1264 | // It's possible that the mask has a different number of elements from | ||||
1265 | // the original cast. We recompute the destination type to match the mask. | ||||
1266 | Type *DestTy = | ||||
1267 | VectorType::get(I->getType()->getScalarType(), | ||||
1268 | NewOps[0]->getType()->getVectorNumElements()); | ||||
1269 | assert(NewOps.size() == 1 && "cast with #ops != 1")((NewOps.size() == 1 && "cast with #ops != 1") ? static_cast <void> (0) : __assert_fail ("NewOps.size() == 1 && \"cast with #ops != 1\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1269, __PRETTY_FUNCTION__)); | ||||
1270 | return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, | ||||
1271 | "", I); | ||||
1272 | } | ||||
1273 | case Instruction::GetElementPtr: { | ||||
1274 | Value *Ptr = NewOps[0]; | ||||
1275 | ArrayRef<Value*> Idx = NewOps.slice(1); | ||||
1276 | GetElementPtrInst *GEP = GetElementPtrInst::Create( | ||||
1277 | cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); | ||||
1278 | GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); | ||||
1279 | return GEP; | ||||
1280 | } | ||||
1281 | } | ||||
1282 | llvm_unreachable("failed to rebuild vector instructions")::llvm::llvm_unreachable_internal("failed to rebuild vector instructions" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1282); | ||||
1283 | } | ||||
1284 | |||||
1285 | static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { | ||||
1286 | // Mask.size() does not need to be equal to the number of vector elements. | ||||
1287 | |||||
1288 | assert(V->getType()->isVectorTy() && "can't reorder non-vector elements")((V->getType()->isVectorTy() && "can't reorder non-vector elements" ) ? static_cast<void> (0) : __assert_fail ("V->getType()->isVectorTy() && \"can't reorder non-vector elements\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1288, __PRETTY_FUNCTION__)); | ||||
1289 | Type *EltTy = V->getType()->getScalarType(); | ||||
1290 | Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); | ||||
1291 | if (isa<UndefValue>(V)) | ||||
1292 | return UndefValue::get(VectorType::get(EltTy, Mask.size())); | ||||
1293 | |||||
1294 | if (isa<ConstantAggregateZero>(V)) | ||||
1295 | return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size())); | ||||
1296 | |||||
1297 | if (Constant *C = dyn_cast<Constant>(V)) { | ||||
1298 | SmallVector<Constant *, 16> MaskValues; | ||||
1299 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||
1300 | if (Mask[i] == -1) | ||||
1301 | MaskValues.push_back(UndefValue::get(I32Ty)); | ||||
1302 | else | ||||
1303 | MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i])); | ||||
1304 | } | ||||
1305 | return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), | ||||
1306 | ConstantVector::get(MaskValues)); | ||||
1307 | } | ||||
1308 | |||||
1309 | Instruction *I = cast<Instruction>(V); | ||||
1310 | switch (I->getOpcode()) { | ||||
1311 | case Instruction::Add: | ||||
1312 | case Instruction::FAdd: | ||||
1313 | case Instruction::Sub: | ||||
1314 | case Instruction::FSub: | ||||
1315 | case Instruction::Mul: | ||||
1316 | case Instruction::FMul: | ||||
1317 | case Instruction::UDiv: | ||||
1318 | case Instruction::SDiv: | ||||
1319 | case Instruction::FDiv: | ||||
1320 | case Instruction::URem: | ||||
1321 | case Instruction::SRem: | ||||
1322 | case Instruction::FRem: | ||||
1323 | case Instruction::Shl: | ||||
1324 | case Instruction::LShr: | ||||
1325 | case Instruction::AShr: | ||||
1326 | case Instruction::And: | ||||
1327 | case Instruction::Or: | ||||
1328 | case Instruction::Xor: | ||||
1329 | case Instruction::ICmp: | ||||
1330 | case Instruction::FCmp: | ||||
1331 | case Instruction::Trunc: | ||||
1332 | case Instruction::ZExt: | ||||
1333 | case Instruction::SExt: | ||||
1334 | case Instruction::FPToUI: | ||||
1335 | case Instruction::FPToSI: | ||||
1336 | case Instruction::UIToFP: | ||||
1337 | case Instruction::SIToFP: | ||||
1338 | case Instruction::FPTrunc: | ||||
1339 | case Instruction::FPExt: | ||||
1340 | case Instruction::Select: | ||||
1341 | case Instruction::GetElementPtr: { | ||||
1342 | SmallVector<Value*, 8> NewOps; | ||||
1343 | bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); | ||||
1344 | for (int i = 0, e = I->getNumOperands(); i != e; ++i) { | ||||
1345 | Value *V; | ||||
1346 | // Recursively call evaluateInDifferentElementOrder on vector arguments | ||||
1347 | // as well. E.g. GetElementPtr may have scalar operands even if the | ||||
1348 | // return value is a vector, so we need to examine the operand type. | ||||
1349 | if (I->getOperand(i)->getType()->isVectorTy()) | ||||
1350 | V = evaluateInDifferentElementOrder(I->getOperand(i), Mask); | ||||
1351 | else | ||||
1352 | V = I->getOperand(i); | ||||
1353 | NewOps.push_back(V); | ||||
1354 | NeedsRebuild |= (V != I->getOperand(i)); | ||||
1355 | } | ||||
1356 | if (NeedsRebuild) { | ||||
1357 | return buildNew(I, NewOps); | ||||
1358 | } | ||||
1359 | return I; | ||||
1360 | } | ||||
1361 | case Instruction::InsertElement: { | ||||
1362 | int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); | ||||
1363 | |||||
1364 | // The insertelement was inserting at Element. Figure out which element | ||||
1365 | // that becomes after shuffling. The answer is guaranteed to be unique | ||||
1366 | // by CanEvaluateShuffled. | ||||
1367 | bool Found = false; | ||||
1368 | int Index = 0; | ||||
1369 | for (int e = Mask.size(); Index != e; ++Index) { | ||||
1370 | if (Mask[Index] == Element) { | ||||
1371 | Found = true; | ||||
1372 | break; | ||||
1373 | } | ||||
1374 | } | ||||
1375 | |||||
1376 | // If element is not in Mask, no need to handle the operand 1 (element to | ||||
1377 | // be inserted). Just evaluate values in operand 0 according to Mask. | ||||
1378 | if (!Found) | ||||
1379 | return evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||
1380 | |||||
1381 | Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||
1382 | return InsertElementInst::Create(V, I->getOperand(1), | ||||
1383 | ConstantInt::get(I32Ty, Index), "", I); | ||||
1384 | } | ||||
1385 | } | ||||
1386 | llvm_unreachable("failed to reorder elements of vector instruction!")::llvm::llvm_unreachable_internal("failed to reorder elements of vector instruction!" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1386); | ||||
1387 | } | ||||
1388 | |||||
1389 | // Returns true if the shuffle is extracting a contiguous range of values from | ||||
1390 | // LHS, for example: | ||||
1391 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||
1392 | // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| | ||||
1393 | // Shuffles to: |EE|FF|GG|HH| | ||||
1394 | // +--+--+--+--+ | ||||
1395 | static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, | ||||
1396 | SmallVector<int, 16> &Mask) { | ||||
1397 | unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements(); | ||||
1398 | unsigned MaskElems = Mask.size(); | ||||
1399 | unsigned BegIdx = Mask.front(); | ||||
1400 | unsigned EndIdx = Mask.back(); | ||||
1401 | if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) | ||||
1402 | return false; | ||||
1403 | for (unsigned I = 0; I != MaskElems; ++I) | ||||
1404 | if (static_cast<unsigned>(Mask[I]) != BegIdx + I) | ||||
1405 | return false; | ||||
1406 | return true; | ||||
1407 | } | ||||
1408 | |||||
1409 | /// These are the ingredients in an alternate form binary operator as described | ||||
1410 | /// below. | ||||
1411 | struct BinopElts { | ||||
1412 | BinaryOperator::BinaryOps Opcode; | ||||
1413 | Value *Op0; | ||||
1414 | Value *Op1; | ||||
1415 | BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0, | ||||
1416 | Value *V0 = nullptr, Value *V1 = nullptr) : | ||||
1417 | Opcode(Opc), Op0(V0), Op1(V1) {} | ||||
1418 | operator bool() const { return Opcode != 0; } | ||||
1419 | }; | ||||
1420 | |||||
1421 | /// Binops may be transformed into binops with different opcodes and operands. | ||||
1422 | /// Reverse the usual canonicalization to enable folds with the non-canonical | ||||
1423 | /// form of the binop. If a transform is possible, return the elements of the | ||||
1424 | /// new binop. If not, return invalid elements. | ||||
1425 | static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { | ||||
1426 | Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1); | ||||
1427 | Type *Ty = BO->getType(); | ||||
1428 | switch (BO->getOpcode()) { | ||||
1429 | case Instruction::Shl: { | ||||
1430 | // shl X, C --> mul X, (1 << C) | ||||
1431 | Constant *C; | ||||
1432 | if (match(BO1, m_Constant(C))) { | ||||
1433 | Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C); | ||||
1434 | return { Instruction::Mul, BO0, ShlOne }; | ||||
1435 | } | ||||
1436 | break; | ||||
1437 | } | ||||
1438 | case Instruction::Or: { | ||||
1439 | // or X, C --> add X, C (when X and C have no common bits set) | ||||
1440 | const APInt *C; | ||||
1441 | if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL)) | ||||
1442 | return { Instruction::Add, BO0, BO1 }; | ||||
1443 | break; | ||||
1444 | } | ||||
1445 | default: | ||||
1446 | break; | ||||
1447 | } | ||||
1448 | return {}; | ||||
1449 | } | ||||
1450 | |||||
1451 | static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { | ||||
1452 | assert(Shuf.isSelect() && "Must have select-equivalent shuffle")((Shuf.isSelect() && "Must have select-equivalent shuffle" ) ? static_cast<void> (0) : __assert_fail ("Shuf.isSelect() && \"Must have select-equivalent shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1452, __PRETTY_FUNCTION__)); | ||||
1453 | |||||
1454 | // Are we shuffling together some value and that same value after it has been | ||||
1455 | // modified by a binop with a constant? | ||||
1456 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||
1457 | Constant *C; | ||||
1458 | bool Op0IsBinop; | ||||
1459 | if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C)))) | ||||
1460 | Op0IsBinop = true; | ||||
1461 | else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C)))) | ||||
1462 | Op0IsBinop = false; | ||||
1463 | else | ||||
1464 | return nullptr; | ||||
1465 | |||||
1466 | // The identity constant for a binop leaves a variable operand unchanged. For | ||||
1467 | // a vector, this is a splat of something like 0, -1, or 1. | ||||
1468 | // If there's no identity constant for this binop, we're done. | ||||
1469 | auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); | ||||
1470 | BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); | ||||
1471 | Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); | ||||
1472 | if (!IdC) | ||||
1473 | return nullptr; | ||||
1474 | |||||
1475 | // Shuffle identity constants into the lanes that return the original value. | ||||
1476 | // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4} | ||||
1477 | // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4} | ||||
1478 | // The existing binop constant vector remains in the same operand position. | ||||
1479 | Constant *Mask = Shuf.getMask(); | ||||
1480 | Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : | ||||
1481 | ConstantExpr::getShuffleVector(IdC, C, Mask); | ||||
1482 | |||||
1483 | bool MightCreatePoisonOrUB = | ||||
1484 | Mask->containsUndefElement() && | ||||
1485 | (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); | ||||
1486 | if (MightCreatePoisonOrUB) | ||||
1487 | NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true); | ||||
1488 | |||||
1489 | // shuf (bop X, C), X, M --> bop X, C' | ||||
1490 | // shuf X, (bop X, C), M --> bop X, C' | ||||
1491 | Value *X = Op0IsBinop ? Op1 : Op0; | ||||
1492 | Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); | ||||
1493 | NewBO->copyIRFlags(BO); | ||||
1494 | |||||
1495 | // An undef shuffle mask element may propagate as an undef constant element in | ||||
1496 | // the new binop. That would produce poison where the original code might not. | ||||
1497 | // If we already made a safe constant, then there's no danger. | ||||
1498 | if (Mask->containsUndefElement() && !MightCreatePoisonOrUB) | ||||
1499 | NewBO->dropPoisonGeneratingFlags(); | ||||
1500 | return NewBO; | ||||
1501 | } | ||||
1502 | |||||
1503 | /// If we have an insert of a scalar to a non-zero element of an undefined | ||||
1504 | /// vector and then shuffle that value, that's the same as inserting to the zero | ||||
1505 | /// element and shuffling. Splatting from the zero element is recognized as the | ||||
1506 | /// canonical form of splat. | ||||
1507 | static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, | ||||
1508 | InstCombiner::BuilderTy &Builder) { | ||||
1509 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||
1510 | Constant *Mask = Shuf.getMask(); | ||||
1511 | Value *X; | ||||
1512 | uint64_t IndexC; | ||||
1513 | |||||
1514 | // Match a shuffle that is a splat to a non-zero element. | ||||
1515 | if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X), | ||||
1516 | m_ConstantInt(IndexC)))) || | ||||
1517 | !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0) | ||||
1518 | return nullptr; | ||||
1519 | |||||
1520 | // Insert into element 0 of an undef vector. | ||||
1521 | UndefValue *UndefVec = UndefValue::get(Shuf.getType()); | ||||
1522 | Constant *Zero = Builder.getInt32(0); | ||||
1523 | Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero); | ||||
1524 | |||||
1525 | // Splat from element 0. Any mask element that is undefined remains undefined. | ||||
1526 | // For example: | ||||
1527 | // shuf (inselt undef, X, 2), undef, <2,2,undef> | ||||
1528 | // --> shuf (inselt undef, X, 0), undef, <0,0,undef> | ||||
1529 | unsigned NumMaskElts = Shuf.getType()->getVectorNumElements(); | ||||
1530 | SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero); | ||||
1531 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||
1532 | if (isa<UndefValue>(Mask->getAggregateElement(i))) | ||||
1533 | NewMask[i] = Mask->getAggregateElement(i); | ||||
1534 | |||||
1535 | return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask)); | ||||
1536 | } | ||||
1537 | |||||
1538 | /// Try to fold shuffles that are the equivalent of a vector select. | ||||
1539 | static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, | ||||
1540 | InstCombiner::BuilderTy &Builder, | ||||
1541 | const DataLayout &DL) { | ||||
1542 | if (!Shuf.isSelect()) | ||||
1543 | return nullptr; | ||||
1544 | |||||
1545 | // Canonicalize to choose from operand 0 first unless operand 1 is undefined. | ||||
1546 | // Commuting undef to operand 0 conflicts with another canonicalization. | ||||
1547 | unsigned NumElts = Shuf.getType()->getVectorNumElements(); | ||||
1548 | if (!isa<UndefValue>(Shuf.getOperand(1)) && | ||||
1549 | Shuf.getMaskValue(0) >= (int)NumElts) { | ||||
1550 | // TODO: Can we assert that both operands of a shuffle-select are not undef | ||||
1551 | // (otherwise, it would have been folded by instsimplify? | ||||
1552 | Shuf.commute(); | ||||
1553 | return &Shuf; | ||||
1554 | } | ||||
1555 | |||||
1556 | if (Instruction *I = foldSelectShuffleWith1Binop(Shuf)) | ||||
1557 | return I; | ||||
1558 | |||||
1559 | BinaryOperator *B0, *B1; | ||||
1560 | if (!match(Shuf.getOperand(0), m_BinOp(B0)) || | ||||
1561 | !match(Shuf.getOperand(1), m_BinOp(B1))) | ||||
1562 | return nullptr; | ||||
1563 | |||||
1564 | Value *X, *Y; | ||||
1565 | Constant *C0, *C1; | ||||
1566 | bool ConstantsAreOp1; | ||||
1567 | if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) && | ||||
1568 | match(B1, m_BinOp(m_Value(Y), m_Constant(C1)))) | ||||
1569 | ConstantsAreOp1 = true; | ||||
1570 | else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && | ||||
1571 | match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) | ||||
1572 | ConstantsAreOp1 = false; | ||||
1573 | else | ||||
1574 | return nullptr; | ||||
1575 | |||||
1576 | // We need matching binops to fold the lanes together. | ||||
1577 | BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); | ||||
1578 | BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); | ||||
1579 | bool DropNSW = false; | ||||
1580 | if (ConstantsAreOp1 && Opc0 != Opc1) { | ||||
1581 | // TODO: We drop "nsw" if shift is converted into multiply because it may | ||||
1582 | // not be correct when the shift amount is BitWidth - 1. We could examine | ||||
1583 | // each vector element to determine if it is safe to keep that flag. | ||||
1584 | if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl) | ||||
1585 | DropNSW = true; | ||||
1586 | if (BinopElts AltB0 = getAlternateBinop(B0, DL)) { | ||||
1587 | assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop")((isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop" ) ? static_cast<void> (0) : __assert_fail ("isa<Constant>(AltB0.Op1) && \"Expecting constant with alt binop\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1587, __PRETTY_FUNCTION__)); | ||||
1588 | Opc0 = AltB0.Opcode; | ||||
1589 | C0 = cast<Constant>(AltB0.Op1); | ||||
1590 | } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) { | ||||
1591 | assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop")((isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop" ) ? static_cast<void> (0) : __assert_fail ("isa<Constant>(AltB1.Op1) && \"Expecting constant with alt binop\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1591, __PRETTY_FUNCTION__)); | ||||
1592 | Opc1 = AltB1.Opcode; | ||||
1593 | C1 = cast<Constant>(AltB1.Op1); | ||||
1594 | } | ||||
1595 | } | ||||
1596 | |||||
1597 | if (Opc0 != Opc1) | ||||
1598 | return nullptr; | ||||
1599 | |||||
1600 | // The opcodes must be the same. Use a new name to make that clear. | ||||
1601 | BinaryOperator::BinaryOps BOpc = Opc0; | ||||
1602 | |||||
1603 | // Select the constant elements needed for the single binop. | ||||
1604 | Constant *Mask = Shuf.getMask(); | ||||
1605 | Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); | ||||
1606 | |||||
1607 | // We are moving a binop after a shuffle. When a shuffle has an undefined | ||||
1608 | // mask element, the result is undefined, but it is not poison or undefined | ||||
1609 | // behavior. That is not necessarily true for div/rem/shift. | ||||
1610 | bool MightCreatePoisonOrUB = | ||||
1611 | Mask->containsUndefElement() && | ||||
1612 | (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); | ||||
1613 | if (MightCreatePoisonOrUB) | ||||
1614 | NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1); | ||||
1615 | |||||
1616 | Value *V; | ||||
1617 | if (X == Y) { | ||||
1618 | // Remove a binop and the shuffle by rearranging the constant: | ||||
1619 | // shuffle (op V, C0), (op V, C1), M --> op V, C' | ||||
1620 | // shuffle (op C0, V), (op C1, V), M --> op C', V | ||||
1621 | V = X; | ||||
1622 | } else { | ||||
1623 | // If there are 2 different variable operands, we must create a new shuffle | ||||
1624 | // (select) first, so check uses to ensure that we don't end up with more | ||||
1625 | // instructions than we started with. | ||||
1626 | if (!B0->hasOneUse() && !B1->hasOneUse()) | ||||
1627 | return nullptr; | ||||
1628 | |||||
1629 | // If we use the original shuffle mask and op1 is *variable*, we would be | ||||
1630 | // putting an undef into operand 1 of div/rem/shift. This is either UB or | ||||
1631 | // poison. We do not have to guard against UB when *constants* are op1 | ||||
1632 | // because safe constants guarantee that we do not overflow sdiv/srem (and | ||||
1633 | // there's no danger for other opcodes). | ||||
1634 | // TODO: To allow this case, create a new shuffle mask with no undefs. | ||||
1635 | if (MightCreatePoisonOrUB && !ConstantsAreOp1) | ||||
1636 | return nullptr; | ||||
1637 | |||||
1638 | // Note: In general, we do not create new shuffles in InstCombine because we | ||||
1639 | // do not know if a target can lower an arbitrary shuffle optimally. In this | ||||
1640 | // case, the shuffle uses the existing mask, so there is no additional risk. | ||||
1641 | |||||
1642 | // Select the variable vectors first, then perform the binop: | ||||
1643 | // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' | ||||
1644 | // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) | ||||
1645 | V = Builder.CreateShuffleVector(X, Y, Mask); | ||||
1646 | } | ||||
1647 | |||||
1648 | Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) : | ||||
1649 | BinaryOperator::Create(BOpc, NewC, V); | ||||
1650 | |||||
1651 | // Flags are intersected from the 2 source binops. But there are 2 exceptions: | ||||
1652 | // 1. If we changed an opcode, poison conditions might have changed. | ||||
1653 | // 2. If the shuffle had undef mask elements, the new binop might have undefs | ||||
1654 | // where the original code did not. But if we already made a safe constant, | ||||
1655 | // then there's no danger. | ||||
1656 | NewBO->copyIRFlags(B0); | ||||
1657 | NewBO->andIRFlags(B1); | ||||
1658 | if (DropNSW) | ||||
1659 | NewBO->setHasNoSignedWrap(false); | ||||
1660 | if (Mask->containsUndefElement() && !MightCreatePoisonOrUB) | ||||
1661 | NewBO->dropPoisonGeneratingFlags(); | ||||
1662 | return NewBO; | ||||
1663 | } | ||||
1664 | |||||
1665 | /// Match a shuffle-select-shuffle pattern where the shuffles are widening and | ||||
1666 | /// narrowing (concatenating with undef and extracting back to the original | ||||
1667 | /// length). This allows replacing the wide select with a narrow select. | ||||
1668 | static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, | ||||
1669 | InstCombiner::BuilderTy &Builder) { | ||||
1670 | // This must be a narrowing identity shuffle. It extracts the 1st N elements | ||||
1671 | // of the 1st vector operand of a shuffle. | ||||
1672 | if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract()) | ||||
1673 | return nullptr; | ||||
1674 | |||||
1675 | // The vector being shuffled must be a vector select that we can eliminate. | ||||
1676 | // TODO: The one-use requirement could be eased if X and/or Y are constants. | ||||
1677 | Value *Cond, *X, *Y; | ||||
1678 | if (!match(Shuf.getOperand(0), | ||||
1679 | m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) | ||||
1680 | return nullptr; | ||||
1681 | |||||
1682 | // We need a narrow condition value. It must be extended with undef elements | ||||
1683 | // and have the same number of elements as this shuffle. | ||||
1684 | unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements(); | ||||
1685 | Value *NarrowCond; | ||||
1686 | if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(), | ||||
1687 | m_Constant()))) || | ||||
1688 | NarrowCond->getType()->getVectorNumElements() != NarrowNumElts || | ||||
1689 | !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()) | ||||
1690 | return nullptr; | ||||
1691 | |||||
1692 | // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) --> | ||||
1693 | // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask) | ||||
1694 | Value *Undef = UndefValue::get(X->getType()); | ||||
1695 | Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask()); | ||||
1696 | Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask()); | ||||
1697 | return SelectInst::Create(NarrowCond, NarrowX, NarrowY); | ||||
1698 | } | ||||
1699 | |||||
1700 | /// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. | ||||
1701 | static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { | ||||
1702 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||
1703 | if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1)) | ||||
1704 | return nullptr; | ||||
1705 | |||||
1706 | Value *X, *Y; | ||||
1707 | Constant *Mask; | ||||
1708 | if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask)))) | ||||
1709 | return nullptr; | ||||
1710 | |||||
1711 | // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, | ||||
1712 | // then combining may result in worse codegen. | ||||
1713 | if (!Op0->hasOneUse()) | ||||
1714 | return nullptr; | ||||
1715 | |||||
1716 | // We are extracting a subvector from a shuffle. Remove excess elements from | ||||
1717 | // the 1st shuffle mask to eliminate the extract. | ||||
1718 | // | ||||
1719 | // This transform is conservatively limited to identity extracts because we do | ||||
1720 | // not allow arbitrary shuffle mask creation as a target-independent transform | ||||
1721 | // (because we can't guarantee that will lower efficiently). | ||||
1722 | // | ||||
1723 | // If the extracting shuffle has an undef mask element, it transfers to the | ||||
1724 | // new shuffle mask. Otherwise, copy the original mask element. Example: | ||||
1725 | // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> --> | ||||
1726 | // shuf X, Y, <C0, undef, C2, undef> | ||||
1727 | unsigned NumElts = Shuf.getType()->getVectorNumElements(); | ||||
1728 | SmallVector<Constant *, 16> NewMask(NumElts); | ||||
1729 | assert(NumElts < Mask->getType()->getVectorNumElements() &&((NumElts < Mask->getType()->getVectorNumElements() && "Identity with extract must have less elements than its inputs" ) ? static_cast<void> (0) : __assert_fail ("NumElts < Mask->getType()->getVectorNumElements() && \"Identity with extract must have less elements than its inputs\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1730, __PRETTY_FUNCTION__)) | ||||
1730 | "Identity with extract must have less elements than its inputs")((NumElts < Mask->getType()->getVectorNumElements() && "Identity with extract must have less elements than its inputs" ) ? static_cast<void> (0) : __assert_fail ("NumElts < Mask->getType()->getVectorNumElements() && \"Identity with extract must have less elements than its inputs\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1730, __PRETTY_FUNCTION__)); | ||||
1731 | |||||
1732 | for (unsigned i = 0; i != NumElts; ++i) { | ||||
1733 | Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i); | ||||
1734 | Constant *MaskElt = Mask->getAggregateElement(i); | ||||
1735 | NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt; | ||||
1736 | } | ||||
1737 | return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask)); | ||||
1738 | } | ||||
1739 | |||||
1740 | /// Try to replace a shuffle with an insertelement or try to replace a shuffle | ||||
1741 | /// operand with the operand of an insertelement. | ||||
1742 | static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, | ||||
1743 | InstCombiner &IC) { | ||||
1744 | Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); | ||||
1745 | SmallVector<int, 16> Mask = Shuf.getShuffleMask(); | ||||
1746 | |||||
1747 | // The shuffle must not change vector sizes. | ||||
1748 | // TODO: This restriction could be removed if the insert has only one use | ||||
1749 | // (because the transform would require a new length-changing shuffle). | ||||
1750 | int NumElts = Mask.size(); | ||||
1751 | if (NumElts != (int)(V0->getType()->getVectorNumElements())) | ||||
1752 | return nullptr; | ||||
1753 | |||||
1754 | // This is a specialization of a fold in SimplifyDemandedVectorElts. We may | ||||
1755 | // not be able to handle it there if the insertelement has >1 use. | ||||
1756 | // If the shuffle has an insertelement operand but does not choose the | ||||
1757 | // inserted scalar element from that value, then we can replace that shuffle | ||||
1758 | // operand with the source vector of the insertelement. | ||||
1759 | Value *X; | ||||
1760 | uint64_t IdxC; | ||||
1761 | if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||
1762 | // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask | ||||
1763 | if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) | ||||
1764 | return IC.replaceOperand(Shuf, 0, X); | ||||
1765 | } | ||||
1766 | if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||
1767 | // Offset the index constant by the vector width because we are checking for | ||||
1768 | // accesses to the 2nd vector input of the shuffle. | ||||
1769 | IdxC += NumElts; | ||||
1770 | // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask | ||||
1771 | if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; })) | ||||
1772 | return IC.replaceOperand(Shuf, 1, X); | ||||
1773 | } | ||||
1774 | |||||
1775 | // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' | ||||
1776 | auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { | ||||
1777 | // We need an insertelement with a constant index. | ||||
1778 | if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar), | ||||
1779 | m_ConstantInt(IndexC)))) | ||||
1780 | return false; | ||||
1781 | |||||
1782 | // Test the shuffle mask to see if it splices the inserted scalar into the | ||||
1783 | // operand 1 vector of the shuffle. | ||||
1784 | int NewInsIndex = -1; | ||||
1785 | for (int i = 0; i != NumElts; ++i) { | ||||
1786 | // Ignore undef mask elements. | ||||
1787 | if (Mask[i] == -1) | ||||
1788 | continue; | ||||
1789 | |||||
1790 | // The shuffle takes elements of operand 1 without lane changes. | ||||
1791 | if (Mask[i] == NumElts + i) | ||||
1792 | continue; | ||||
1793 | |||||
1794 | // The shuffle must choose the inserted scalar exactly once. | ||||
1795 | if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) | ||||
1796 | return false; | ||||
1797 | |||||
1798 | // The shuffle is placing the inserted scalar into element i. | ||||
1799 | NewInsIndex = i; | ||||
1800 | } | ||||
1801 | |||||
1802 | assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?")((NewInsIndex != -1 && "Did not fold shuffle with unused operand?" ) ? static_cast<void> (0) : __assert_fail ("NewInsIndex != -1 && \"Did not fold shuffle with unused operand?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1802, __PRETTY_FUNCTION__)); | ||||
1803 | |||||
1804 | // Index is updated to the potentially translated insertion lane. | ||||
1805 | IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); | ||||
1806 | return true; | ||||
1807 | }; | ||||
1808 | |||||
1809 | // If the shuffle is unnecessary, insert the scalar operand directly into | ||||
1810 | // operand 1 of the shuffle. Example: | ||||
1811 | // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 | ||||
1812 | Value *Scalar; | ||||
1813 | ConstantInt *IndexC; | ||||
1814 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||
1815 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||
1816 | |||||
1817 | // Try again after commuting shuffle. Example: | ||||
1818 | // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> | ||||
1819 | // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 | ||||
1820 | std::swap(V0, V1); | ||||
1821 | ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); | ||||
1822 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||
1823 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||
1824 | |||||
1825 | return nullptr; | ||||
1826 | } | ||||
1827 | |||||
1828 | static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { | ||||
1829 | // Match the operands as identity with padding (also known as concatenation | ||||
1830 | // with undef) shuffles of the same source type. The backend is expected to | ||||
1831 | // recreate these concatenations from a shuffle of narrow operands. | ||||
1832 | auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0)); | ||||
1833 | auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1)); | ||||
1834 | if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || | ||||
1835 | !Shuffle1 || !Shuffle1->isIdentityWithPadding()) | ||||
1836 | return nullptr; | ||||
1837 | |||||
1838 | // We limit this transform to power-of-2 types because we expect that the | ||||
1839 | // backend can convert the simplified IR patterns to identical nodes as the | ||||
1840 | // original IR. | ||||
1841 | // TODO: If we can verify the same behavior for arbitrary types, the | ||||
1842 | // power-of-2 checks can be removed. | ||||
1843 | Value *X = Shuffle0->getOperand(0); | ||||
1844 | Value *Y = Shuffle1->getOperand(0); | ||||
1845 | if (X->getType() != Y->getType() || | ||||
1846 | !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) || | ||||
1847 | !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) || | ||||
1848 | !isPowerOf2_32(X->getType()->getVectorNumElements()) || | ||||
1849 | isa<UndefValue>(X) || isa<UndefValue>(Y)) | ||||
1850 | return nullptr; | ||||
1851 | assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&((isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && "Unexpected operand for identity shuffle") ? static_cast< void> (0) : __assert_fail ("isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && \"Unexpected operand for identity shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1853, __PRETTY_FUNCTION__)) | ||||
1852 | isa<UndefValue>(Shuffle1->getOperand(1)) &&((isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && "Unexpected operand for identity shuffle") ? static_cast< void> (0) : __assert_fail ("isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && \"Unexpected operand for identity shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1853, __PRETTY_FUNCTION__)) | ||||
1853 | "Unexpected operand for identity shuffle")((isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && "Unexpected operand for identity shuffle") ? static_cast< void> (0) : __assert_fail ("isa<UndefValue>(Shuffle0->getOperand(1)) && isa<UndefValue>(Shuffle1->getOperand(1)) && \"Unexpected operand for identity shuffle\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1853, __PRETTY_FUNCTION__)); | ||||
1854 | |||||
1855 | // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source | ||||
1856 | // operands directly by adjusting the shuffle mask to account for the narrower | ||||
1857 | // types: | ||||
1858 | // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' | ||||
1859 | int NarrowElts = X->getType()->getVectorNumElements(); | ||||
1860 | int WideElts = Shuffle0->getType()->getVectorNumElements(); | ||||
1861 | assert(WideElts > NarrowElts && "Unexpected types for identity with padding")((WideElts > NarrowElts && "Unexpected types for identity with padding" ) ? static_cast<void> (0) : __assert_fail ("WideElts > NarrowElts && \"Unexpected types for identity with padding\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1861, __PRETTY_FUNCTION__)); | ||||
1862 | |||||
1863 | Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext()); | ||||
1864 | SmallVector<int, 16> Mask = Shuf.getShuffleMask(); | ||||
1865 | SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty)); | ||||
1866 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||
1867 | if (Mask[i] == -1) | ||||
1868 | continue; | ||||
1869 | |||||
1870 | // If this shuffle is choosing an undef element from 1 of the sources, that | ||||
1871 | // element is undef. | ||||
1872 | if (Mask[i] < WideElts) { | ||||
1873 | if (Shuffle0->getMaskValue(Mask[i]) == -1) | ||||
1874 | continue; | ||||
1875 | } else { | ||||
1876 | if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1) | ||||
1877 | continue; | ||||
1878 | } | ||||
1879 | |||||
1880 | // If this shuffle is choosing from the 1st narrow op, the mask element is | ||||
1881 | // the same. If this shuffle is choosing from the 2nd narrow op, the mask | ||||
1882 | // element is offset down to adjust for the narrow vector widths. | ||||
1883 | if (Mask[i] < WideElts) { | ||||
1884 | assert(Mask[i] < NarrowElts && "Unexpected shuffle mask")((Mask[i] < NarrowElts && "Unexpected shuffle mask" ) ? static_cast<void> (0) : __assert_fail ("Mask[i] < NarrowElts && \"Unexpected shuffle mask\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1884, __PRETTY_FUNCTION__)); | ||||
1885 | NewMask[i] = ConstantInt::get(I32Ty, Mask[i]); | ||||
1886 | } else { | ||||
1887 | assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask")((Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask" ) ? static_cast<void> (0) : __assert_fail ("Mask[i] < (WideElts + NarrowElts) && \"Unexpected shuffle mask\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1887, __PRETTY_FUNCTION__)); | ||||
1888 | NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts)); | ||||
1889 | } | ||||
1890 | } | ||||
1891 | return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask)); | ||||
1892 | } | ||||
1893 | |||||
1894 | Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { | ||||
1895 | Value *LHS = SVI.getOperand(0); | ||||
1896 | Value *RHS = SVI.getOperand(1); | ||||
1897 | if (auto *V = SimplifyShuffleVectorInst( | ||||
1898 | LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI))) | ||||
1899 | return replaceInstUsesWith(SVI, V); | ||||
1900 | |||||
1901 | // shuffle x, x, mask --> shuffle x, undef, mask' | ||||
1902 | unsigned VWidth = SVI.getType()->getVectorNumElements(); | ||||
1903 | unsigned LHSWidth = LHS->getType()->getVectorNumElements(); | ||||
1904 | SmallVector<int, 16> Mask = SVI.getShuffleMask(); | ||||
1905 | Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); | ||||
1906 | if (LHS == RHS) { | ||||
1907 | assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?")((!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?" ) ? static_cast<void> (0) : __assert_fail ("!isa<UndefValue>(RHS) && \"Shuffle with 2 undef ops not simplified?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1907, __PRETTY_FUNCTION__)); | ||||
1908 | // Remap any references to RHS to use LHS. | ||||
1909 | SmallVector<Constant*, 16> Elts; | ||||
1910 | for (unsigned i = 0; i != VWidth; ++i) { | ||||
1911 | // Propagate undef elements or force mask to LHS. | ||||
1912 | if (Mask[i] < 0) | ||||
1913 | Elts.push_back(UndefValue::get(Int32Ty)); | ||||
1914 | else | ||||
1915 | Elts.push_back(ConstantInt::get(Int32Ty, Mask[i] % LHSWidth)); | ||||
1916 | } | ||||
1917 | return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), | ||||
1918 | ConstantVector::get(Elts)); | ||||
1919 | } | ||||
1920 | |||||
1921 | // shuffle undef, x, mask --> shuffle x, undef, mask' | ||||
1922 | if (isa<UndefValue>(LHS)) { | ||||
1923 | SVI.commute(); | ||||
1924 | return &SVI; | ||||
1925 | } | ||||
1926 | |||||
1927 | if (Instruction *I = canonicalizeInsertSplat(SVI, Builder)) | ||||
1928 | return I; | ||||
1929 | |||||
1930 | if (Instruction *I = foldSelectShuffle(SVI, Builder, DL)) | ||||
1931 | return I; | ||||
1932 | |||||
1933 | if (Instruction *I = narrowVectorSelect(SVI, Builder)) | ||||
1934 | return I; | ||||
1935 | |||||
1936 | APInt UndefElts(VWidth, 0); | ||||
1937 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||
1938 | if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { | ||||
1939 | if (V != &SVI) | ||||
1940 | return replaceInstUsesWith(SVI, V); | ||||
1941 | return &SVI; | ||||
1942 | } | ||||
1943 | |||||
1944 | if (Instruction *I = foldIdentityExtractShuffle(SVI)) | ||||
1945 | return I; | ||||
1946 | |||||
1947 | // These transforms have the potential to lose undef knowledge, so they are | ||||
1948 | // intentionally placed after SimplifyDemandedVectorElts(). | ||||
1949 | if (Instruction *I = foldShuffleWithInsert(SVI, *this)) | ||||
1950 | return I; | ||||
1951 | if (Instruction *I = foldIdentityPaddedShuffles(SVI)) | ||||
1952 | return I; | ||||
1953 | |||||
1954 | if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) { | ||||
1955 | Value *V = evaluateInDifferentElementOrder(LHS, Mask); | ||||
1956 | return replaceInstUsesWith(SVI, V); | ||||
1957 | } | ||||
1958 | |||||
1959 | // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to | ||||
1960 | // a non-vector type. We can instead bitcast the original vector followed by | ||||
1961 | // an extract of the desired element: | ||||
1962 | // | ||||
1963 | // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, | ||||
1964 | // <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||||
1965 | // %1 = bitcast <4 x i8> %sroa to i32 | ||||
1966 | // Becomes: | ||||
1967 | // %bc = bitcast <16 x i8> %in to <4 x i32> | ||||
1968 | // %ext = extractelement <4 x i32> %bc, i32 0 | ||||
1969 | // | ||||
1970 | // If the shuffle is extracting a contiguous range of values from the input | ||||
1971 | // vector then each use which is a bitcast of the extracted size can be | ||||
1972 | // replaced. This will work if the vector types are compatible, and the begin | ||||
1973 | // index is aligned to a value in the casted vector type. If the begin index | ||||
1974 | // isn't aligned then we can shuffle the original vector (keeping the same | ||||
1975 | // vector type) before extracting. | ||||
1976 | // | ||||
1977 | // This code will bail out if the target type is fundamentally incompatible | ||||
1978 | // with vectors of the source type. | ||||
1979 | // | ||||
1980 | // Example of <16 x i8>, target type i32: | ||||
1981 | // Index range [4,8): v-----------v Will work. | ||||
1982 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||
1983 | // <16 x i8>: | | | | | | | | | | | | | | | | | | ||||
1984 | // <4 x i32>: | | | | | | ||||
1985 | // +-----------+-----------+-----------+-----------+ | ||||
1986 | // Index range [6,10): ^-----------^ Needs an extra shuffle. | ||||
1987 | // Target type i40: ^--------------^ Won't work, bail. | ||||
1988 | bool MadeChange = false; | ||||
1989 | if (isShuffleExtractingFromLHS(SVI, Mask)) { | ||||
1990 | Value *V = LHS; | ||||
1991 | unsigned MaskElems = Mask.size(); | ||||
1992 | VectorType *SrcTy = cast<VectorType>(V->getType()); | ||||
1993 | unsigned VecBitWidth = SrcTy->getBitWidth(); | ||||
1994 | unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); | ||||
1995 | assert(SrcElemBitWidth && "vector elements must have a bitwidth")((SrcElemBitWidth && "vector elements must have a bitwidth" ) ? static_cast<void> (0) : __assert_fail ("SrcElemBitWidth && \"vector elements must have a bitwidth\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 1995, __PRETTY_FUNCTION__)); | ||||
1996 | unsigned SrcNumElems = SrcTy->getNumElements(); | ||||
1997 | SmallVector<BitCastInst *, 8> BCs; | ||||
1998 | DenseMap<Type *, Value *> NewBCs; | ||||
1999 | for (User *U : SVI.users()) | ||||
2000 | if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) | ||||
2001 | if (!BC->use_empty()) | ||||
2002 | // Only visit bitcasts that weren't previously handled. | ||||
2003 | BCs.push_back(BC); | ||||
2004 | for (BitCastInst *BC : BCs) { | ||||
2005 | unsigned BegIdx = Mask.front(); | ||||
2006 | Type *TgtTy = BC->getDestTy(); | ||||
2007 | unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); | ||||
2008 | if (!TgtElemBitWidth) | ||||
2009 | continue; | ||||
2010 | unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; | ||||
2011 | bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; | ||||
2012 | bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); | ||||
2013 | if (!VecBitWidthsEqual) | ||||
2014 | continue; | ||||
2015 | if (!VectorType::isValidElementType(TgtTy)) | ||||
2016 | continue; | ||||
2017 | VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems); | ||||
2018 | if (!BegIsAligned) { | ||||
2019 | // Shuffle the input so [0,NumElements) contains the output, and | ||||
2020 | // [NumElems,SrcNumElems) is undef. | ||||
2021 | SmallVector<Constant *, 16> ShuffleMask(SrcNumElems, | ||||
2022 | UndefValue::get(Int32Ty)); | ||||
2023 | for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) | ||||
2024 | ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); | ||||
2025 | V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), | ||||
2026 | ConstantVector::get(ShuffleMask), | ||||
2027 | SVI.getName() + ".extract"); | ||||
2028 | BegIdx = 0; | ||||
2029 | } | ||||
2030 | unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; | ||||
2031 | assert(SrcElemsPerTgtElem)((SrcElemsPerTgtElem) ? static_cast<void> (0) : __assert_fail ("SrcElemsPerTgtElem", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 2031, __PRETTY_FUNCTION__)); | ||||
2032 | BegIdx /= SrcElemsPerTgtElem; | ||||
2033 | bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); | ||||
2034 | auto *NewBC = | ||||
2035 | BCAlreadyExists | ||||
2036 | ? NewBCs[CastSrcTy] | ||||
2037 | : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); | ||||
2038 | if (!BCAlreadyExists) | ||||
2039 | NewBCs[CastSrcTy] = NewBC; | ||||
2040 | auto *Ext = Builder.CreateExtractElement( | ||||
2041 | NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); | ||||
2042 | // The shufflevector isn't being replaced: the bitcast that used it | ||||
2043 | // is. InstCombine will visit the newly-created instructions. | ||||
2044 | replaceInstUsesWith(*BC, Ext); | ||||
2045 | MadeChange = true; | ||||
2046 | } | ||||
2047 | } | ||||
2048 | |||||
2049 | // If the LHS is a shufflevector itself, see if we can combine it with this | ||||
2050 | // one without producing an unusual shuffle. | ||||
2051 | // Cases that might be simplified: | ||||
2052 | // 1. | ||||
2053 | // x1=shuffle(v1,v2,mask1) | ||||
2054 | // x=shuffle(x1,undef,mask) | ||||
2055 | // ==> | ||||
2056 | // x=shuffle(v1,undef,newMask) | ||||
2057 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 | ||||
2058 | // 2. | ||||
2059 | // x1=shuffle(v1,undef,mask1) | ||||
2060 | // x=shuffle(x1,x2,mask) | ||||
2061 | // where v1.size() == mask1.size() | ||||
2062 | // ==> | ||||
2063 | // x=shuffle(v1,x2,newMask) | ||||
2064 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] | ||||
2065 | // 3. | ||||
2066 | // x2=shuffle(v2,undef,mask2) | ||||
2067 | // x=shuffle(x1,x2,mask) | ||||
2068 | // where v2.size() == mask2.size() | ||||
2069 | // ==> | ||||
2070 | // x=shuffle(x1,v2,newMask) | ||||
2071 | // newMask[i] = (mask[i] < x1.size()) | ||||
2072 | // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() | ||||
2073 | // 4. | ||||
2074 | // x1=shuffle(v1,undef,mask1) | ||||
2075 | // x2=shuffle(v2,undef,mask2) | ||||
2076 | // x=shuffle(x1,x2,mask) | ||||
2077 | // where v1.size() == v2.size() | ||||
2078 | // ==> | ||||
2079 | // x=shuffle(v1,v2,newMask) | ||||
2080 | // newMask[i] = (mask[i] < x1.size()) | ||||
2081 | // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() | ||||
2082 | // | ||||
2083 | // Here we are really conservative: | ||||
2084 | // we are absolutely afraid of producing a shuffle mask not in the input | ||||
2085 | // program, because the code gen may not be smart enough to turn a merged | ||||
2086 | // shuffle into two specific shuffles: it may produce worse code. As such, | ||||
2087 | // we only merge two shuffles if the result is either a splat or one of the | ||||
2088 | // input shuffle masks. In this case, merging the shuffles just removes | ||||
2089 | // one instruction, which we know is safe. This is good for things like | ||||
2090 | // turning: (splat(splat)) -> splat, or | ||||
2091 | // merge(V[0..n], V[n+1..2n]) -> V[0..2n] | ||||
2092 | ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); | ||||
2093 | ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); | ||||
2094 | if (LHSShuffle) | ||||
2095 | if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) | ||||
2096 | LHSShuffle = nullptr; | ||||
2097 | if (RHSShuffle) | ||||
2098 | if (!isa<UndefValue>(RHSShuffle->getOperand(1))) | ||||
2099 | RHSShuffle = nullptr; | ||||
2100 | if (!LHSShuffle && !RHSShuffle) | ||||
2101 | return MadeChange ? &SVI : nullptr; | ||||
2102 | |||||
2103 | Value* LHSOp0 = nullptr; | ||||
2104 | Value* LHSOp1 = nullptr; | ||||
2105 | Value* RHSOp0 = nullptr; | ||||
2106 | unsigned LHSOp0Width = 0; | ||||
2107 | unsigned RHSOp0Width = 0; | ||||
2108 | if (LHSShuffle) { | ||||
2109 | LHSOp0 = LHSShuffle->getOperand(0); | ||||
2110 | LHSOp1 = LHSShuffle->getOperand(1); | ||||
2111 | LHSOp0Width = LHSOp0->getType()->getVectorNumElements(); | ||||
2112 | } | ||||
2113 | if (RHSShuffle) { | ||||
2114 | RHSOp0 = RHSShuffle->getOperand(0); | ||||
2115 | RHSOp0Width = RHSOp0->getType()->getVectorNumElements(); | ||||
2116 | } | ||||
2117 | Value* newLHS = LHS; | ||||
2118 | Value* newRHS = RHS; | ||||
2119 | if (LHSShuffle) { | ||||
2120 | // case 1 | ||||
2121 | if (isa<UndefValue>(RHS)) { | ||||
2122 | newLHS = LHSOp0; | ||||
2123 | newRHS = LHSOp1; | ||||
2124 | } | ||||
2125 | // case 2 or 4 | ||||
2126 | else if (LHSOp0Width == LHSWidth) { | ||||
2127 | newLHS = LHSOp0; | ||||
2128 | } | ||||
2129 | } | ||||
2130 | // case 3 or 4 | ||||
2131 | if (RHSShuffle && RHSOp0Width == LHSWidth) { | ||||
2132 | newRHS = RHSOp0; | ||||
2133 | } | ||||
2134 | // case 4 | ||||
2135 | if (LHSOp0 == RHSOp0) { | ||||
2136 | newLHS = LHSOp0; | ||||
2137 | newRHS = nullptr; | ||||
2138 | } | ||||
2139 | |||||
2140 | if (newLHS == LHS && newRHS == RHS) | ||||
2141 | return MadeChange ? &SVI : nullptr; | ||||
2142 | |||||
2143 | SmallVector<int, 16> LHSMask; | ||||
2144 | SmallVector<int, 16> RHSMask; | ||||
2145 | if (newLHS != LHS) | ||||
2146 | LHSMask = LHSShuffle->getShuffleMask(); | ||||
2147 | if (RHSShuffle && newRHS != RHS) | ||||
2148 | RHSMask = RHSShuffle->getShuffleMask(); | ||||
2149 | |||||
2150 | unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; | ||||
2151 | SmallVector<int, 16> newMask; | ||||
2152 | bool isSplat = true; | ||||
2153 | int SplatElt = -1; | ||||
2154 | // Create a new mask for the new ShuffleVectorInst so that the new | ||||
2155 | // ShuffleVectorInst is equivalent to the original one. | ||||
2156 | for (unsigned i = 0; i < VWidth; ++i) { | ||||
2157 | int eltMask; | ||||
2158 | if (Mask[i] < 0) { | ||||
2159 | // This element is an undef value. | ||||
2160 | eltMask = -1; | ||||
2161 | } else if (Mask[i] < (int)LHSWidth) { | ||||
2162 | // This element is from left hand side vector operand. | ||||
2163 | // | ||||
2164 | // If LHS is going to be replaced (case 1, 2, or 4), calculate the | ||||
2165 | // new mask value for the element. | ||||
2166 | if (newLHS != LHS) { | ||||
2167 | eltMask = LHSMask[Mask[i]]; | ||||
2168 | // If the value selected is an undef value, explicitly specify it | ||||
2169 | // with a -1 mask value. | ||||
2170 | if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) | ||||
2171 | eltMask = -1; | ||||
2172 | } else | ||||
2173 | eltMask = Mask[i]; | ||||
2174 | } else { | ||||
2175 | // This element is from right hand side vector operand | ||||
2176 | // | ||||
2177 | // If the value selected is an undef value, explicitly specify it | ||||
2178 | // with a -1 mask value. (case 1) | ||||
2179 | if (isa<UndefValue>(RHS)) | ||||
2180 | eltMask = -1; | ||||
2181 | // If RHS is going to be replaced (case 3 or 4), calculate the | ||||
2182 | // new mask value for the element. | ||||
2183 | else if (newRHS != RHS) { | ||||
2184 | eltMask = RHSMask[Mask[i]-LHSWidth]; | ||||
2185 | // If the value selected is an undef value, explicitly specify it | ||||
2186 | // with a -1 mask value. | ||||
2187 | if (eltMask >= (int)RHSOp0Width) { | ||||
2188 | assert(isa<UndefValue>(RHSShuffle->getOperand(1))((isa<UndefValue>(RHSShuffle->getOperand(1)) && "should have been check above") ? static_cast<void> (0 ) : __assert_fail ("isa<UndefValue>(RHSShuffle->getOperand(1)) && \"should have been check above\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 2189, __PRETTY_FUNCTION__)) | ||||
2189 | && "should have been check above")((isa<UndefValue>(RHSShuffle->getOperand(1)) && "should have been check above") ? static_cast<void> (0 ) : __assert_fail ("isa<UndefValue>(RHSShuffle->getOperand(1)) && \"should have been check above\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp" , 2189, __PRETTY_FUNCTION__)); | ||||
2190 | eltMask = -1; | ||||
2191 | } | ||||
2192 | } else | ||||
2193 | eltMask = Mask[i]-LHSWidth; | ||||
2194 | |||||
2195 | // If LHS's width is changed, shift the mask value accordingly. | ||||
2196 | // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any | ||||
2197 | // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. | ||||
2198 | // If newRHS == newLHS, we want to remap any references from newRHS to | ||||
2199 | // newLHS so that we can properly identify splats that may occur due to | ||||
2200 | // obfuscation across the two vectors. | ||||
2201 | if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) | ||||
2202 | eltMask += newLHSWidth; | ||||
2203 | } | ||||
2204 | |||||
2205 | // Check if this could still be a splat. | ||||
2206 | if (eltMask >= 0) { | ||||
2207 | if (SplatElt >= 0 && SplatElt != eltMask) | ||||
2208 | isSplat = false; | ||||
2209 | SplatElt = eltMask; | ||||
2210 | } | ||||
2211 | |||||
2212 | newMask.push_back(eltMask); | ||||
2213 | } | ||||
2214 | |||||
2215 | // If the result mask is equal to one of the original shuffle masks, | ||||
2216 | // or is a splat, do the replacement. | ||||
2217 | if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { | ||||
2218 | SmallVector<Constant*, 16> Elts; | ||||
2219 | for (unsigned i = 0, e = newMask.size(); i != e; ++i) { | ||||
2220 | if (newMask[i] < 0) { | ||||
2221 | Elts.push_back(UndefValue::get(Int32Ty)); | ||||
2222 | } else { | ||||
2223 | Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); | ||||
2224 | } | ||||
2225 | } | ||||
2226 | if (!newRHS) | ||||
2227 | newRHS = UndefValue::get(newLHS->getType()); | ||||
2228 | return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); | ||||
2229 | } | ||||
2230 | |||||
2231 | return MadeChange ? &SVI : nullptr; | ||||
2232 | } |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides a simple and efficient mechanism for performing general |
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is |
11 | // that it allows you to write concise patterns that are expressive and easy to |
12 | // understand. The other major advantage of this is that it allows you to |
13 | // trivially capture/bind elements in the pattern to variables. For example, |
14 | // you can do something like this: |
15 | // |
16 | // Value *Exp = ... |
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) |
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), |
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { |
20 | // ... Pattern is matched and variables are bound ... |
21 | // } |
22 | // |
23 | // This is primarily useful to things like the instruction combiner, but can |
24 | // also be useful for static analysis tools or code generators. |
25 | // |
26 | //===----------------------------------------------------------------------===// |
27 | |
28 | #ifndef LLVM_IR_PATTERNMATCH_H |
29 | #define LLVM_IR_PATTERNMATCH_H |
30 | |
31 | #include "llvm/ADT/APFloat.h" |
32 | #include "llvm/ADT/APInt.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/InstrTypes.h" |
37 | #include "llvm/IR/Instruction.h" |
38 | #include "llvm/IR/Instructions.h" |
39 | #include "llvm/IR/IntrinsicInst.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Value.h" |
43 | #include "llvm/Support/Casting.h" |
44 | #include <cstdint> |
45 | |
46 | namespace llvm { |
47 | namespace PatternMatch { |
48 | |
49 | template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) { |
50 | return const_cast<Pattern &>(P).match(V); |
51 | } |
52 | |
53 | template <typename SubPattern_t> struct OneUse_match { |
54 | SubPattern_t SubPattern; |
55 | |
56 | OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} |
57 | |
58 | template <typename OpTy> bool match(OpTy *V) { |
59 | return V->hasOneUse() && SubPattern.match(V); |
60 | } |
61 | }; |
62 | |
63 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { |
64 | return SubPattern; |
65 | } |
66 | |
67 | template <typename Class> struct class_match { |
68 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } |
69 | }; |
70 | |
71 | /// Match an arbitrary value and ignore it. |
72 | inline class_match<Value> m_Value() { return class_match<Value>(); } |
73 | |
74 | /// Match an arbitrary binary operation and ignore it. |
75 | inline class_match<BinaryOperator> m_BinOp() { |
76 | return class_match<BinaryOperator>(); |
77 | } |
78 | |
79 | /// Matches any compare instruction and ignore it. |
80 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } |
81 | |
82 | /// Match an arbitrary ConstantInt and ignore it. |
83 | inline class_match<ConstantInt> m_ConstantInt() { |
84 | return class_match<ConstantInt>(); |
85 | } |
86 | |
87 | /// Match an arbitrary undef constant. |
88 | inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } |
89 | |
90 | /// Match an arbitrary Constant and ignore it. |
91 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } |
92 | |
93 | /// Match an arbitrary basic block value and ignore it. |
94 | inline class_match<BasicBlock> m_BasicBlock() { |
95 | return class_match<BasicBlock>(); |
96 | } |
97 | |
98 | /// Inverting matcher |
99 | template <typename Ty> struct match_unless { |
100 | Ty M; |
101 | |
102 | match_unless(const Ty &Matcher) : M(Matcher) {} |
103 | |
104 | template <typename ITy> bool match(ITy *V) { return !M.match(V); } |
105 | }; |
106 | |
107 | /// Match if the inner matcher does *NOT* match. |
108 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { |
109 | return match_unless<Ty>(M); |
110 | } |
111 | |
112 | /// Matching combinators |
113 | template <typename LTy, typename RTy> struct match_combine_or { |
114 | LTy L; |
115 | RTy R; |
116 | |
117 | match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
118 | |
119 | template <typename ITy> bool match(ITy *V) { |
120 | if (L.match(V)) |
121 | return true; |
122 | if (R.match(V)) |
123 | return true; |
124 | return false; |
125 | } |
126 | }; |
127 | |
128 | template <typename LTy, typename RTy> struct match_combine_and { |
129 | LTy L; |
130 | RTy R; |
131 | |
132 | match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
133 | |
134 | template <typename ITy> bool match(ITy *V) { |
135 | if (L.match(V)) |
136 | if (R.match(V)) |
137 | return true; |
138 | return false; |
139 | } |
140 | }; |
141 | |
142 | /// Combine two pattern matchers matching L || R |
143 | template <typename LTy, typename RTy> |
144 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { |
145 | return match_combine_or<LTy, RTy>(L, R); |
146 | } |
147 | |
148 | /// Combine two pattern matchers matching L && R |
149 | template <typename LTy, typename RTy> |
150 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { |
151 | return match_combine_and<LTy, RTy>(L, R); |
152 | } |
153 | |
154 | struct apint_match { |
155 | const APInt *&Res; |
156 | bool AllowUndef; |
157 | |
158 | apint_match(const APInt *&Res, bool AllowUndef) |
159 | : Res(Res), AllowUndef(AllowUndef) {} |
160 | |
161 | template <typename ITy> bool match(ITy *V) { |
162 | if (auto *CI = dyn_cast<ConstantInt>(V)) { |
163 | Res = &CI->getValue(); |
164 | return true; |
165 | } |
166 | if (V->getType()->isVectorTy()) |
167 | if (const auto *C = dyn_cast<Constant>(V)) |
168 | if (auto *CI = dyn_cast_or_null<ConstantInt>( |
169 | C->getSplatValue(AllowUndef))) { |
170 | Res = &CI->getValue(); |
171 | return true; |
172 | } |
173 | return false; |
174 | } |
175 | }; |
176 | // Either constexpr if or renaming ConstantFP::getValueAPF to |
177 | // ConstantFP::getValue is needed to do it via single template |
178 | // function for both apint/apfloat. |
179 | struct apfloat_match { |
180 | const APFloat *&Res; |
181 | bool AllowUndef; |
182 | |
183 | apfloat_match(const APFloat *&Res, bool AllowUndef) |
184 | : Res(Res), AllowUndef(AllowUndef) {} |
185 | |
186 | template <typename ITy> bool match(ITy *V) { |
187 | if (auto *CI = dyn_cast<ConstantFP>(V)) { |
188 | Res = &CI->getValueAPF(); |
189 | return true; |
190 | } |
191 | if (V->getType()->isVectorTy()) |
192 | if (const auto *C = dyn_cast<Constant>(V)) |
193 | if (auto *CI = dyn_cast_or_null<ConstantFP>( |
194 | C->getSplatValue(AllowUndef))) { |
195 | Res = &CI->getValueAPF(); |
196 | return true; |
197 | } |
198 | return false; |
199 | } |
200 | }; |
201 | |
202 | /// Match a ConstantInt or splatted ConstantVector, binding the |
203 | /// specified pointer to the contained APInt. |
204 | inline apint_match m_APInt(const APInt *&Res) { |
205 | // Forbid undefs by default to maintain previous behavior. |
206 | return apint_match(Res, /* AllowUndef */ false); |
207 | } |
208 | |
209 | /// Match APInt while allowing undefs in splat vector constants. |
210 | inline apint_match m_APIntAllowUndef(const APInt *&Res) { |
211 | return apint_match(Res, /* AllowUndef */ true); |
212 | } |
213 | |
214 | /// Match APInt while forbidding undefs in splat vector constants. |
215 | inline apint_match m_APIntForbidUndef(const APInt *&Res) { |
216 | return apint_match(Res, /* AllowUndef */ false); |
217 | } |
218 | |
219 | /// Match a ConstantFP or splatted ConstantVector, binding the |
220 | /// specified pointer to the contained APFloat. |
221 | inline apfloat_match m_APFloat(const APFloat *&Res) { |
222 | // Forbid undefs by default to maintain previous behavior. |
223 | return apfloat_match(Res, /* AllowUndef */ false); |
224 | } |
225 | |
226 | /// Match APFloat while allowing undefs in splat vector constants. |
227 | inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) { |
228 | return apfloat_match(Res, /* AllowUndef */ true); |
229 | } |
230 | |
231 | /// Match APFloat while forbidding undefs in splat vector constants. |
232 | inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) { |
233 | return apfloat_match(Res, /* AllowUndef */ false); |
234 | } |
235 | |
236 | template <int64_t Val> struct constantint_match { |
237 | template <typename ITy> bool match(ITy *V) { |
238 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { |
239 | const APInt &CIV = CI->getValue(); |
240 | if (Val >= 0) |
241 | return CIV == static_cast<uint64_t>(Val); |
242 | // If Val is negative, and CI is shorter than it, truncate to the right |
243 | // number of bits. If it is larger, then we have to sign extend. Just |
244 | // compare their negated values. |
245 | return -CIV == -Val; |
246 | } |
247 | return false; |
248 | } |
249 | }; |
250 | |
251 | /// Match a ConstantInt with a specific value. |
252 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { |
253 | return constantint_match<Val>(); |
254 | } |
255 | |
256 | /// This helper class is used to match scalar and vector integer constants that |
257 | /// satisfy a specified predicate. |
258 | /// For vector constants, undefined elements are ignored. |
259 | template <typename Predicate> struct cst_pred_ty : public Predicate { |
260 | template <typename ITy> bool match(ITy *V) { |
261 | if (const auto *CI = dyn_cast<ConstantInt>(V)) |
262 | return this->isValue(CI->getValue()); |
263 | if (V->getType()->isVectorTy()) { |
264 | if (const auto *C = dyn_cast<Constant>(V)) { |
265 | if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) |
266 | return this->isValue(CI->getValue()); |
267 | |
268 | // Non-splat vector constant: check each element for a match. |
269 | unsigned NumElts = V->getType()->getVectorNumElements(); |
270 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/PatternMatch.h" , 270, __PRETTY_FUNCTION__)); |
271 | bool HasNonUndefElements = false; |
272 | for (unsigned i = 0; i != NumElts; ++i) { |
273 | Constant *Elt = C->getAggregateElement(i); |
274 | if (!Elt) |
275 | return false; |
276 | if (isa<UndefValue>(Elt)) |
277 | continue; |
278 | auto *CI = dyn_cast<ConstantInt>(Elt); |
279 | if (!CI || !this->isValue(CI->getValue())) |
280 | return false; |
281 | HasNonUndefElements = true; |
282 | } |
283 | return HasNonUndefElements; |
284 | } |
285 | } |
286 | return false; |
287 | } |
288 | }; |
289 | |
290 | /// This helper class is used to match scalar and vector constants that |
291 | /// satisfy a specified predicate, and bind them to an APInt. |
292 | template <typename Predicate> struct api_pred_ty : public Predicate { |
293 | const APInt *&Res; |
294 | |
295 | api_pred_ty(const APInt *&R) : Res(R) {} |
296 | |
297 | template <typename ITy> bool match(ITy *V) { |
298 | if (const auto *CI = dyn_cast<ConstantInt>(V)) |
299 | if (this->isValue(CI->getValue())) { |
300 | Res = &CI->getValue(); |
301 | return true; |
302 | } |
303 | if (V->getType()->isVectorTy()) |
304 | if (const auto *C = dyn_cast<Constant>(V)) |
305 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) |
306 | if (this->isValue(CI->getValue())) { |
307 | Res = &CI->getValue(); |
308 | return true; |
309 | } |
310 | |
311 | return false; |
312 | } |
313 | }; |
314 | |
315 | /// This helper class is used to match scalar and vector floating-point |
316 | /// constants that satisfy a specified predicate. |
317 | /// For vector constants, undefined elements are ignored. |
318 | template <typename Predicate> struct cstfp_pred_ty : public Predicate { |
319 | template <typename ITy> bool match(ITy *V) { |
320 | if (const auto *CF = dyn_cast<ConstantFP>(V)) |
321 | return this->isValue(CF->getValueAPF()); |
322 | if (V->getType()->isVectorTy()) { |
323 | if (const auto *C = dyn_cast<Constant>(V)) { |
324 | if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) |
325 | return this->isValue(CF->getValueAPF()); |
326 | |
327 | // Non-splat vector constant: check each element for a match. |
328 | unsigned NumElts = V->getType()->getVectorNumElements(); |
329 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/PatternMatch.h" , 329, __PRETTY_FUNCTION__)); |
330 | bool HasNonUndefElements = false; |
331 | for (unsigned i = 0; i != NumElts; ++i) { |
332 | Constant *Elt = C->getAggregateElement(i); |
333 | if (!Elt) |
334 | return false; |
335 | if (isa<UndefValue>(Elt)) |
336 | continue; |
337 | auto *CF = dyn_cast<ConstantFP>(Elt); |
338 | if (!CF || !this->isValue(CF->getValueAPF())) |
339 | return false; |
340 | HasNonUndefElements = true; |
341 | } |
342 | return HasNonUndefElements; |
343 | } |
344 | } |
345 | return false; |
346 | } |
347 | }; |
348 | |
349 | /////////////////////////////////////////////////////////////////////////////// |
350 | // |
351 | // Encapsulate constant value queries for use in templated predicate matchers. |
352 | // This allows checking if constants match using compound predicates and works |
353 | // with vector constants, possibly with relaxed constraints. For example, ignore |
354 | // undef values. |
355 | // |
356 | /////////////////////////////////////////////////////////////////////////////// |
357 | |
358 | struct is_any_apint { |
359 | bool isValue(const APInt &C) { return true; } |
360 | }; |
361 | /// Match an integer or vector with any integral constant. |
362 | /// For vectors, this includes constants with undefined elements. |
363 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { |
364 | return cst_pred_ty<is_any_apint>(); |
365 | } |
366 | |
367 | struct is_all_ones { |
368 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } |
369 | }; |
370 | /// Match an integer or vector with all bits set. |
371 | /// For vectors, this includes constants with undefined elements. |
372 | inline cst_pred_ty<is_all_ones> m_AllOnes() { |
373 | return cst_pred_ty<is_all_ones>(); |
374 | } |
375 | |
376 | struct is_maxsignedvalue { |
377 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } |
378 | }; |
379 | /// Match an integer or vector with values having all bits except for the high |
380 | /// bit set (0x7f...). |
381 | /// For vectors, this includes constants with undefined elements. |
382 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { |
383 | return cst_pred_ty<is_maxsignedvalue>(); |
384 | } |
385 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { |
386 | return V; |
387 | } |
388 | |
389 | struct is_negative { |
390 | bool isValue(const APInt &C) { return C.isNegative(); } |
391 | }; |
392 | /// Match an integer or vector of negative values. |
393 | /// For vectors, this includes constants with undefined elements. |
394 | inline cst_pred_ty<is_negative> m_Negative() { |
395 | return cst_pred_ty<is_negative>(); |
396 | } |
397 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { |
398 | return V; |
399 | } |
400 | |
401 | struct is_nonnegative { |
402 | bool isValue(const APInt &C) { return C.isNonNegative(); } |
403 | }; |
404 | /// Match an integer or vector of non-negative values. |
405 | /// For vectors, this includes constants with undefined elements. |
406 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { |
407 | return cst_pred_ty<is_nonnegative>(); |
408 | } |
409 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { |
410 | return V; |
411 | } |
412 | |
413 | struct is_strictlypositive { |
414 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } |
415 | }; |
416 | /// Match an integer or vector of strictly positive values. |
417 | /// For vectors, this includes constants with undefined elements. |
418 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { |
419 | return cst_pred_ty<is_strictlypositive>(); |
420 | } |
421 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { |
422 | return V; |
423 | } |
424 | |
425 | struct is_nonpositive { |
426 | bool isValue(const APInt &C) { return C.isNonPositive(); } |
427 | }; |
428 | /// Match an integer or vector of non-positive values. |
429 | /// For vectors, this includes constants with undefined elements. |
430 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { |
431 | return cst_pred_ty<is_nonpositive>(); |
432 | } |
433 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } |
434 | |
435 | struct is_one { |
436 | bool isValue(const APInt &C) { return C.isOneValue(); } |
437 | }; |
438 | /// Match an integer 1 or a vector with all elements equal to 1. |
439 | /// For vectors, this includes constants with undefined elements. |
440 | inline cst_pred_ty<is_one> m_One() { |
441 | return cst_pred_ty<is_one>(); |
442 | } |
443 | |
444 | struct is_zero_int { |
445 | bool isValue(const APInt &C) { return C.isNullValue(); } |
446 | }; |
447 | /// Match an integer 0 or a vector with all elements equal to 0. |
448 | /// For vectors, this includes constants with undefined elements. |
449 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { |
450 | return cst_pred_ty<is_zero_int>(); |
451 | } |
452 | |
453 | struct is_zero { |
454 | template <typename ITy> bool match(ITy *V) { |
455 | auto *C = dyn_cast<Constant>(V); |
456 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); |
457 | } |
458 | }; |
459 | /// Match any null constant or a vector with all elements equal to 0. |
460 | /// For vectors, this includes constants with undefined elements. |
461 | inline is_zero m_Zero() { |
462 | return is_zero(); |
463 | } |
464 | |
465 | struct is_power2 { |
466 | bool isValue(const APInt &C) { return C.isPowerOf2(); } |
467 | }; |
468 | /// Match an integer or vector power-of-2. |
469 | /// For vectors, this includes constants with undefined elements. |
470 | inline cst_pred_ty<is_power2> m_Power2() { |
471 | return cst_pred_ty<is_power2>(); |
472 | } |
473 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { |
474 | return V; |
475 | } |
476 | |
477 | struct is_negated_power2 { |
478 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } |
479 | }; |
480 | /// Match a integer or vector negated power-of-2. |
481 | /// For vectors, this includes constants with undefined elements. |
482 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { |
483 | return cst_pred_ty<is_negated_power2>(); |
484 | } |
485 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { |
486 | return V; |
487 | } |
488 | |
489 | struct is_power2_or_zero { |
490 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } |
491 | }; |
492 | /// Match an integer or vector of 0 or power-of-2 values. |
493 | /// For vectors, this includes constants with undefined elements. |
494 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { |
495 | return cst_pred_ty<is_power2_or_zero>(); |
496 | } |
497 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { |
498 | return V; |
499 | } |
500 | |
501 | struct is_sign_mask { |
502 | bool isValue(const APInt &C) { return C.isSignMask(); } |
503 | }; |
504 | /// Match an integer or vector with only the sign bit(s) set. |
505 | /// For vectors, this includes constants with undefined elements. |
506 | inline cst_pred_ty<is_sign_mask> m_SignMask() { |
507 | return cst_pred_ty<is_sign_mask>(); |
508 | } |
509 | |
510 | struct is_lowbit_mask { |
511 | bool isValue(const APInt &C) { return C.isMask(); } |
512 | }; |
513 | /// Match an integer or vector with only the low bit(s) set. |
514 | /// For vectors, this includes constants with undefined elements. |
515 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { |
516 | return cst_pred_ty<is_lowbit_mask>(); |
517 | } |
518 | |
519 | struct icmp_pred_with_threshold { |
520 | ICmpInst::Predicate Pred; |
521 | const APInt *Thr; |
522 | bool isValue(const APInt &C) { |
523 | switch (Pred) { |
524 | case ICmpInst::Predicate::ICMP_EQ: |
525 | return C.eq(*Thr); |
526 | case ICmpInst::Predicate::ICMP_NE: |
527 | return C.ne(*Thr); |
528 | case ICmpInst::Predicate::ICMP_UGT: |
529 | return C.ugt(*Thr); |
530 | case ICmpInst::Predicate::ICMP_UGE: |
531 | return C.uge(*Thr); |
532 | case ICmpInst::Predicate::ICMP_ULT: |
533 | return C.ult(*Thr); |
534 | case ICmpInst::Predicate::ICMP_ULE: |
535 | return C.ule(*Thr); |
536 | case ICmpInst::Predicate::ICMP_SGT: |
537 | return C.sgt(*Thr); |
538 | case ICmpInst::Predicate::ICMP_SGE: |
539 | return C.sge(*Thr); |
540 | case ICmpInst::Predicate::ICMP_SLT: |
541 | return C.slt(*Thr); |
542 | case ICmpInst::Predicate::ICMP_SLE: |
543 | return C.sle(*Thr); |
544 | default: |
545 | llvm_unreachable("Unhandled ICmp predicate")::llvm::llvm_unreachable_internal("Unhandled ICmp predicate", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/PatternMatch.h" , 545); |
546 | } |
547 | } |
548 | }; |
549 | /// Match an integer or vector with every element comparing 'pred' (eg/ne/...) |
550 | /// to Threshold. For vectors, this includes constants with undefined elements. |
551 | inline cst_pred_ty<icmp_pred_with_threshold> |
552 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { |
553 | cst_pred_ty<icmp_pred_with_threshold> P; |
554 | P.Pred = Predicate; |
555 | P.Thr = &Threshold; |
556 | return P; |
557 | } |
558 | |
559 | struct is_nan { |
560 | bool isValue(const APFloat &C) { return C.isNaN(); } |
561 | }; |
562 | /// Match an arbitrary NaN constant. This includes quiet and signalling nans. |
563 | /// For vectors, this includes constants with undefined elements. |
564 | inline cstfp_pred_ty<is_nan> m_NaN() { |
565 | return cstfp_pred_ty<is_nan>(); |
566 | } |
567 | |
568 | struct is_any_zero_fp { |
569 | bool isValue(const APFloat &C) { return C.isZero(); } |
570 | }; |
571 | /// Match a floating-point negative zero or positive zero. |
572 | /// For vectors, this includes constants with undefined elements. |
573 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { |
574 | return cstfp_pred_ty<is_any_zero_fp>(); |
575 | } |
576 | |
577 | struct is_pos_zero_fp { |
578 | bool isValue(const APFloat &C) { return C.isPosZero(); } |
579 | }; |
580 | /// Match a floating-point positive zero. |
581 | /// For vectors, this includes constants with undefined elements. |
582 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { |
583 | return cstfp_pred_ty<is_pos_zero_fp>(); |
584 | } |
585 | |
586 | struct is_neg_zero_fp { |
587 | bool isValue(const APFloat &C) { return C.isNegZero(); } |
588 | }; |
589 | /// Match a floating-point negative zero. |
590 | /// For vectors, this includes constants with undefined elements. |
591 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { |
592 | return cstfp_pred_ty<is_neg_zero_fp>(); |
593 | } |
594 | |
595 | /////////////////////////////////////////////////////////////////////////////// |
596 | |
597 | template <typename Class> struct bind_ty { |
598 | Class *&VR; |
599 | |
600 | bind_ty(Class *&V) : VR(V) {} |
601 | |
602 | template <typename ITy> bool match(ITy *V) { |
603 | if (auto *CV = dyn_cast<Class>(V)) { |
604 | VR = CV; |
605 | return true; |
606 | } |
607 | return false; |
608 | } |
609 | }; |
610 | |
611 | /// Match a value, capturing it if we match. |
612 | inline bind_ty<Value> m_Value(Value *&V) { return V; } |
613 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } |
614 | |
615 | /// Match an instruction, capturing it if we match. |
616 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } |
617 | /// Match a binary operator, capturing it if we match. |
618 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } |
619 | /// Match a with overflow intrinsic, capturing it if we match. |
620 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } |
621 | |
622 | /// Match a ConstantInt, capturing the value if we match. |
623 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } |
624 | |
625 | /// Match a Constant, capturing the value if we match. |
626 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } |
627 | |
628 | /// Match a ConstantFP, capturing the value if we match. |
629 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } |
630 | |
631 | /// Match a basic block value, capturing it if we match. |
632 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } |
633 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { |
634 | return V; |
635 | } |
636 | |
637 | /// Match a specified Value*. |
638 | struct specificval_ty { |
639 | const Value *Val; |
640 | |
641 | specificval_ty(const Value *V) : Val(V) {} |
642 | |
643 | template <typename ITy> bool match(ITy *V) { return V == Val; } |
644 | }; |
645 | |
646 | /// Match if we have a specific specified value. |
647 | inline specificval_ty m_Specific(const Value *V) { return V; } |
648 | |
649 | /// Stores a reference to the Value *, not the Value * itself, |
650 | /// thus can be used in commutative matchers. |
651 | template <typename Class> struct deferredval_ty { |
652 | Class *const &Val; |
653 | |
654 | deferredval_ty(Class *const &V) : Val(V) {} |
655 | |
656 | template <typename ITy> bool match(ITy *const V) { return V == Val; } |
657 | }; |
658 | |
659 | /// A commutative-friendly version of m_Specific(). |
660 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } |
661 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { |
662 | return V; |
663 | } |
664 | |
665 | /// Match a specified floating point value or vector of all elements of |
666 | /// that value. |
667 | struct specific_fpval { |
668 | double Val; |
669 | |
670 | specific_fpval(double V) : Val(V) {} |
671 | |
672 | template <typename ITy> bool match(ITy *V) { |
673 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) |
674 | return CFP->isExactlyValue(Val); |
675 | if (V->getType()->isVectorTy()) |
676 | if (const auto *C = dyn_cast<Constant>(V)) |
677 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) |
678 | return CFP->isExactlyValue(Val); |
679 | return false; |
680 | } |
681 | }; |
682 | |
683 | /// Match a specific floating point value or vector with all elements |
684 | /// equal to the value. |
685 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } |
686 | |
687 | /// Match a float 1.0 or vector with all elements equal to 1.0. |
688 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } |
689 | |
690 | struct bind_const_intval_ty { |
691 | uint64_t &VR; |
692 | |
693 | bind_const_intval_ty(uint64_t &V) : VR(V) {} |
694 | |
695 | template <typename ITy> bool match(ITy *V) { |
696 | if (const auto *CV = dyn_cast<ConstantInt>(V)) |
697 | if (CV->getValue().ule(UINT64_MAX(18446744073709551615UL))) { |
698 | VR = CV->getZExtValue(); |
699 | return true; |
700 | } |
701 | return false; |
702 | } |
703 | }; |
704 | |
705 | /// Match a specified integer value or vector of all elements of that |
706 | /// value. |
707 | struct specific_intval { |
708 | APInt Val; |
709 | |
710 | specific_intval(APInt V) : Val(std::move(V)) {} |
711 | |
712 | template <typename ITy> bool match(ITy *V) { |
713 | const auto *CI = dyn_cast<ConstantInt>(V); |
714 | if (!CI && V->getType()->isVectorTy()) |
715 | if (const auto *C = dyn_cast<Constant>(V)) |
716 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()); |
717 | |
718 | return CI && APInt::isSameValue(CI->getValue(), Val); |
719 | } |
720 | }; |
721 | |
722 | /// Match a specific integer value or vector with all elements equal to |
723 | /// the value. |
724 | inline specific_intval m_SpecificInt(APInt V) { |
725 | return specific_intval(std::move(V)); |
726 | } |
727 | |
728 | inline specific_intval m_SpecificInt(uint64_t V) { |
729 | return m_SpecificInt(APInt(64, V)); |
730 | } |
731 | |
732 | /// Match a ConstantInt and bind to its value. This does not match |
733 | /// ConstantInts wider than 64-bits. |
734 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } |
735 | |
736 | /// Match a specified basic block value. |
737 | struct specific_bbval { |
738 | BasicBlock *Val; |
739 | |
740 | specific_bbval(BasicBlock *Val) : Val(Val) {} |
741 | |
742 | template <typename ITy> bool match(ITy *V) { |
743 | const auto *BB = dyn_cast<BasicBlock>(V); |
744 | return BB && BB == Val; |
745 | } |
746 | }; |
747 | |
748 | /// Match a specific basic block value. |
749 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { |
750 | return specific_bbval(BB); |
751 | } |
752 | |
753 | /// A commutative-friendly version of m_Specific(). |
754 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { |
755 | return BB; |
756 | } |
757 | inline deferredval_ty<const BasicBlock> |
758 | m_Deferred(const BasicBlock *const &BB) { |
759 | return BB; |
760 | } |
761 | |
762 | //===----------------------------------------------------------------------===// |
763 | // Matcher for any binary operator. |
764 | // |
765 | template <typename LHS_t, typename RHS_t, bool Commutable = false> |
766 | struct AnyBinaryOp_match { |
767 | LHS_t L; |
768 | RHS_t R; |
769 | |
770 | // The evaluation order is always stable, regardless of Commutability. |
771 | // The LHS is always matched first. |
772 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
773 | |
774 | template <typename OpTy> bool match(OpTy *V) { |
775 | if (auto *I = dyn_cast<BinaryOperator>(V)) |
776 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
777 | (Commutable && L.match(I->getOperand(1)) && |
778 | R.match(I->getOperand(0))); |
779 | return false; |
780 | } |
781 | }; |
782 | |
783 | template <typename LHS, typename RHS> |
784 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { |
785 | return AnyBinaryOp_match<LHS, RHS>(L, R); |
786 | } |
787 | |
788 | //===----------------------------------------------------------------------===// |
789 | // Matchers for specific binary operators. |
790 | // |
791 | |
792 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
793 | bool Commutable = false> |
794 | struct BinaryOp_match { |
795 | LHS_t L; |
796 | RHS_t R; |
797 | |
798 | // The evaluation order is always stable, regardless of Commutability. |
799 | // The LHS is always matched first. |
800 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
801 | |
802 | template <typename OpTy> bool match(OpTy *V) { |
803 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
804 | auto *I = cast<BinaryOperator>(V); |
805 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
806 | (Commutable && L.match(I->getOperand(1)) && |
807 | R.match(I->getOperand(0))); |
808 | } |
809 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
810 | return CE->getOpcode() == Opcode && |
811 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || |
812 | (Commutable && L.match(CE->getOperand(1)) && |
813 | R.match(CE->getOperand(0)))); |
814 | return false; |
815 | } |
816 | }; |
817 | |
818 | template <typename LHS, typename RHS> |
819 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, |
820 | const RHS &R) { |
821 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); |
822 | } |
823 | |
824 | template <typename LHS, typename RHS> |
825 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, |
826 | const RHS &R) { |
827 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); |
828 | } |
829 | |
830 | template <typename LHS, typename RHS> |
831 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, |
832 | const RHS &R) { |
833 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); |
834 | } |
835 | |
836 | template <typename LHS, typename RHS> |
837 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, |
838 | const RHS &R) { |
839 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); |
840 | } |
841 | |
842 | template <typename Op_t> struct FNeg_match { |
843 | Op_t X; |
844 | |
845 | FNeg_match(const Op_t &Op) : X(Op) {} |
846 | template <typename OpTy> bool match(OpTy *V) { |
847 | auto *FPMO = dyn_cast<FPMathOperator>(V); |
848 | if (!FPMO) return false; |
849 | |
850 | if (FPMO->getOpcode() == Instruction::FNeg) |
851 | return X.match(FPMO->getOperand(0)); |
852 | |
853 | if (FPMO->getOpcode() == Instruction::FSub) { |
854 | if (FPMO->hasNoSignedZeros()) { |
855 | // With 'nsz', any zero goes. |
856 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) |
857 | return false; |
858 | } else { |
859 | // Without 'nsz', we need fsub -0.0, X exactly. |
860 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) |
861 | return false; |
862 | } |
863 | |
864 | return X.match(FPMO->getOperand(1)); |
865 | } |
866 | |
867 | return false; |
868 | } |
869 | }; |
870 | |
871 | /// Match 'fneg X' as 'fsub -0.0, X'. |
872 | template <typename OpTy> |
873 | inline FNeg_match<OpTy> |
874 | m_FNeg(const OpTy &X) { |
875 | return FNeg_match<OpTy>(X); |
876 | } |
877 | |
878 | /// Match 'fneg X' as 'fsub +-0.0, X'. |
879 | template <typename RHS> |
880 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> |
881 | m_FNegNSZ(const RHS &X) { |
882 | return m_FSub(m_AnyZeroFP(), X); |
883 | } |
884 | |
885 | template <typename LHS, typename RHS> |
886 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, |
887 | const RHS &R) { |
888 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); |
889 | } |
890 | |
891 | template <typename LHS, typename RHS> |
892 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, |
893 | const RHS &R) { |
894 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); |
895 | } |
896 | |
897 | template <typename LHS, typename RHS> |
898 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, |
899 | const RHS &R) { |
900 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); |
901 | } |
902 | |
903 | template <typename LHS, typename RHS> |
904 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, |
905 | const RHS &R) { |
906 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); |
907 | } |
908 | |
909 | template <typename LHS, typename RHS> |
910 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, |
911 | const RHS &R) { |
912 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); |
913 | } |
914 | |
915 | template <typename LHS, typename RHS> |
916 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, |
917 | const RHS &R) { |
918 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); |
919 | } |
920 | |
921 | template <typename LHS, typename RHS> |
922 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, |
923 | const RHS &R) { |
924 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); |
925 | } |
926 | |
927 | template <typename LHS, typename RHS> |
928 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, |
929 | const RHS &R) { |
930 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); |
931 | } |
932 | |
933 | template <typename LHS, typename RHS> |
934 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, |
935 | const RHS &R) { |
936 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); |
937 | } |
938 | |
939 | template <typename LHS, typename RHS> |
940 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, |
941 | const RHS &R) { |
942 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); |
943 | } |
944 | |
945 | template <typename LHS, typename RHS> |
946 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, |
947 | const RHS &R) { |
948 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); |
949 | } |
950 | |
951 | template <typename LHS, typename RHS> |
952 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, |
953 | const RHS &R) { |
954 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); |
955 | } |
956 | |
957 | template <typename LHS, typename RHS> |
958 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, |
959 | const RHS &R) { |
960 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); |
961 | } |
962 | |
963 | template <typename LHS, typename RHS> |
964 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, |
965 | const RHS &R) { |
966 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); |
967 | } |
968 | |
969 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
970 | unsigned WrapFlags = 0> |
971 | struct OverflowingBinaryOp_match { |
972 | LHS_t L; |
973 | RHS_t R; |
974 | |
975 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) |
976 | : L(LHS), R(RHS) {} |
977 | |
978 | template <typename OpTy> bool match(OpTy *V) { |
979 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { |
980 | if (Op->getOpcode() != Opcode) |
981 | return false; |
982 | if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap && |
983 | !Op->hasNoUnsignedWrap()) |
984 | return false; |
985 | if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap && |
986 | !Op->hasNoSignedWrap()) |
987 | return false; |
988 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); |
989 | } |
990 | return false; |
991 | } |
992 | }; |
993 | |
994 | template <typename LHS, typename RHS> |
995 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
996 | OverflowingBinaryOperator::NoSignedWrap> |
997 | m_NSWAdd(const LHS &L, const RHS &R) { |
998 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
999 | OverflowingBinaryOperator::NoSignedWrap>( |
1000 | L, R); |
1001 | } |
1002 | template <typename LHS, typename RHS> |
1003 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1004 | OverflowingBinaryOperator::NoSignedWrap> |
1005 | m_NSWSub(const LHS &L, const RHS &R) { |
1006 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1007 | OverflowingBinaryOperator::NoSignedWrap>( |
1008 | L, R); |
1009 | } |
1010 | template <typename LHS, typename RHS> |
1011 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1012 | OverflowingBinaryOperator::NoSignedWrap> |
1013 | m_NSWMul(const LHS &L, const RHS &R) { |
1014 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1015 | OverflowingBinaryOperator::NoSignedWrap>( |
1016 | L, R); |
1017 | } |
1018 | template <typename LHS, typename RHS> |
1019 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1020 | OverflowingBinaryOperator::NoSignedWrap> |
1021 | m_NSWShl(const LHS &L, const RHS &R) { |
1022 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1023 | OverflowingBinaryOperator::NoSignedWrap>( |
1024 | L, R); |
1025 | } |
1026 | |
1027 | template <typename LHS, typename RHS> |
1028 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1029 | OverflowingBinaryOperator::NoUnsignedWrap> |
1030 | m_NUWAdd(const LHS &L, const RHS &R) { |
1031 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1032 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1033 | L, R); |
1034 | } |
1035 | template <typename LHS, typename RHS> |
1036 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1037 | OverflowingBinaryOperator::NoUnsignedWrap> |
1038 | m_NUWSub(const LHS &L, const RHS &R) { |
1039 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1040 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1041 | L, R); |
1042 | } |
1043 | template <typename LHS, typename RHS> |
1044 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1045 | OverflowingBinaryOperator::NoUnsignedWrap> |
1046 | m_NUWMul(const LHS &L, const RHS &R) { |
1047 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1048 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1049 | L, R); |
1050 | } |
1051 | template <typename LHS, typename RHS> |
1052 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1053 | OverflowingBinaryOperator::NoUnsignedWrap> |
1054 | m_NUWShl(const LHS &L, const RHS &R) { |
1055 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1056 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1057 | L, R); |
1058 | } |
1059 | |
1060 | //===----------------------------------------------------------------------===// |
1061 | // Class that matches a group of binary opcodes. |
1062 | // |
1063 | template <typename LHS_t, typename RHS_t, typename Predicate> |
1064 | struct BinOpPred_match : Predicate { |
1065 | LHS_t L; |
1066 | RHS_t R; |
1067 | |
1068 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1069 | |
1070 | template <typename OpTy> bool match(OpTy *V) { |
1071 | if (auto *I = dyn_cast<Instruction>(V)) |
1072 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && |
1073 | R.match(I->getOperand(1)); |
1074 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
1075 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && |
1076 | R.match(CE->getOperand(1)); |
1077 | return false; |
1078 | } |
1079 | }; |
1080 | |
1081 | struct is_shift_op { |
1082 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } |
1083 | }; |
1084 | |
1085 | struct is_right_shift_op { |
1086 | bool isOpType(unsigned Opcode) { |
1087 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; |
1088 | } |
1089 | }; |
1090 | |
1091 | struct is_logical_shift_op { |
1092 | bool isOpType(unsigned Opcode) { |
1093 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; |
1094 | } |
1095 | }; |
1096 | |
1097 | struct is_bitwiselogic_op { |
1098 | bool isOpType(unsigned Opcode) { |
1099 | return Instruction::isBitwiseLogicOp(Opcode); |
1100 | } |
1101 | }; |
1102 | |
1103 | struct is_idiv_op { |
1104 | bool isOpType(unsigned Opcode) { |
1105 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
1106 | } |
1107 | }; |
1108 | |
1109 | struct is_irem_op { |
1110 | bool isOpType(unsigned Opcode) { |
1111 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; |
1112 | } |
1113 | }; |
1114 | |
1115 | /// Matches shift operations. |
1116 | template <typename LHS, typename RHS> |
1117 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, |
1118 | const RHS &R) { |
1119 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); |
1120 | } |
1121 | |
1122 | /// Matches logical shift operations. |
1123 | template <typename LHS, typename RHS> |
1124 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, |
1125 | const RHS &R) { |
1126 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); |
1127 | } |
1128 | |
1129 | /// Matches logical shift operations. |
1130 | template <typename LHS, typename RHS> |
1131 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> |
1132 | m_LogicalShift(const LHS &L, const RHS &R) { |
1133 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); |
1134 | } |
1135 | |
1136 | /// Matches bitwise logic operations. |
1137 | template <typename LHS, typename RHS> |
1138 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> |
1139 | m_BitwiseLogic(const LHS &L, const RHS &R) { |
1140 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); |
1141 | } |
1142 | |
1143 | /// Matches integer division operations. |
1144 | template <typename LHS, typename RHS> |
1145 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, |
1146 | const RHS &R) { |
1147 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); |
1148 | } |
1149 | |
1150 | /// Matches integer remainder operations. |
1151 | template <typename LHS, typename RHS> |
1152 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, |
1153 | const RHS &R) { |
1154 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); |
1155 | } |
1156 | |
1157 | //===----------------------------------------------------------------------===// |
1158 | // Class that matches exact binary ops. |
1159 | // |
1160 | template <typename SubPattern_t> struct Exact_match { |
1161 | SubPattern_t SubPattern; |
1162 | |
1163 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} |
1164 | |
1165 | template <typename OpTy> bool match(OpTy *V) { |
1166 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) |
1167 | return PEO->isExact() && SubPattern.match(V); |
1168 | return false; |
1169 | } |
1170 | }; |
1171 | |
1172 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { |
1173 | return SubPattern; |
1174 | } |
1175 | |
1176 | //===----------------------------------------------------------------------===// |
1177 | // Matchers for CmpInst classes |
1178 | // |
1179 | |
1180 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, |
1181 | bool Commutable = false> |
1182 | struct CmpClass_match { |
1183 | PredicateTy &Predicate; |
1184 | LHS_t L; |
1185 | RHS_t R; |
1186 | |
1187 | // The evaluation order is always stable, regardless of Commutability. |
1188 | // The LHS is always matched first. |
1189 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) |
1190 | : Predicate(Pred), L(LHS), R(RHS) {} |
1191 | |
1192 | template <typename OpTy> bool match(OpTy *V) { |
1193 | if (auto *I = dyn_cast<Class>(V)) { |
1194 | if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { |
1195 | Predicate = I->getPredicate(); |
1196 | return true; |
1197 | } else if (Commutable && L.match(I->getOperand(1)) && |
1198 | R.match(I->getOperand(0))) { |
1199 | Predicate = I->getSwappedPredicate(); |
1200 | return true; |
1201 | } |
1202 | } |
1203 | return false; |
1204 | } |
1205 | }; |
1206 | |
1207 | template <typename LHS, typename RHS> |
1208 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> |
1209 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1210 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); |
1211 | } |
1212 | |
1213 | template <typename LHS, typename RHS> |
1214 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> |
1215 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1216 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); |
1217 | } |
1218 | |
1219 | template <typename LHS, typename RHS> |
1220 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> |
1221 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1222 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); |
1223 | } |
1224 | |
1225 | //===----------------------------------------------------------------------===// |
1226 | // Matchers for instructions with a given opcode and number of operands. |
1227 | // |
1228 | |
1229 | /// Matches instructions with Opcode and three operands. |
1230 | template <typename T0, unsigned Opcode> struct OneOps_match { |
1231 | T0 Op1; |
1232 | |
1233 | OneOps_match(const T0 &Op1) : Op1(Op1) {} |
1234 | |
1235 | template <typename OpTy> bool match(OpTy *V) { |
1236 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1237 | auto *I = cast<Instruction>(V); |
1238 | return Op1.match(I->getOperand(0)); |
1239 | } |
1240 | return false; |
1241 | } |
1242 | }; |
1243 | |
1244 | /// Matches instructions with Opcode and three operands. |
1245 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { |
1246 | T0 Op1; |
1247 | T1 Op2; |
1248 | |
1249 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} |
1250 | |
1251 | template <typename OpTy> bool match(OpTy *V) { |
1252 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1253 | auto *I = cast<Instruction>(V); |
1254 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); |
1255 | } |
1256 | return false; |
1257 | } |
1258 | }; |
1259 | |
1260 | /// Matches instructions with Opcode and three operands. |
1261 | template <typename T0, typename T1, typename T2, unsigned Opcode> |
1262 | struct ThreeOps_match { |
1263 | T0 Op1; |
1264 | T1 Op2; |
1265 | T2 Op3; |
1266 | |
1267 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) |
1268 | : Op1(Op1), Op2(Op2), Op3(Op3) {} |
1269 | |
1270 | template <typename OpTy> bool match(OpTy *V) { |
1271 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1272 | auto *I = cast<Instruction>(V); |
1273 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && |
1274 | Op3.match(I->getOperand(2)); |
1275 | } |
1276 | return false; |
1277 | } |
1278 | }; |
1279 | |
1280 | /// Matches SelectInst. |
1281 | template <typename Cond, typename LHS, typename RHS> |
1282 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> |
1283 | m_Select(const Cond &C, const LHS &L, const RHS &R) { |
1284 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); |
1285 | } |
1286 | |
1287 | /// This matches a select of two constants, e.g.: |
1288 | /// m_SelectCst<-1, 0>(m_Value(V)) |
1289 | template <int64_t L, int64_t R, typename Cond> |
1290 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, |
1291 | Instruction::Select> |
1292 | m_SelectCst(const Cond &C) { |
1293 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); |
1294 | } |
1295 | |
1296 | /// Matches FreezeInst. |
1297 | template <typename OpTy> |
1298 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { |
1299 | return OneOps_match<OpTy, Instruction::Freeze>(Op); |
1300 | } |
1301 | |
1302 | /// Matches InsertElementInst. |
1303 | template <typename Val_t, typename Elt_t, typename Idx_t> |
1304 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> |
1305 | m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { |
1306 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( |
1307 | Val, Elt, Idx); |
1308 | } |
1309 | |
1310 | /// Matches ExtractElementInst. |
1311 | template <typename Val_t, typename Idx_t> |
1312 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> |
1313 | m_ExtractElement(const Val_t &Val, const Idx_t &Idx) { |
1314 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); |
1315 | } |
1316 | |
1317 | /// Matches ShuffleVectorInst. |
1318 | template <typename V1_t, typename V2_t, typename Mask_t> |
1319 | inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector> |
1320 | m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) { |
1321 | return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2, |
1322 | m); |
1323 | } |
1324 | |
1325 | /// Matches LoadInst. |
1326 | template <typename OpTy> |
1327 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { |
1328 | return OneOps_match<OpTy, Instruction::Load>(Op); |
1329 | } |
1330 | |
1331 | /// Matches StoreInst. |
1332 | template <typename ValueOpTy, typename PointerOpTy> |
1333 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> |
1334 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { |
1335 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, |
1336 | PointerOp); |
1337 | } |
1338 | |
1339 | //===----------------------------------------------------------------------===// |
1340 | // Matchers for CastInst classes |
1341 | // |
1342 | |
1343 | template <typename Op_t, unsigned Opcode> struct CastClass_match { |
1344 | Op_t Op; |
1345 | |
1346 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} |
1347 | |
1348 | template <typename OpTy> bool match(OpTy *V) { |
1349 | if (auto *O = dyn_cast<Operator>(V)) |
1350 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); |
1351 | return false; |
1352 | } |
1353 | }; |
1354 | |
1355 | /// Matches BitCast. |
1356 | template <typename OpTy> |
1357 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { |
1358 | return CastClass_match<OpTy, Instruction::BitCast>(Op); |
1359 | } |
1360 | |
1361 | /// Matches PtrToInt. |
1362 | template <typename OpTy> |
1363 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { |
1364 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); |
1365 | } |
1366 | |
1367 | /// Matches Trunc. |
1368 | template <typename OpTy> |
1369 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { |
1370 | return CastClass_match<OpTy, Instruction::Trunc>(Op); |
1371 | } |
1372 | |
1373 | template <typename OpTy> |
1374 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> |
1375 | m_TruncOrSelf(const OpTy &Op) { |
1376 | return m_CombineOr(m_Trunc(Op), Op); |
1377 | } |
1378 | |
1379 | /// Matches SExt. |
1380 | template <typename OpTy> |
1381 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { |
1382 | return CastClass_match<OpTy, Instruction::SExt>(Op); |
1383 | } |
1384 | |
1385 | /// Matches ZExt. |
1386 | template <typename OpTy> |
1387 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { |
1388 | return CastClass_match<OpTy, Instruction::ZExt>(Op); |
1389 | } |
1390 | |
1391 | template <typename OpTy> |
1392 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> |
1393 | m_ZExtOrSelf(const OpTy &Op) { |
1394 | return m_CombineOr(m_ZExt(Op), Op); |
1395 | } |
1396 | |
1397 | template <typename OpTy> |
1398 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> |
1399 | m_SExtOrSelf(const OpTy &Op) { |
1400 | return m_CombineOr(m_SExt(Op), Op); |
1401 | } |
1402 | |
1403 | template <typename OpTy> |
1404 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1405 | CastClass_match<OpTy, Instruction::SExt>> |
1406 | m_ZExtOrSExt(const OpTy &Op) { |
1407 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); |
1408 | } |
1409 | |
1410 | template <typename OpTy> |
1411 | inline match_combine_or< |
1412 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1413 | CastClass_match<OpTy, Instruction::SExt>>, |
1414 | OpTy> |
1415 | m_ZExtOrSExtOrSelf(const OpTy &Op) { |
1416 | return m_CombineOr(m_ZExtOrSExt(Op), Op); |
1417 | } |
1418 | |
1419 | /// Matches UIToFP. |
1420 | template <typename OpTy> |
1421 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { |
1422 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); |
1423 | } |
1424 | |
1425 | /// Matches SIToFP. |
1426 | template <typename OpTy> |
1427 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { |
1428 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); |
1429 | } |
1430 | |
1431 | /// Matches FPTrunc |
1432 | template <typename OpTy> |
1433 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { |
1434 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); |
1435 | } |
1436 | |
1437 | /// Matches FPExt |
1438 | template <typename OpTy> |
1439 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { |
1440 | return CastClass_match<OpTy, Instruction::FPExt>(Op); |
1441 | } |
1442 | |
1443 | //===----------------------------------------------------------------------===// |
1444 | // Matchers for control flow. |
1445 | // |
1446 | |
1447 | struct br_match { |
1448 | BasicBlock *&Succ; |
1449 | |
1450 | br_match(BasicBlock *&Succ) : Succ(Succ) {} |
1451 | |
1452 | template <typename OpTy> bool match(OpTy *V) { |
1453 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1454 | if (BI->isUnconditional()) { |
1455 | Succ = BI->getSuccessor(0); |
1456 | return true; |
1457 | } |
1458 | return false; |
1459 | } |
1460 | }; |
1461 | |
1462 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } |
1463 | |
1464 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1465 | struct brc_match { |
1466 | Cond_t Cond; |
1467 | TrueBlock_t T; |
1468 | FalseBlock_t F; |
1469 | |
1470 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) |
1471 | : Cond(C), T(t), F(f) {} |
1472 | |
1473 | template <typename OpTy> bool match(OpTy *V) { |
1474 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1475 | if (BI->isConditional() && Cond.match(BI->getCondition())) |
1476 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); |
1477 | return false; |
1478 | } |
1479 | }; |
1480 | |
1481 | template <typename Cond_t> |
1482 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> |
1483 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { |
1484 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( |
1485 | C, m_BasicBlock(T), m_BasicBlock(F)); |
1486 | } |
1487 | |
1488 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1489 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> |
1490 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { |
1491 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); |
1492 | } |
1493 | |
1494 | //===----------------------------------------------------------------------===// |
1495 | // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y). |
1496 | // |
1497 | |
1498 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, |
1499 | bool Commutable = false> |
1500 | struct MaxMin_match { |
1501 | LHS_t L; |
1502 | RHS_t R; |
1503 | |
1504 | // The evaluation order is always stable, regardless of Commutability. |
1505 | // The LHS is always matched first. |
1506 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1507 | |
1508 | template <typename OpTy> bool match(OpTy *V) { |
1509 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". |
1510 | auto *SI = dyn_cast<SelectInst>(V); |
1511 | if (!SI) |
1512 | return false; |
1513 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); |
1514 | if (!Cmp) |
1515 | return false; |
1516 | // At this point we have a select conditioned on a comparison. Check that |
1517 | // it is the values returned by the select that are being compared. |
1518 | Value *TrueVal = SI->getTrueValue(); |
1519 | Value *FalseVal = SI->getFalseValue(); |
1520 | Value *LHS = Cmp->getOperand(0); |
1521 | Value *RHS = Cmp->getOperand(1); |
1522 | if ((TrueVal != LHS || FalseVal != RHS) && |
1523 | (TrueVal != RHS || FalseVal != LHS)) |
1524 | return false; |
1525 | typename CmpInst_t::Predicate Pred = |
1526 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); |
1527 | // Does "(x pred y) ? x : y" represent the desired max/min operation? |
1528 | if (!Pred_t::match(Pred)) |
1529 | return false; |
1530 | // It does! Bind the operands. |
1531 | return (L.match(LHS) && R.match(RHS)) || |
1532 | (Commutable && L.match(RHS) && R.match(LHS)); |
1533 | } |
1534 | }; |
1535 | |
1536 | /// Helper class for identifying signed max predicates. |
1537 | struct smax_pred_ty { |
1538 | static bool match(ICmpInst::Predicate Pred) { |
1539 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; |
1540 | } |
1541 | }; |
1542 | |
1543 | /// Helper class for identifying signed min predicates. |
1544 | struct smin_pred_ty { |
1545 | static bool match(ICmpInst::Predicate Pred) { |
1546 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; |
1547 | } |
1548 | }; |
1549 | |
1550 | /// Helper class for identifying unsigned max predicates. |
1551 | struct umax_pred_ty { |
1552 | static bool match(ICmpInst::Predicate Pred) { |
1553 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; |
1554 | } |
1555 | }; |
1556 | |
1557 | /// Helper class for identifying unsigned min predicates. |
1558 | struct umin_pred_ty { |
1559 | static bool match(ICmpInst::Predicate Pred) { |
1560 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; |
1561 | } |
1562 | }; |
1563 | |
1564 | /// Helper class for identifying ordered max predicates. |
1565 | struct ofmax_pred_ty { |
1566 | static bool match(FCmpInst::Predicate Pred) { |
1567 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; |
1568 | } |
1569 | }; |
1570 | |
1571 | /// Helper class for identifying ordered min predicates. |
1572 | struct ofmin_pred_ty { |
1573 | static bool match(FCmpInst::Predicate Pred) { |
1574 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; |
1575 | } |
1576 | }; |
1577 | |
1578 | /// Helper class for identifying unordered max predicates. |
1579 | struct ufmax_pred_ty { |
1580 | static bool match(FCmpInst::Predicate Pred) { |
1581 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; |
1582 | } |
1583 | }; |
1584 | |
1585 | /// Helper class for identifying unordered min predicates. |
1586 | struct ufmin_pred_ty { |
1587 | static bool match(FCmpInst::Predicate Pred) { |
1588 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; |
1589 | } |
1590 | }; |
1591 | |
1592 | template <typename LHS, typename RHS> |
1593 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, |
1594 | const RHS &R) { |
1595 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); |
1596 | } |
1597 | |
1598 | template <typename LHS, typename RHS> |
1599 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, |
1600 | const RHS &R) { |
1601 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); |
1602 | } |
1603 | |
1604 | template <typename LHS, typename RHS> |
1605 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, |
1606 | const RHS &R) { |
1607 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); |
1608 | } |
1609 | |
1610 | template <typename LHS, typename RHS> |
1611 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, |
1612 | const RHS &R) { |
1613 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); |
1614 | } |
1615 | |
1616 | /// Match an 'ordered' floating point maximum function. |
1617 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1618 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1619 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1620 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1621 | /// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate. |
1622 | /// |
1623 | /// max(L, R) iff L and R are not NaN |
1624 | /// m_OrdFMax(L, R) = R iff L or R are NaN |
1625 | template <typename LHS, typename RHS> |
1626 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, |
1627 | const RHS &R) { |
1628 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); |
1629 | } |
1630 | |
1631 | /// Match an 'ordered' floating point minimum function. |
1632 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1633 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1634 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1635 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1636 | /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. |
1637 | /// |
1638 | /// min(L, R) iff L and R are not NaN |
1639 | /// m_OrdFMin(L, R) = R iff L or R are NaN |
1640 | template <typename LHS, typename RHS> |
1641 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, |
1642 | const RHS &R) { |
1643 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); |
1644 | } |
1645 | |
1646 | /// Match an 'unordered' floating point maximum function. |
1647 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1648 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1649 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1650 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1651 | /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. |
1652 | /// |
1653 | /// max(L, R) iff L and R are not NaN |
1654 | /// m_UnordFMax(L, R) = L iff L or R are NaN |
1655 | template <typename LHS, typename RHS> |
1656 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> |
1657 | m_UnordFMax(const LHS &L, const RHS &R) { |
1658 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); |
1659 | } |
1660 | |
1661 | /// Match an 'unordered' floating point minimum function. |
1662 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1663 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1664 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1665 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1666 | /// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. |
1667 | /// |
1668 | /// min(L, R) iff L and R are not NaN |
1669 | /// m_UnordFMin(L, R) = L iff L or R are NaN |
1670 | template <typename LHS, typename RHS> |
1671 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> |
1672 | m_UnordFMin(const LHS &L, const RHS &R) { |
1673 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); |
1674 | } |
1675 | |
1676 | //===----------------------------------------------------------------------===// |
1677 | // Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b |
1678 | // Note that S might be matched to other instructions than AddInst. |
1679 | // |
1680 | |
1681 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
1682 | struct UAddWithOverflow_match { |
1683 | LHS_t L; |
1684 | RHS_t R; |
1685 | Sum_t S; |
1686 | |
1687 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) |
1688 | : L(L), R(R), S(S) {} |
1689 | |
1690 | template <typename OpTy> bool match(OpTy *V) { |
1691 | Value *ICmpLHS, *ICmpRHS; |
1692 | ICmpInst::Predicate Pred; |
1693 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) |
1694 | return false; |
1695 | |
1696 | Value *AddLHS, *AddRHS; |
1697 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); |
1698 | |
1699 | // (a + b) u< a, (a + b) u< b |
1700 | if (Pred == ICmpInst::ICMP_ULT) |
1701 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) |
1702 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
1703 | |
1704 | // a >u (a + b), b >u (a + b) |
1705 | if (Pred == ICmpInst::ICMP_UGT) |
1706 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) |
1707 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
1708 | |
1709 | Value *Op1; |
1710 | auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes())); |
1711 | // (a ^ -1) <u b |
1712 | if (Pred == ICmpInst::ICMP_ULT) { |
1713 | if (XorExpr.match(ICmpLHS)) |
1714 | return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS); |
1715 | } |
1716 | // b > u (a ^ -1) |
1717 | if (Pred == ICmpInst::ICMP_UGT) { |
1718 | if (XorExpr.match(ICmpRHS)) |
1719 | return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS); |
1720 | } |
1721 | |
1722 | // Match special-case for increment-by-1. |
1723 | if (Pred == ICmpInst::ICMP_EQ) { |
1724 | // (a + 1) == 0 |
1725 | // (1 + a) == 0 |
1726 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && |
1727 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
1728 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
1729 | // 0 == (a + 1) |
1730 | // 0 == (1 + a) |
1731 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && |
1732 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
1733 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
1734 | } |
1735 | |
1736 | return false; |
1737 | } |
1738 | }; |
1739 | |
1740 | /// Match an icmp instruction checking for unsigned overflow on addition. |
1741 | /// |
1742 | /// S is matched to the addition whose result is being checked for overflow, and |
1743 | /// L and R are matched to the LHS and RHS of S. |
1744 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
1745 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> |
1746 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { |
1747 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); |
1748 | } |
1749 | |
1750 | template <typename Opnd_t> struct Argument_match { |
1751 | unsigned OpI; |
1752 | Opnd_t Val; |
1753 | |
1754 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} |
1755 | |
1756 | template <typename OpTy> bool match(OpTy *V) { |
1757 | // FIXME: Should likely be switched to use `CallBase`. |
1758 | if (const auto *CI = dyn_cast<CallInst>(V)) |
1759 | return Val.match(CI->getArgOperand(OpI)); |
1760 | return false; |
1761 | } |
1762 | }; |
1763 | |
1764 | /// Match an argument. |
1765 | template <unsigned OpI, typename Opnd_t> |
1766 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { |
1767 | return Argument_match<Opnd_t>(OpI, Op); |
1768 | } |
1769 | |
1770 | /// Intrinsic matchers. |
1771 | struct IntrinsicID_match { |
1772 | unsigned ID; |
1773 | |
1774 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} |
1775 | |
1776 | template <typename OpTy> bool match(OpTy *V) { |
1777 | if (const auto *CI = dyn_cast<CallInst>(V)) |
1778 | if (const auto *F = CI->getCalledFunction()) |
1779 | return F->getIntrinsicID() == ID; |
1780 | return false; |
1781 | } |
1782 | }; |
1783 | |
1784 | /// Intrinsic matches are combinations of ID matchers, and argument |
1785 | /// matchers. Higher arity matcher are defined recursively in terms of and-ing |
1786 | /// them with lower arity matchers. Here's some convenient typedefs for up to |
1787 | /// several arguments, and more can be added as needed |
1788 | template <typename T0 = void, typename T1 = void, typename T2 = void, |
1789 | typename T3 = void, typename T4 = void, typename T5 = void, |
1790 | typename T6 = void, typename T7 = void, typename T8 = void, |
1791 | typename T9 = void, typename T10 = void> |
1792 | struct m_Intrinsic_Ty; |
1793 | template <typename T0> struct m_Intrinsic_Ty<T0> { |
1794 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; |
1795 | }; |
1796 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { |
1797 | using Ty = |
1798 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; |
1799 | }; |
1800 | template <typename T0, typename T1, typename T2> |
1801 | struct m_Intrinsic_Ty<T0, T1, T2> { |
1802 | using Ty = |
1803 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, |
1804 | Argument_match<T2>>; |
1805 | }; |
1806 | template <typename T0, typename T1, typename T2, typename T3> |
1807 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { |
1808 | using Ty = |
1809 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, |
1810 | Argument_match<T3>>; |
1811 | }; |
1812 | |
1813 | template <typename T0, typename T1, typename T2, typename T3, typename T4> |
1814 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { |
1815 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, |
1816 | Argument_match<T4>>; |
1817 | }; |
1818 | |
1819 | /// Match intrinsic calls like this: |
1820 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) |
1821 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { |
1822 | return IntrinsicID_match(IntrID); |
1823 | } |
1824 | |
1825 | template <Intrinsic::ID IntrID, typename T0> |
1826 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { |
1827 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); |
1828 | } |
1829 | |
1830 | template <Intrinsic::ID IntrID, typename T0, typename T1> |
1831 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, |
1832 | const T1 &Op1) { |
1833 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); |
1834 | } |
1835 | |
1836 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> |
1837 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty |
1838 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { |
1839 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); |
1840 | } |
1841 | |
1842 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
1843 | typename T3> |
1844 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty |
1845 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { |
1846 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); |
1847 | } |
1848 | |
1849 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
1850 | typename T3, typename T4> |
1851 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty |
1852 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, |
1853 | const T4 &Op4) { |
1854 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), |
1855 | m_Argument<4>(Op4)); |
1856 | } |
1857 | |
1858 | // Helper intrinsic matching specializations. |
1859 | template <typename Opnd0> |
1860 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { |
1861 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); |
1862 | } |
1863 | |
1864 | template <typename Opnd0> |
1865 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { |
1866 | return m_Intrinsic<Intrinsic::bswap>(Op0); |
1867 | } |
1868 | |
1869 | template <typename Opnd0> |
1870 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { |
1871 | return m_Intrinsic<Intrinsic::fabs>(Op0); |
1872 | } |
1873 | |
1874 | template <typename Opnd0> |
1875 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { |
1876 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); |
1877 | } |
1878 | |
1879 | template <typename Opnd0, typename Opnd1> |
1880 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, |
1881 | const Opnd1 &Op1) { |
1882 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); |
1883 | } |
1884 | |
1885 | template <typename Opnd0, typename Opnd1> |
1886 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, |
1887 | const Opnd1 &Op1) { |
1888 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); |
1889 | } |
1890 | |
1891 | //===----------------------------------------------------------------------===// |
1892 | // Matchers for two-operands operators with the operators in either order |
1893 | // |
1894 | |
1895 | /// Matches a BinaryOperator with LHS and RHS in either order. |
1896 | template <typename LHS, typename RHS> |
1897 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { |
1898 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); |
1899 | } |
1900 | |
1901 | /// Matches an ICmp with a predicate over LHS and RHS in either order. |
1902 | /// Swaps the predicate if operands are commuted. |
1903 | template <typename LHS, typename RHS> |
1904 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> |
1905 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1906 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, |
1907 | R); |
1908 | } |
1909 | |
1910 | /// Matches a Add with LHS and RHS in either order. |
1911 | template <typename LHS, typename RHS> |
1912 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, |
1913 | const RHS &R) { |
1914 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); |
1915 | } |
1916 | |
1917 | /// Matches a Mul with LHS and RHS in either order. |
1918 | template <typename LHS, typename RHS> |
1919 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, |
1920 | const RHS &R) { |
1921 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); |
1922 | } |
1923 | |
1924 | /// Matches an And with LHS and RHS in either order. |
1925 | template <typename LHS, typename RHS> |
1926 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, |
1927 | const RHS &R) { |
1928 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); |
1929 | } |
1930 | |
1931 | /// Matches an Or with LHS and RHS in either order. |
1932 | template <typename LHS, typename RHS> |
1933 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, |
1934 | const RHS &R) { |
1935 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); |
1936 | } |
1937 | |
1938 | /// Matches an Xor with LHS and RHS in either order. |
1939 | template <typename LHS, typename RHS> |
1940 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, |
1941 | const RHS &R) { |
1942 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); |
1943 | } |
1944 | |
1945 | /// Matches a 'Neg' as 'sub 0, V'. |
1946 | template <typename ValTy> |
1947 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> |
1948 | m_Neg(const ValTy &V) { |
1949 | return m_Sub(m_ZeroInt(), V); |
1950 | } |
1951 | |
1952 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. |
1953 | template <typename ValTy> |
1954 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> |
1955 | m_Not(const ValTy &V) { |
1956 | return m_c_Xor(V, m_AllOnes()); |
1957 | } |
1958 | |
1959 | /// Matches an SMin with LHS and RHS in either order. |
1960 | template <typename LHS, typename RHS> |
1961 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> |
1962 | m_c_SMin(const LHS &L, const RHS &R) { |
1963 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); |
1964 | } |
1965 | /// Matches an SMax with LHS and RHS in either order. |
1966 | template <typename LHS, typename RHS> |
1967 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> |
1968 | m_c_SMax(const LHS &L, const RHS &R) { |
1969 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); |
1970 | } |
1971 | /// Matches a UMin with LHS and RHS in either order. |
1972 | template <typename LHS, typename RHS> |
1973 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> |
1974 | m_c_UMin(const LHS &L, const RHS &R) { |
1975 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); |
1976 | } |
1977 | /// Matches a UMax with LHS and RHS in either order. |
1978 | template <typename LHS, typename RHS> |
1979 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> |
1980 | m_c_UMax(const LHS &L, const RHS &R) { |
1981 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); |
1982 | } |
1983 | |
1984 | /// Matches FAdd with LHS and RHS in either order. |
1985 | template <typename LHS, typename RHS> |
1986 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> |
1987 | m_c_FAdd(const LHS &L, const RHS &R) { |
1988 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); |
1989 | } |
1990 | |
1991 | /// Matches FMul with LHS and RHS in either order. |
1992 | template <typename LHS, typename RHS> |
1993 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> |
1994 | m_c_FMul(const LHS &L, const RHS &R) { |
1995 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); |
1996 | } |
1997 | |
1998 | template <typename Opnd_t> struct Signum_match { |
1999 | Opnd_t Val; |
2000 | Signum_match(const Opnd_t &V) : Val(V) {} |
2001 | |
2002 | template <typename OpTy> bool match(OpTy *V) { |
2003 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); |
2004 | if (TypeSize == 0) |
2005 | return false; |
2006 | |
2007 | unsigned ShiftWidth = TypeSize - 1; |
2008 | Value *OpL = nullptr, *OpR = nullptr; |
2009 | |
2010 | // This is the representation of signum we match: |
2011 | // |
2012 | // signum(x) == (x >> 63) | (-x >>u 63) |
2013 | // |
2014 | // An i1 value is its own signum, so it's correct to match |
2015 | // |
2016 | // signum(x) == (x >> 0) | (-x >>u 0) |
2017 | // |
2018 | // for i1 values. |
2019 | |
2020 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); |
2021 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); |
2022 | auto Signum = m_Or(LHS, RHS); |
2023 | |
2024 | return Signum.match(V) && OpL == OpR && Val.match(OpL); |
2025 | } |
2026 | }; |
2027 | |
2028 | /// Matches a signum pattern. |
2029 | /// |
2030 | /// signum(x) = |
2031 | /// x > 0 -> 1 |
2032 | /// x == 0 -> 0 |
2033 | /// x < 0 -> -1 |
2034 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { |
2035 | return Signum_match<Val_t>(V); |
2036 | } |
2037 | |
2038 | template <int Ind, typename Opnd_t> struct ExtractValue_match { |
2039 | Opnd_t Val; |
2040 | ExtractValue_match(const Opnd_t &V) : Val(V) {} |
2041 | |
2042 | template <typename OpTy> bool match(OpTy *V) { |
2043 | if (auto *I = dyn_cast<ExtractValueInst>(V)) |
2044 | return I->getNumIndices() == 1 && I->getIndices()[0] == Ind && |
2045 | Val.match(I->getAggregateOperand()); |
2046 | return false; |
2047 | } |
2048 | }; |
2049 | |
2050 | /// Match a single index ExtractValue instruction. |
2051 | /// For example m_ExtractValue<1>(...) |
2052 | template <int Ind, typename Val_t> |
2053 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { |
2054 | return ExtractValue_match<Ind, Val_t>(V); |
2055 | } |
2056 | |
2057 | /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or |
2058 | /// the constant expression |
2059 | /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>` |
2060 | /// under the right conditions determined by DataLayout. |
2061 | struct VScaleVal_match { |
2062 | private: |
2063 | template <typename Base, typename Offset> |
2064 | inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr> |
2065 | m_OffsetGep(const Base &B, const Offset &O) { |
2066 | return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O); |
2067 | } |
2068 | |
2069 | public: |
2070 | const DataLayout &DL; |
2071 | VScaleVal_match(const DataLayout &DL) : DL(DL) {} |
2072 | |
2073 | template <typename ITy> bool match(ITy *V) { |
2074 | if (m_Intrinsic<Intrinsic::vscale>().match(V)) |
2075 | return true; |
2076 | |
2077 | if (m_PtrToInt(m_OffsetGep(m_Zero(), m_SpecificInt(1))).match(V)) { |
2078 | Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType(); |
2079 | Type *DerefTy = PtrTy->getPointerElementType(); |
2080 | if (DerefTy->isVectorTy() && DerefTy->getVectorIsScalable() && |
2081 | DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) |
2082 | return true; |
2083 | } |
2084 | |
2085 | return false; |
2086 | } |
2087 | }; |
2088 | |
2089 | inline VScaleVal_match m_VScale(const DataLayout &DL) { |
2090 | return VScaleVal_match(DL); |
2091 | } |
2092 | |
2093 | } // end namespace PatternMatch |
2094 | } // end namespace llvm |
2095 | |
2096 | #endif // LLVM_IR_PATTERNMATCH_H |