File: | llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp |
Warning: | line 5364, column 42 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- InstCombineCompares.cpp --------------------------------------------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file implements the visitICmp and visitFCmp functions. | ||||
10 | // | ||||
11 | //===----------------------------------------------------------------------===// | ||||
12 | |||||
13 | #include "InstCombineInternal.h" | ||||
14 | #include "llvm/ADT/APSInt.h" | ||||
15 | #include "llvm/ADT/SetVector.h" | ||||
16 | #include "llvm/ADT/Statistic.h" | ||||
17 | #include "llvm/Analysis/ConstantFolding.h" | ||||
18 | #include "llvm/Analysis/InstructionSimplify.h" | ||||
19 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||
20 | #include "llvm/IR/ConstantRange.h" | ||||
21 | #include "llvm/IR/DataLayout.h" | ||||
22 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||
23 | #include "llvm/IR/IntrinsicInst.h" | ||||
24 | #include "llvm/IR/PatternMatch.h" | ||||
25 | #include "llvm/Support/Debug.h" | ||||
26 | #include "llvm/Support/KnownBits.h" | ||||
27 | |||||
28 | using namespace llvm; | ||||
29 | using namespace PatternMatch; | ||||
30 | |||||
31 | #define DEBUG_TYPE"instcombine" "instcombine" | ||||
32 | |||||
33 | // How many times is a select replaced by one of its operands? | ||||
34 | STATISTIC(NumSel, "Number of select opts")static llvm::Statistic NumSel = {"instcombine", "NumSel", "Number of select opts" }; | ||||
35 | |||||
36 | |||||
37 | /// Compute Result = In1+In2, returning true if the result overflowed for this | ||||
38 | /// type. | ||||
39 | static bool addWithOverflow(APInt &Result, const APInt &In1, | ||||
40 | const APInt &In2, bool IsSigned = false) { | ||||
41 | bool Overflow; | ||||
42 | if (IsSigned) | ||||
43 | Result = In1.sadd_ov(In2, Overflow); | ||||
44 | else | ||||
45 | Result = In1.uadd_ov(In2, Overflow); | ||||
46 | |||||
47 | return Overflow; | ||||
48 | } | ||||
49 | |||||
50 | /// Compute Result = In1-In2, returning true if the result overflowed for this | ||||
51 | /// type. | ||||
52 | static bool subWithOverflow(APInt &Result, const APInt &In1, | ||||
53 | const APInt &In2, bool IsSigned = false) { | ||||
54 | bool Overflow; | ||||
55 | if (IsSigned) | ||||
56 | Result = In1.ssub_ov(In2, Overflow); | ||||
57 | else | ||||
58 | Result = In1.usub_ov(In2, Overflow); | ||||
59 | |||||
60 | return Overflow; | ||||
61 | } | ||||
62 | |||||
63 | /// Given an icmp instruction, return true if any use of this comparison is a | ||||
64 | /// branch on sign bit comparison. | ||||
65 | static bool hasBranchUse(ICmpInst &I) { | ||||
66 | for (auto *U : I.users()) | ||||
67 | if (isa<BranchInst>(U)) | ||||
68 | return true; | ||||
69 | return false; | ||||
70 | } | ||||
71 | |||||
72 | /// Returns true if the exploded icmp can be expressed as a signed comparison | ||||
73 | /// to zero and updates the predicate accordingly. | ||||
74 | /// The signedness of the comparison is preserved. | ||||
75 | /// TODO: Refactor with decomposeBitTestICmp()? | ||||
76 | static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { | ||||
77 | if (!ICmpInst::isSigned(Pred)) | ||||
78 | return false; | ||||
79 | |||||
80 | if (C.isNullValue()) | ||||
81 | return ICmpInst::isRelational(Pred); | ||||
82 | |||||
83 | if (C.isOneValue()) { | ||||
84 | if (Pred == ICmpInst::ICMP_SLT) { | ||||
85 | Pred = ICmpInst::ICMP_SLE; | ||||
86 | return true; | ||||
87 | } | ||||
88 | } else if (C.isAllOnesValue()) { | ||||
89 | if (Pred == ICmpInst::ICMP_SGT) { | ||||
90 | Pred = ICmpInst::ICMP_SGE; | ||||
91 | return true; | ||||
92 | } | ||||
93 | } | ||||
94 | |||||
95 | return false; | ||||
96 | } | ||||
97 | |||||
98 | /// Given a signed integer type and a set of known zero and one bits, compute | ||||
99 | /// the maximum and minimum values that could have the specified known zero and | ||||
100 | /// known one bits, returning them in Min/Max. | ||||
101 | /// TODO: Move to method on KnownBits struct? | ||||
102 | static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, | ||||
103 | APInt &Min, APInt &Max) { | ||||
104 | assert(Known.getBitWidth() == Min.getBitWidth() &&((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 106, __PRETTY_FUNCTION__)) | ||||
105 | Known.getBitWidth() == Max.getBitWidth() &&((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 106, __PRETTY_FUNCTION__)) | ||||
106 | "KnownZero, KnownOne and Min, Max must have equal bitwidth.")((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 106, __PRETTY_FUNCTION__)); | ||||
107 | APInt UnknownBits = ~(Known.Zero|Known.One); | ||||
108 | |||||
109 | // The minimum value is when all unknown bits are zeros, EXCEPT for the sign | ||||
110 | // bit if it is unknown. | ||||
111 | Min = Known.One; | ||||
112 | Max = Known.One|UnknownBits; | ||||
113 | |||||
114 | if (UnknownBits.isNegative()) { // Sign bit is unknown | ||||
115 | Min.setSignBit(); | ||||
116 | Max.clearSignBit(); | ||||
117 | } | ||||
118 | } | ||||
119 | |||||
120 | /// Given an unsigned integer type and a set of known zero and one bits, compute | ||||
121 | /// the maximum and minimum values that could have the specified known zero and | ||||
122 | /// known one bits, returning them in Min/Max. | ||||
123 | /// TODO: Move to method on KnownBits struct? | ||||
124 | static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known, | ||||
125 | APInt &Min, APInt &Max) { | ||||
126 | assert(Known.getBitWidth() == Min.getBitWidth() &&((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 128, __PRETTY_FUNCTION__)) | ||||
127 | Known.getBitWidth() == Max.getBitWidth() &&((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 128, __PRETTY_FUNCTION__)) | ||||
128 | "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.")((Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth () == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth." ) ? static_cast<void> (0) : __assert_fail ("Known.getBitWidth() == Min.getBitWidth() && Known.getBitWidth() == Max.getBitWidth() && \"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 128, __PRETTY_FUNCTION__)); | ||||
129 | APInt UnknownBits = ~(Known.Zero|Known.One); | ||||
130 | |||||
131 | // The minimum value is when the unknown bits are all zeros. | ||||
132 | Min = Known.One; | ||||
133 | // The maximum value is when the unknown bits are all ones. | ||||
134 | Max = Known.One|UnknownBits; | ||||
135 | } | ||||
136 | |||||
137 | /// This is called when we see this pattern: | ||||
138 | /// cmp pred (load (gep GV, ...)), cmpcst | ||||
139 | /// where GV is a global variable with a constant initializer. Try to simplify | ||||
140 | /// this into some simple computation that does not need the load. For example | ||||
141 | /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". | ||||
142 | /// | ||||
143 | /// If AndCst is non-null, then the loaded value is masked with that constant | ||||
144 | /// before doing the comparison. This handles cases like "A[i]&4 == 0". | ||||
145 | Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, | ||||
146 | GlobalVariable *GV, | ||||
147 | CmpInst &ICI, | ||||
148 | ConstantInt *AndCst) { | ||||
149 | Constant *Init = GV->getInitializer(); | ||||
150 | if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) | ||||
151 | return nullptr; | ||||
152 | |||||
153 | uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); | ||||
154 | // Don't blow up on huge arrays. | ||||
155 | if (ArrayElementCount > MaxArraySizeForCombine) | ||||
156 | return nullptr; | ||||
157 | |||||
158 | // There are many forms of this optimization we can handle, for now, just do | ||||
159 | // the simple index into a single-dimensional array. | ||||
160 | // | ||||
161 | // Require: GEP GV, 0, i {{, constant indices}} | ||||
162 | if (GEP->getNumOperands() < 3 || | ||||
163 | !isa<ConstantInt>(GEP->getOperand(1)) || | ||||
164 | !cast<ConstantInt>(GEP->getOperand(1))->isZero() || | ||||
165 | isa<Constant>(GEP->getOperand(2))) | ||||
166 | return nullptr; | ||||
167 | |||||
168 | // Check that indices after the variable are constants and in-range for the | ||||
169 | // type they index. Collect the indices. This is typically for arrays of | ||||
170 | // structs. | ||||
171 | SmallVector<unsigned, 4> LaterIndices; | ||||
172 | |||||
173 | Type *EltTy = Init->getType()->getArrayElementType(); | ||||
174 | for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { | ||||
175 | ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); | ||||
176 | if (!Idx) return nullptr; // Variable index. | ||||
177 | |||||
178 | uint64_t IdxVal = Idx->getZExtValue(); | ||||
179 | if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. | ||||
180 | |||||
181 | if (StructType *STy = dyn_cast<StructType>(EltTy)) | ||||
182 | EltTy = STy->getElementType(IdxVal); | ||||
183 | else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { | ||||
184 | if (IdxVal >= ATy->getNumElements()) return nullptr; | ||||
185 | EltTy = ATy->getElementType(); | ||||
186 | } else { | ||||
187 | return nullptr; // Unknown type. | ||||
188 | } | ||||
189 | |||||
190 | LaterIndices.push_back(IdxVal); | ||||
191 | } | ||||
192 | |||||
193 | enum { Overdefined = -3, Undefined = -2 }; | ||||
194 | |||||
195 | // Variables for our state machines. | ||||
196 | |||||
197 | // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form | ||||
198 | // "i == 47 | i == 87", where 47 is the first index the condition is true for, | ||||
199 | // and 87 is the second (and last) index. FirstTrueElement is -2 when | ||||
200 | // undefined, otherwise set to the first true element. SecondTrueElement is | ||||
201 | // -2 when undefined, -3 when overdefined and >= 0 when that index is true. | ||||
202 | int FirstTrueElement = Undefined, SecondTrueElement = Undefined; | ||||
203 | |||||
204 | // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the | ||||
205 | // form "i != 47 & i != 87". Same state transitions as for true elements. | ||||
206 | int FirstFalseElement = Undefined, SecondFalseElement = Undefined; | ||||
207 | |||||
208 | /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these | ||||
209 | /// define a state machine that triggers for ranges of values that the index | ||||
210 | /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. | ||||
211 | /// This is -2 when undefined, -3 when overdefined, and otherwise the last | ||||
212 | /// index in the range (inclusive). We use -2 for undefined here because we | ||||
213 | /// use relative comparisons and don't want 0-1 to match -1. | ||||
214 | int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; | ||||
215 | |||||
216 | // MagicBitvector - This is a magic bitvector where we set a bit if the | ||||
217 | // comparison is true for element 'i'. If there are 64 elements or less in | ||||
218 | // the array, this will fully represent all the comparison results. | ||||
219 | uint64_t MagicBitvector = 0; | ||||
220 | |||||
221 | // Scan the array and see if one of our patterns matches. | ||||
222 | Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); | ||||
223 | for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { | ||||
224 | Constant *Elt = Init->getAggregateElement(i); | ||||
225 | if (!Elt) return nullptr; | ||||
226 | |||||
227 | // If this is indexing an array of structures, get the structure element. | ||||
228 | if (!LaterIndices.empty()) | ||||
229 | Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); | ||||
230 | |||||
231 | // If the element is masked, handle it. | ||||
232 | if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); | ||||
233 | |||||
234 | // Find out if the comparison would be true or false for the i'th element. | ||||
235 | Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, | ||||
236 | CompareRHS, DL, &TLI); | ||||
237 | // If the result is undef for this element, ignore it. | ||||
238 | if (isa<UndefValue>(C)) { | ||||
239 | // Extend range state machines to cover this element in case there is an | ||||
240 | // undef in the middle of the range. | ||||
241 | if (TrueRangeEnd == (int)i-1) | ||||
242 | TrueRangeEnd = i; | ||||
243 | if (FalseRangeEnd == (int)i-1) | ||||
244 | FalseRangeEnd = i; | ||||
245 | continue; | ||||
246 | } | ||||
247 | |||||
248 | // If we can't compute the result for any of the elements, we have to give | ||||
249 | // up evaluating the entire conditional. | ||||
250 | if (!isa<ConstantInt>(C)) return nullptr; | ||||
251 | |||||
252 | // Otherwise, we know if the comparison is true or false for this element, | ||||
253 | // update our state machines. | ||||
254 | bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); | ||||
255 | |||||
256 | // State machine for single/double/range index comparison. | ||||
257 | if (IsTrueForElt) { | ||||
258 | // Update the TrueElement state machine. | ||||
259 | if (FirstTrueElement == Undefined) | ||||
260 | FirstTrueElement = TrueRangeEnd = i; // First true element. | ||||
261 | else { | ||||
262 | // Update double-compare state machine. | ||||
263 | if (SecondTrueElement == Undefined) | ||||
264 | SecondTrueElement = i; | ||||
265 | else | ||||
266 | SecondTrueElement = Overdefined; | ||||
267 | |||||
268 | // Update range state machine. | ||||
269 | if (TrueRangeEnd == (int)i-1) | ||||
270 | TrueRangeEnd = i; | ||||
271 | else | ||||
272 | TrueRangeEnd = Overdefined; | ||||
273 | } | ||||
274 | } else { | ||||
275 | // Update the FalseElement state machine. | ||||
276 | if (FirstFalseElement == Undefined) | ||||
277 | FirstFalseElement = FalseRangeEnd = i; // First false element. | ||||
278 | else { | ||||
279 | // Update double-compare state machine. | ||||
280 | if (SecondFalseElement == Undefined) | ||||
281 | SecondFalseElement = i; | ||||
282 | else | ||||
283 | SecondFalseElement = Overdefined; | ||||
284 | |||||
285 | // Update range state machine. | ||||
286 | if (FalseRangeEnd == (int)i-1) | ||||
287 | FalseRangeEnd = i; | ||||
288 | else | ||||
289 | FalseRangeEnd = Overdefined; | ||||
290 | } | ||||
291 | } | ||||
292 | |||||
293 | // If this element is in range, update our magic bitvector. | ||||
294 | if (i < 64 && IsTrueForElt) | ||||
295 | MagicBitvector |= 1ULL << i; | ||||
296 | |||||
297 | // If all of our states become overdefined, bail out early. Since the | ||||
298 | // predicate is expensive, only check it every 8 elements. This is only | ||||
299 | // really useful for really huge arrays. | ||||
300 | if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && | ||||
301 | SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && | ||||
302 | FalseRangeEnd == Overdefined) | ||||
303 | return nullptr; | ||||
304 | } | ||||
305 | |||||
306 | // Now that we've scanned the entire array, emit our new comparison(s). We | ||||
307 | // order the state machines in complexity of the generated code. | ||||
308 | Value *Idx = GEP->getOperand(2); | ||||
309 | |||||
310 | // If the index is larger than the pointer size of the target, truncate the | ||||
311 | // index down like the GEP would do implicitly. We don't have to do this for | ||||
312 | // an inbounds GEP because the index can't be out of range. | ||||
313 | if (!GEP->isInBounds()) { | ||||
314 | Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); | ||||
315 | unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); | ||||
316 | if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) | ||||
317 | Idx = Builder.CreateTrunc(Idx, IntPtrTy); | ||||
318 | } | ||||
319 | |||||
320 | // If the comparison is only true for one or two elements, emit direct | ||||
321 | // comparisons. | ||||
322 | if (SecondTrueElement != Overdefined) { | ||||
323 | // None true -> false. | ||||
324 | if (FirstTrueElement == Undefined) | ||||
325 | return replaceInstUsesWith(ICI, Builder.getFalse()); | ||||
326 | |||||
327 | Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); | ||||
328 | |||||
329 | // True for one element -> 'i == 47'. | ||||
330 | if (SecondTrueElement == Undefined) | ||||
331 | return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); | ||||
332 | |||||
333 | // True for two elements -> 'i == 47 | i == 72'. | ||||
334 | Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx); | ||||
335 | Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); | ||||
336 | Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx); | ||||
337 | return BinaryOperator::CreateOr(C1, C2); | ||||
338 | } | ||||
339 | |||||
340 | // If the comparison is only false for one or two elements, emit direct | ||||
341 | // comparisons. | ||||
342 | if (SecondFalseElement != Overdefined) { | ||||
343 | // None false -> true. | ||||
344 | if (FirstFalseElement == Undefined) | ||||
345 | return replaceInstUsesWith(ICI, Builder.getTrue()); | ||||
346 | |||||
347 | Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); | ||||
348 | |||||
349 | // False for one element -> 'i != 47'. | ||||
350 | if (SecondFalseElement == Undefined) | ||||
351 | return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); | ||||
352 | |||||
353 | // False for two elements -> 'i != 47 & i != 72'. | ||||
354 | Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); | ||||
355 | Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); | ||||
356 | Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); | ||||
357 | return BinaryOperator::CreateAnd(C1, C2); | ||||
358 | } | ||||
359 | |||||
360 | // If the comparison can be replaced with a range comparison for the elements | ||||
361 | // where it is true, emit the range check. | ||||
362 | if (TrueRangeEnd != Overdefined) { | ||||
363 | assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare")((TrueRangeEnd != FirstTrueElement && "Should emit single compare" ) ? static_cast<void> (0) : __assert_fail ("TrueRangeEnd != FirstTrueElement && \"Should emit single compare\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 363, __PRETTY_FUNCTION__)); | ||||
364 | |||||
365 | // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). | ||||
366 | if (FirstTrueElement) { | ||||
367 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); | ||||
368 | Idx = Builder.CreateAdd(Idx, Offs); | ||||
369 | } | ||||
370 | |||||
371 | Value *End = ConstantInt::get(Idx->getType(), | ||||
372 | TrueRangeEnd-FirstTrueElement+1); | ||||
373 | return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); | ||||
374 | } | ||||
375 | |||||
376 | // False range check. | ||||
377 | if (FalseRangeEnd != Overdefined) { | ||||
378 | assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare")((FalseRangeEnd != FirstFalseElement && "Should emit single compare" ) ? static_cast<void> (0) : __assert_fail ("FalseRangeEnd != FirstFalseElement && \"Should emit single compare\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 378, __PRETTY_FUNCTION__)); | ||||
379 | // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). | ||||
380 | if (FirstFalseElement) { | ||||
381 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); | ||||
382 | Idx = Builder.CreateAdd(Idx, Offs); | ||||
383 | } | ||||
384 | |||||
385 | Value *End = ConstantInt::get(Idx->getType(), | ||||
386 | FalseRangeEnd-FirstFalseElement); | ||||
387 | return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); | ||||
388 | } | ||||
389 | |||||
390 | // If a magic bitvector captures the entire comparison state | ||||
391 | // of this load, replace it with computation that does: | ||||
392 | // ((magic_cst >> i) & 1) != 0 | ||||
393 | { | ||||
394 | Type *Ty = nullptr; | ||||
395 | |||||
396 | // Look for an appropriate type: | ||||
397 | // - The type of Idx if the magic fits | ||||
398 | // - The smallest fitting legal type | ||||
399 | if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) | ||||
400 | Ty = Idx->getType(); | ||||
401 | else | ||||
402 | Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); | ||||
403 | |||||
404 | if (Ty) { | ||||
405 | Value *V = Builder.CreateIntCast(Idx, Ty, false); | ||||
406 | V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); | ||||
407 | V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); | ||||
408 | return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); | ||||
409 | } | ||||
410 | } | ||||
411 | |||||
412 | return nullptr; | ||||
413 | } | ||||
414 | |||||
415 | /// Return a value that can be used to compare the *offset* implied by a GEP to | ||||
416 | /// zero. For example, if we have &A[i], we want to return 'i' for | ||||
417 | /// "icmp ne i, 0". Note that, in general, indices can be complex, and scales | ||||
418 | /// are involved. The above expression would also be legal to codegen as | ||||
419 | /// "icmp ne (i*4), 0" (assuming A is a pointer to i32). | ||||
420 | /// This latter form is less amenable to optimization though, and we are allowed | ||||
421 | /// to generate the first by knowing that pointer arithmetic doesn't overflow. | ||||
422 | /// | ||||
423 | /// If we can't emit an optimized form for this expression, this returns null. | ||||
424 | /// | ||||
425 | static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, | ||||
426 | const DataLayout &DL) { | ||||
427 | gep_type_iterator GTI = gep_type_begin(GEP); | ||||
428 | |||||
429 | // Check to see if this gep only has a single variable index. If so, and if | ||||
430 | // any constant indices are a multiple of its scale, then we can compute this | ||||
431 | // in terms of the scale of the variable index. For example, if the GEP | ||||
432 | // implies an offset of "12 + i*4", then we can codegen this as "3 + i", | ||||
433 | // because the expression will cross zero at the same point. | ||||
434 | unsigned i, e = GEP->getNumOperands(); | ||||
435 | int64_t Offset = 0; | ||||
436 | for (i = 1; i != e; ++i, ++GTI) { | ||||
437 | if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { | ||||
438 | // Compute the aggregate offset of constant indices. | ||||
439 | if (CI->isZero()) continue; | ||||
440 | |||||
441 | // Handle a struct index, which adds its field offset to the pointer. | ||||
442 | if (StructType *STy = GTI.getStructTypeOrNull()) { | ||||
443 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); | ||||
444 | } else { | ||||
445 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); | ||||
446 | Offset += Size*CI->getSExtValue(); | ||||
447 | } | ||||
448 | } else { | ||||
449 | // Found our variable index. | ||||
450 | break; | ||||
451 | } | ||||
452 | } | ||||
453 | |||||
454 | // If there are no variable indices, we must have a constant offset, just | ||||
455 | // evaluate it the general way. | ||||
456 | if (i == e) return nullptr; | ||||
457 | |||||
458 | Value *VariableIdx = GEP->getOperand(i); | ||||
459 | // Determine the scale factor of the variable element. For example, this is | ||||
460 | // 4 if the variable index is into an array of i32. | ||||
461 | uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType()); | ||||
462 | |||||
463 | // Verify that there are no other variable indices. If so, emit the hard way. | ||||
464 | for (++i, ++GTI; i != e; ++i, ++GTI) { | ||||
465 | ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); | ||||
466 | if (!CI) return nullptr; | ||||
467 | |||||
468 | // Compute the aggregate offset of constant indices. | ||||
469 | if (CI->isZero()) continue; | ||||
470 | |||||
471 | // Handle a struct index, which adds its field offset to the pointer. | ||||
472 | if (StructType *STy = GTI.getStructTypeOrNull()) { | ||||
473 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); | ||||
474 | } else { | ||||
475 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); | ||||
476 | Offset += Size*CI->getSExtValue(); | ||||
477 | } | ||||
478 | } | ||||
479 | |||||
480 | // Okay, we know we have a single variable index, which must be a | ||||
481 | // pointer/array/vector index. If there is no offset, life is simple, return | ||||
482 | // the index. | ||||
483 | Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType()); | ||||
484 | unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); | ||||
485 | if (Offset == 0) { | ||||
486 | // Cast to intptrty in case a truncation occurs. If an extension is needed, | ||||
487 | // we don't need to bother extending: the extension won't affect where the | ||||
488 | // computation crosses zero. | ||||
489 | if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { | ||||
490 | VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); | ||||
491 | } | ||||
492 | return VariableIdx; | ||||
493 | } | ||||
494 | |||||
495 | // Otherwise, there is an index. The computation we will do will be modulo | ||||
496 | // the pointer size. | ||||
497 | Offset = SignExtend64(Offset, IntPtrWidth); | ||||
498 | VariableScale = SignExtend64(VariableScale, IntPtrWidth); | ||||
499 | |||||
500 | // To do this transformation, any constant index must be a multiple of the | ||||
501 | // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", | ||||
502 | // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a | ||||
503 | // multiple of the variable scale. | ||||
504 | int64_t NewOffs = Offset / (int64_t)VariableScale; | ||||
505 | if (Offset != NewOffs*(int64_t)VariableScale) | ||||
506 | return nullptr; | ||||
507 | |||||
508 | // Okay, we can do this evaluation. Start by converting the index to intptr. | ||||
509 | if (VariableIdx->getType() != IntPtrTy) | ||||
510 | VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy, | ||||
511 | true /*Signed*/); | ||||
512 | Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); | ||||
513 | return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset"); | ||||
514 | } | ||||
515 | |||||
516 | /// Returns true if we can rewrite Start as a GEP with pointer Base | ||||
517 | /// and some integer offset. The nodes that need to be re-written | ||||
518 | /// for this transformation will be added to Explored. | ||||
519 | static bool canRewriteGEPAsOffset(Value *Start, Value *Base, | ||||
520 | const DataLayout &DL, | ||||
521 | SetVector<Value *> &Explored) { | ||||
522 | SmallVector<Value *, 16> WorkList(1, Start); | ||||
523 | Explored.insert(Base); | ||||
524 | |||||
525 | // The following traversal gives us an order which can be used | ||||
526 | // when doing the final transformation. Since in the final | ||||
527 | // transformation we create the PHI replacement instructions first, | ||||
528 | // we don't have to get them in any particular order. | ||||
529 | // | ||||
530 | // However, for other instructions we will have to traverse the | ||||
531 | // operands of an instruction first, which means that we have to | ||||
532 | // do a post-order traversal. | ||||
533 | while (!WorkList.empty()) { | ||||
534 | SetVector<PHINode *> PHIs; | ||||
535 | |||||
536 | while (!WorkList.empty()) { | ||||
537 | if (Explored.size() >= 100) | ||||
538 | return false; | ||||
539 | |||||
540 | Value *V = WorkList.back(); | ||||
541 | |||||
542 | if (Explored.count(V) != 0) { | ||||
543 | WorkList.pop_back(); | ||||
544 | continue; | ||||
545 | } | ||||
546 | |||||
547 | if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) && | ||||
548 | !isa<GetElementPtrInst>(V) && !isa<PHINode>(V)) | ||||
549 | // We've found some value that we can't explore which is different from | ||||
550 | // the base. Therefore we can't do this transformation. | ||||
551 | return false; | ||||
552 | |||||
553 | if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) { | ||||
554 | auto *CI = dyn_cast<CastInst>(V); | ||||
555 | if (!CI->isNoopCast(DL)) | ||||
556 | return false; | ||||
557 | |||||
558 | if (Explored.count(CI->getOperand(0)) == 0) | ||||
559 | WorkList.push_back(CI->getOperand(0)); | ||||
560 | } | ||||
561 | |||||
562 | if (auto *GEP = dyn_cast<GEPOperator>(V)) { | ||||
563 | // We're limiting the GEP to having one index. This will preserve | ||||
564 | // the original pointer type. We could handle more cases in the | ||||
565 | // future. | ||||
566 | if (GEP->getNumIndices() != 1 || !GEP->isInBounds() || | ||||
567 | GEP->getType() != Start->getType()) | ||||
568 | return false; | ||||
569 | |||||
570 | if (Explored.count(GEP->getOperand(0)) == 0) | ||||
571 | WorkList.push_back(GEP->getOperand(0)); | ||||
572 | } | ||||
573 | |||||
574 | if (WorkList.back() == V) { | ||||
575 | WorkList.pop_back(); | ||||
576 | // We've finished visiting this node, mark it as such. | ||||
577 | Explored.insert(V); | ||||
578 | } | ||||
579 | |||||
580 | if (auto *PN = dyn_cast<PHINode>(V)) { | ||||
581 | // We cannot transform PHIs on unsplittable basic blocks. | ||||
582 | if (isa<CatchSwitchInst>(PN->getParent()->getTerminator())) | ||||
583 | return false; | ||||
584 | Explored.insert(PN); | ||||
585 | PHIs.insert(PN); | ||||
586 | } | ||||
587 | } | ||||
588 | |||||
589 | // Explore the PHI nodes further. | ||||
590 | for (auto *PN : PHIs) | ||||
591 | for (Value *Op : PN->incoming_values()) | ||||
592 | if (Explored.count(Op) == 0) | ||||
593 | WorkList.push_back(Op); | ||||
594 | } | ||||
595 | |||||
596 | // Make sure that we can do this. Since we can't insert GEPs in a basic | ||||
597 | // block before a PHI node, we can't easily do this transformation if | ||||
598 | // we have PHI node users of transformed instructions. | ||||
599 | for (Value *Val : Explored) { | ||||
600 | for (Value *Use : Val->uses()) { | ||||
601 | |||||
602 | auto *PHI = dyn_cast<PHINode>(Use); | ||||
603 | auto *Inst = dyn_cast<Instruction>(Val); | ||||
604 | |||||
605 | if (Inst == Base || Inst == PHI || !Inst || !PHI || | ||||
606 | Explored.count(PHI) == 0) | ||||
607 | continue; | ||||
608 | |||||
609 | if (PHI->getParent() == Inst->getParent()) | ||||
610 | return false; | ||||
611 | } | ||||
612 | } | ||||
613 | return true; | ||||
614 | } | ||||
615 | |||||
616 | // Sets the appropriate insert point on Builder where we can add | ||||
617 | // a replacement Instruction for V (if that is possible). | ||||
618 | static void setInsertionPoint(IRBuilder<> &Builder, Value *V, | ||||
619 | bool Before = true) { | ||||
620 | if (auto *PHI = dyn_cast<PHINode>(V)) { | ||||
621 | Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt()); | ||||
622 | return; | ||||
623 | } | ||||
624 | if (auto *I = dyn_cast<Instruction>(V)) { | ||||
625 | if (!Before) | ||||
626 | I = &*std::next(I->getIterator()); | ||||
627 | Builder.SetInsertPoint(I); | ||||
628 | return; | ||||
629 | } | ||||
630 | if (auto *A = dyn_cast<Argument>(V)) { | ||||
631 | // Set the insertion point in the entry block. | ||||
632 | BasicBlock &Entry = A->getParent()->getEntryBlock(); | ||||
633 | Builder.SetInsertPoint(&*Entry.getFirstInsertionPt()); | ||||
634 | return; | ||||
635 | } | ||||
636 | // Otherwise, this is a constant and we don't need to set a new | ||||
637 | // insertion point. | ||||
638 | assert(isa<Constant>(V) && "Setting insertion point for unknown value!")((isa<Constant>(V) && "Setting insertion point for unknown value!" ) ? static_cast<void> (0) : __assert_fail ("isa<Constant>(V) && \"Setting insertion point for unknown value!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 638, __PRETTY_FUNCTION__)); | ||||
639 | } | ||||
640 | |||||
641 | /// Returns a re-written value of Start as an indexed GEP using Base as a | ||||
642 | /// pointer. | ||||
643 | static Value *rewriteGEPAsOffset(Value *Start, Value *Base, | ||||
644 | const DataLayout &DL, | ||||
645 | SetVector<Value *> &Explored) { | ||||
646 | // Perform all the substitutions. This is a bit tricky because we can | ||||
647 | // have cycles in our use-def chains. | ||||
648 | // 1. Create the PHI nodes without any incoming values. | ||||
649 | // 2. Create all the other values. | ||||
650 | // 3. Add the edges for the PHI nodes. | ||||
651 | // 4. Emit GEPs to get the original pointers. | ||||
652 | // 5. Remove the original instructions. | ||||
653 | Type *IndexType = IntegerType::get( | ||||
654 | Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType())); | ||||
655 | |||||
656 | DenseMap<Value *, Value *> NewInsts; | ||||
657 | NewInsts[Base] = ConstantInt::getNullValue(IndexType); | ||||
658 | |||||
659 | // Create the new PHI nodes, without adding any incoming values. | ||||
660 | for (Value *Val : Explored) { | ||||
661 | if (Val == Base) | ||||
662 | continue; | ||||
663 | // Create empty phi nodes. This avoids cyclic dependencies when creating | ||||
664 | // the remaining instructions. | ||||
665 | if (auto *PHI = dyn_cast<PHINode>(Val)) | ||||
666 | NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(), | ||||
667 | PHI->getName() + ".idx", PHI); | ||||
668 | } | ||||
669 | IRBuilder<> Builder(Base->getContext()); | ||||
670 | |||||
671 | // Create all the other instructions. | ||||
672 | for (Value *Val : Explored) { | ||||
673 | |||||
674 | if (NewInsts.find(Val) != NewInsts.end()) | ||||
675 | continue; | ||||
676 | |||||
677 | if (auto *CI = dyn_cast<CastInst>(Val)) { | ||||
678 | // Don't get rid of the intermediate variable here; the store can grow | ||||
679 | // the map which will invalidate the reference to the input value. | ||||
680 | Value *V = NewInsts[CI->getOperand(0)]; | ||||
681 | NewInsts[CI] = V; | ||||
682 | continue; | ||||
683 | } | ||||
684 | if (auto *GEP = dyn_cast<GEPOperator>(Val)) { | ||||
685 | Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)] | ||||
686 | : GEP->getOperand(1); | ||||
687 | setInsertionPoint(Builder, GEP); | ||||
688 | // Indices might need to be sign extended. GEPs will magically do | ||||
689 | // this, but we need to do it ourselves here. | ||||
690 | if (Index->getType()->getScalarSizeInBits() != | ||||
691 | NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) { | ||||
692 | Index = Builder.CreateSExtOrTrunc( | ||||
693 | Index, NewInsts[GEP->getOperand(0)]->getType(), | ||||
694 | GEP->getOperand(0)->getName() + ".sext"); | ||||
695 | } | ||||
696 | |||||
697 | auto *Op = NewInsts[GEP->getOperand(0)]; | ||||
698 | if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) | ||||
699 | NewInsts[GEP] = Index; | ||||
700 | else | ||||
701 | NewInsts[GEP] = Builder.CreateNSWAdd( | ||||
702 | Op, Index, GEP->getOperand(0)->getName() + ".add"); | ||||
703 | continue; | ||||
704 | } | ||||
705 | if (isa<PHINode>(Val)) | ||||
706 | continue; | ||||
707 | |||||
708 | llvm_unreachable("Unexpected instruction type")::llvm::llvm_unreachable_internal("Unexpected instruction type" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 708); | ||||
709 | } | ||||
710 | |||||
711 | // Add the incoming values to the PHI nodes. | ||||
712 | for (Value *Val : Explored) { | ||||
713 | if (Val == Base) | ||||
714 | continue; | ||||
715 | // All the instructions have been created, we can now add edges to the | ||||
716 | // phi nodes. | ||||
717 | if (auto *PHI = dyn_cast<PHINode>(Val)) { | ||||
718 | PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]); | ||||
719 | for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) { | ||||
720 | Value *NewIncoming = PHI->getIncomingValue(I); | ||||
721 | |||||
722 | if (NewInsts.find(NewIncoming) != NewInsts.end()) | ||||
723 | NewIncoming = NewInsts[NewIncoming]; | ||||
724 | |||||
725 | NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I)); | ||||
726 | } | ||||
727 | } | ||||
728 | } | ||||
729 | |||||
730 | for (Value *Val : Explored) { | ||||
731 | if (Val == Base) | ||||
732 | continue; | ||||
733 | |||||
734 | // Depending on the type, for external users we have to emit | ||||
735 | // a GEP or a GEP + ptrtoint. | ||||
736 | setInsertionPoint(Builder, Val, false); | ||||
737 | |||||
738 | // If required, create an inttoptr instruction for Base. | ||||
739 | Value *NewBase = Base; | ||||
740 | if (!Base->getType()->isPointerTy()) | ||||
741 | NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(), | ||||
742 | Start->getName() + "to.ptr"); | ||||
743 | |||||
744 | Value *GEP = Builder.CreateInBoundsGEP( | ||||
745 | Start->getType()->getPointerElementType(), NewBase, | ||||
746 | makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr"); | ||||
747 | |||||
748 | if (!Val->getType()->isPointerTy()) { | ||||
749 | Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(), | ||||
750 | Val->getName() + ".conv"); | ||||
751 | GEP = Cast; | ||||
752 | } | ||||
753 | Val->replaceAllUsesWith(GEP); | ||||
754 | } | ||||
755 | |||||
756 | return NewInsts[Start]; | ||||
757 | } | ||||
758 | |||||
759 | /// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express | ||||
760 | /// the input Value as a constant indexed GEP. Returns a pair containing | ||||
761 | /// the GEPs Pointer and Index. | ||||
762 | static std::pair<Value *, Value *> | ||||
763 | getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { | ||||
764 | Type *IndexType = IntegerType::get(V->getContext(), | ||||
765 | DL.getIndexTypeSizeInBits(V->getType())); | ||||
766 | |||||
767 | Constant *Index = ConstantInt::getNullValue(IndexType); | ||||
768 | while (true) { | ||||
769 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { | ||||
770 | // We accept only inbouds GEPs here to exclude the possibility of | ||||
771 | // overflow. | ||||
772 | if (!GEP->isInBounds()) | ||||
773 | break; | ||||
774 | if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 && | ||||
775 | GEP->getType() == V->getType()) { | ||||
776 | V = GEP->getOperand(0); | ||||
777 | Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1)); | ||||
778 | Index = ConstantExpr::getAdd( | ||||
779 | Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType)); | ||||
780 | continue; | ||||
781 | } | ||||
782 | break; | ||||
783 | } | ||||
784 | if (auto *CI = dyn_cast<IntToPtrInst>(V)) { | ||||
785 | if (!CI->isNoopCast(DL)) | ||||
786 | break; | ||||
787 | V = CI->getOperand(0); | ||||
788 | continue; | ||||
789 | } | ||||
790 | if (auto *CI = dyn_cast<PtrToIntInst>(V)) { | ||||
791 | if (!CI->isNoopCast(DL)) | ||||
792 | break; | ||||
793 | V = CI->getOperand(0); | ||||
794 | continue; | ||||
795 | } | ||||
796 | break; | ||||
797 | } | ||||
798 | return {V, Index}; | ||||
799 | } | ||||
800 | |||||
801 | /// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. | ||||
802 | /// We can look through PHIs, GEPs and casts in order to determine a common base | ||||
803 | /// between GEPLHS and RHS. | ||||
804 | static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, | ||||
805 | ICmpInst::Predicate Cond, | ||||
806 | const DataLayout &DL) { | ||||
807 | // FIXME: Support vector of pointers. | ||||
808 | if (GEPLHS->getType()->isVectorTy()) | ||||
809 | return nullptr; | ||||
810 | |||||
811 | if (!GEPLHS->hasAllConstantIndices()) | ||||
812 | return nullptr; | ||||
813 | |||||
814 | // Make sure the pointers have the same type. | ||||
815 | if (GEPLHS->getType() != RHS->getType()) | ||||
816 | return nullptr; | ||||
817 | |||||
818 | Value *PtrBase, *Index; | ||||
819 | std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); | ||||
820 | |||||
821 | // The set of nodes that will take part in this transformation. | ||||
822 | SetVector<Value *> Nodes; | ||||
823 | |||||
824 | if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) | ||||
825 | return nullptr; | ||||
826 | |||||
827 | // We know we can re-write this as | ||||
828 | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) | ||||
829 | // Since we've only looked through inbouds GEPs we know that we | ||||
830 | // can't have overflow on either side. We can therefore re-write | ||||
831 | // this as: | ||||
832 | // OFFSET1 cmp OFFSET2 | ||||
833 | Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes); | ||||
834 | |||||
835 | // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written | ||||
836 | // GEP having PtrBase as the pointer base, and has returned in NewRHS the | ||||
837 | // offset. Since Index is the offset of LHS to the base pointer, we will now | ||||
838 | // compare the offsets instead of comparing the pointers. | ||||
839 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS); | ||||
840 | } | ||||
841 | |||||
842 | /// Fold comparisons between a GEP instruction and something else. At this point | ||||
843 | /// we know that the GEP is on the LHS of the comparison. | ||||
844 | Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, | ||||
845 | ICmpInst::Predicate Cond, | ||||
846 | Instruction &I) { | ||||
847 | // Don't transform signed compares of GEPs into index compares. Even if the | ||||
848 | // GEP is inbounds, the final add of the base pointer can have signed overflow | ||||
849 | // and would change the result of the icmp. | ||||
850 | // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be | ||||
851 | // the maximum signed value for the pointer type. | ||||
852 | if (ICmpInst::isSigned(Cond)) | ||||
853 | return nullptr; | ||||
854 | |||||
855 | // Look through bitcasts and addrspacecasts. We do not however want to remove | ||||
856 | // 0 GEPs. | ||||
857 | if (!isa<GetElementPtrInst>(RHS)) | ||||
858 | RHS = RHS->stripPointerCasts(); | ||||
859 | |||||
860 | Value *PtrBase = GEPLHS->getOperand(0); | ||||
861 | // FIXME: Support vector pointer GEPs. | ||||
862 | if (PtrBase == RHS && GEPLHS->isInBounds() && | ||||
863 | !GEPLHS->getType()->isVectorTy()) { | ||||
864 | // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). | ||||
865 | // This transformation (ignoring the base and scales) is valid because we | ||||
866 | // know pointers can't overflow since the gep is inbounds. See if we can | ||||
867 | // output an optimized form. | ||||
868 | Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL); | ||||
869 | |||||
870 | // If not, synthesize the offset the hard way. | ||||
871 | if (!Offset) | ||||
872 | Offset = EmitGEPOffset(GEPLHS); | ||||
873 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, | ||||
874 | Constant::getNullValue(Offset->getType())); | ||||
875 | } | ||||
876 | |||||
877 | if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) && | ||||
878 | isa<Constant>(RHS) && cast<Constant>(RHS)->isNullValue() && | ||||
879 | !NullPointerIsDefined(I.getFunction(), | ||||
880 | RHS->getType()->getPointerAddressSpace())) { | ||||
881 | // For most address spaces, an allocation can't be placed at null, but null | ||||
882 | // itself is treated as a 0 size allocation in the in bounds rules. Thus, | ||||
883 | // the only valid inbounds address derived from null, is null itself. | ||||
884 | // Thus, we have four cases to consider: | ||||
885 | // 1) Base == nullptr, Offset == 0 -> inbounds, null | ||||
886 | // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds | ||||
887 | // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations) | ||||
888 | // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison) | ||||
889 | // | ||||
890 | // (Note if we're indexing a type of size 0, that simply collapses into one | ||||
891 | // of the buckets above.) | ||||
892 | // | ||||
893 | // In general, we're allowed to make values less poison (i.e. remove | ||||
894 | // sources of full UB), so in this case, we just select between the two | ||||
895 | // non-poison cases (1 and 4 above). | ||||
896 | // | ||||
897 | // For vectors, we apply the same reasoning on a per-lane basis. | ||||
898 | auto *Base = GEPLHS->getPointerOperand(); | ||||
899 | if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) { | ||||
900 | int NumElts = GEPLHS->getType()->getVectorNumElements(); | ||||
901 | Base = Builder.CreateVectorSplat(NumElts, Base); | ||||
902 | } | ||||
903 | return new ICmpInst(Cond, Base, | ||||
904 | ConstantExpr::getPointerBitCastOrAddrSpaceCast( | ||||
905 | cast<Constant>(RHS), Base->getType())); | ||||
906 | } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { | ||||
907 | // If the base pointers are different, but the indices are the same, just | ||||
908 | // compare the base pointer. | ||||
909 | if (PtrBase != GEPRHS->getOperand(0)) { | ||||
910 | bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); | ||||
911 | IndicesTheSame &= GEPLHS->getOperand(0)->getType() == | ||||
912 | GEPRHS->getOperand(0)->getType(); | ||||
913 | if (IndicesTheSame) | ||||
914 | for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) | ||||
915 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | ||||
916 | IndicesTheSame = false; | ||||
917 | break; | ||||
918 | } | ||||
919 | |||||
920 | // If all indices are the same, just compare the base pointers. | ||||
921 | Type *BaseType = GEPLHS->getOperand(0)->getType(); | ||||
922 | if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType()) | ||||
923 | return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); | ||||
924 | |||||
925 | // If we're comparing GEPs with two base pointers that only differ in type | ||||
926 | // and both GEPs have only constant indices or just one use, then fold | ||||
927 | // the compare with the adjusted indices. | ||||
928 | // FIXME: Support vector of pointers. | ||||
929 | if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && | ||||
930 | (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && | ||||
931 | (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && | ||||
932 | PtrBase->stripPointerCasts() == | ||||
933 | GEPRHS->getOperand(0)->stripPointerCasts() && | ||||
934 | !GEPLHS->getType()->isVectorTy()) { | ||||
935 | Value *LOffset = EmitGEPOffset(GEPLHS); | ||||
936 | Value *ROffset = EmitGEPOffset(GEPRHS); | ||||
937 | |||||
938 | // If we looked through an addrspacecast between different sized address | ||||
939 | // spaces, the LHS and RHS pointers are different sized | ||||
940 | // integers. Truncate to the smaller one. | ||||
941 | Type *LHSIndexTy = LOffset->getType(); | ||||
942 | Type *RHSIndexTy = ROffset->getType(); | ||||
943 | if (LHSIndexTy != RHSIndexTy) { | ||||
944 | if (LHSIndexTy->getPrimitiveSizeInBits() < | ||||
945 | RHSIndexTy->getPrimitiveSizeInBits()) { | ||||
946 | ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); | ||||
947 | } else | ||||
948 | LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); | ||||
949 | } | ||||
950 | |||||
951 | Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), | ||||
952 | LOffset, ROffset); | ||||
953 | return replaceInstUsesWith(I, Cmp); | ||||
954 | } | ||||
955 | |||||
956 | // Otherwise, the base pointers are different and the indices are | ||||
957 | // different. Try convert this to an indexed compare by looking through | ||||
958 | // PHIs/casts. | ||||
959 | return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); | ||||
960 | } | ||||
961 | |||||
962 | // If one of the GEPs has all zero indices, recurse. | ||||
963 | // FIXME: Handle vector of pointers. | ||||
964 | if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices()) | ||||
965 | return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0), | ||||
966 | ICmpInst::getSwappedPredicate(Cond), I); | ||||
967 | |||||
968 | // If the other GEP has all zero indices, recurse. | ||||
969 | // FIXME: Handle vector of pointers. | ||||
970 | if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices()) | ||||
971 | return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); | ||||
972 | |||||
973 | bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); | ||||
974 | if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { | ||||
975 | // If the GEPs only differ by one index, compare it. | ||||
976 | unsigned NumDifferences = 0; // Keep track of # differences. | ||||
977 | unsigned DiffOperand = 0; // The operand that differs. | ||||
978 | for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) | ||||
979 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | ||||
980 | Type *LHSType = GEPLHS->getOperand(i)->getType(); | ||||
981 | Type *RHSType = GEPRHS->getOperand(i)->getType(); | ||||
982 | // FIXME: Better support for vector of pointers. | ||||
983 | if (LHSType->getPrimitiveSizeInBits() != | ||||
984 | RHSType->getPrimitiveSizeInBits() || | ||||
985 | (GEPLHS->getType()->isVectorTy() && | ||||
986 | (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) { | ||||
987 | // Irreconcilable differences. | ||||
988 | NumDifferences = 2; | ||||
989 | break; | ||||
990 | } | ||||
991 | |||||
992 | if (NumDifferences++) break; | ||||
993 | DiffOperand = i; | ||||
994 | } | ||||
995 | |||||
996 | if (NumDifferences == 0) // SAME GEP? | ||||
997 | return replaceInstUsesWith(I, // No comparison is needed here. | ||||
998 | ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond))); | ||||
999 | |||||
1000 | else if (NumDifferences == 1 && GEPsInBounds) { | ||||
1001 | Value *LHSV = GEPLHS->getOperand(DiffOperand); | ||||
1002 | Value *RHSV = GEPRHS->getOperand(DiffOperand); | ||||
1003 | // Make sure we do a signed comparison here. | ||||
1004 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); | ||||
1005 | } | ||||
1006 | } | ||||
1007 | |||||
1008 | // Only lower this if the icmp is the only user of the GEP or if we expect | ||||
1009 | // the result to fold to a constant! | ||||
1010 | if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && | ||||
1011 | (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { | ||||
1012 | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) | ||||
1013 | Value *L = EmitGEPOffset(GEPLHS); | ||||
1014 | Value *R = EmitGEPOffset(GEPRHS); | ||||
1015 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); | ||||
1016 | } | ||||
1017 | } | ||||
1018 | |||||
1019 | // Try convert this to an indexed compare by looking through PHIs/casts as a | ||||
1020 | // last resort. | ||||
1021 | return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); | ||||
1022 | } | ||||
1023 | |||||
1024 | Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI, | ||||
1025 | const AllocaInst *Alloca, | ||||
1026 | const Value *Other) { | ||||
1027 | assert(ICI.isEquality() && "Cannot fold non-equality comparison.")((ICI.isEquality() && "Cannot fold non-equality comparison." ) ? static_cast<void> (0) : __assert_fail ("ICI.isEquality() && \"Cannot fold non-equality comparison.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1027, __PRETTY_FUNCTION__)); | ||||
1028 | |||||
1029 | // It would be tempting to fold away comparisons between allocas and any | ||||
1030 | // pointer not based on that alloca (e.g. an argument). However, even | ||||
1031 | // though such pointers cannot alias, they can still compare equal. | ||||
1032 | // | ||||
1033 | // But LLVM doesn't specify where allocas get their memory, so if the alloca | ||||
1034 | // doesn't escape we can argue that it's impossible to guess its value, and we | ||||
1035 | // can therefore act as if any such guesses are wrong. | ||||
1036 | // | ||||
1037 | // The code below checks that the alloca doesn't escape, and that it's only | ||||
1038 | // used in a comparison once (the current instruction). The | ||||
1039 | // single-comparison-use condition ensures that we're trivially folding all | ||||
1040 | // comparisons against the alloca consistently, and avoids the risk of | ||||
1041 | // erroneously folding a comparison of the pointer with itself. | ||||
1042 | |||||
1043 | unsigned MaxIter = 32; // Break cycles and bound to constant-time. | ||||
1044 | |||||
1045 | SmallVector<const Use *, 32> Worklist; | ||||
1046 | for (const Use &U : Alloca->uses()) { | ||||
1047 | if (Worklist.size() >= MaxIter) | ||||
1048 | return nullptr; | ||||
1049 | Worklist.push_back(&U); | ||||
1050 | } | ||||
1051 | |||||
1052 | unsigned NumCmps = 0; | ||||
1053 | while (!Worklist.empty()) { | ||||
1054 | assert(Worklist.size() <= MaxIter)((Worklist.size() <= MaxIter) ? static_cast<void> (0 ) : __assert_fail ("Worklist.size() <= MaxIter", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1054, __PRETTY_FUNCTION__)); | ||||
1055 | const Use *U = Worklist.pop_back_val(); | ||||
1056 | const Value *V = U->getUser(); | ||||
1057 | --MaxIter; | ||||
1058 | |||||
1059 | if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) || | ||||
1060 | isa<SelectInst>(V)) { | ||||
1061 | // Track the uses. | ||||
1062 | } else if (isa<LoadInst>(V)) { | ||||
1063 | // Loading from the pointer doesn't escape it. | ||||
1064 | continue; | ||||
1065 | } else if (const auto *SI = dyn_cast<StoreInst>(V)) { | ||||
1066 | // Storing *to* the pointer is fine, but storing the pointer escapes it. | ||||
1067 | if (SI->getValueOperand() == U->get()) | ||||
1068 | return nullptr; | ||||
1069 | continue; | ||||
1070 | } else if (isa<ICmpInst>(V)) { | ||||
1071 | if (NumCmps++) | ||||
1072 | return nullptr; // Found more than one cmp. | ||||
1073 | continue; | ||||
1074 | } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) { | ||||
1075 | switch (Intrin->getIntrinsicID()) { | ||||
1076 | // These intrinsics don't escape or compare the pointer. Memset is safe | ||||
1077 | // because we don't allow ptrtoint. Memcpy and memmove are safe because | ||||
1078 | // we don't allow stores, so src cannot point to V. | ||||
1079 | case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: | ||||
1080 | case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: | ||||
1081 | continue; | ||||
1082 | default: | ||||
1083 | return nullptr; | ||||
1084 | } | ||||
1085 | } else { | ||||
1086 | return nullptr; | ||||
1087 | } | ||||
1088 | for (const Use &U : V->uses()) { | ||||
1089 | if (Worklist.size() >= MaxIter) | ||||
1090 | return nullptr; | ||||
1091 | Worklist.push_back(&U); | ||||
1092 | } | ||||
1093 | } | ||||
1094 | |||||
1095 | Type *CmpTy = CmpInst::makeCmpResultType(Other->getType()); | ||||
1096 | return replaceInstUsesWith( | ||||
1097 | ICI, | ||||
1098 | ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate()))); | ||||
1099 | } | ||||
1100 | |||||
1101 | /// Fold "icmp pred (X+C), X". | ||||
1102 | Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C, | ||||
1103 | ICmpInst::Predicate Pred) { | ||||
1104 | // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, | ||||
1105 | // so the values can never be equal. Similarly for all other "or equals" | ||||
1106 | // operators. | ||||
1107 | assert(!!C && "C should not be zero!")((!!C && "C should not be zero!") ? static_cast<void > (0) : __assert_fail ("!!C && \"C should not be zero!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1107, __PRETTY_FUNCTION__)); | ||||
1108 | |||||
1109 | // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 | ||||
1110 | // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 | ||||
1111 | // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 | ||||
1112 | if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { | ||||
1113 | Constant *R = ConstantInt::get(X->getType(), | ||||
1114 | APInt::getMaxValue(C.getBitWidth()) - C); | ||||
1115 | return new ICmpInst(ICmpInst::ICMP_UGT, X, R); | ||||
1116 | } | ||||
1117 | |||||
1118 | // (X+1) >u X --> X <u (0-1) --> X != 255 | ||||
1119 | // (X+2) >u X --> X <u (0-2) --> X <u 254 | ||||
1120 | // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 | ||||
1121 | if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) | ||||
1122 | return new ICmpInst(ICmpInst::ICMP_ULT, X, | ||||
1123 | ConstantInt::get(X->getType(), -C)); | ||||
1124 | |||||
1125 | APInt SMax = APInt::getSignedMaxValue(C.getBitWidth()); | ||||
1126 | |||||
1127 | // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 | ||||
1128 | // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 | ||||
1129 | // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 | ||||
1130 | // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 | ||||
1131 | // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 | ||||
1132 | // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 | ||||
1133 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | ||||
1134 | return new ICmpInst(ICmpInst::ICMP_SGT, X, | ||||
1135 | ConstantInt::get(X->getType(), SMax - C)); | ||||
1136 | |||||
1137 | // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 | ||||
1138 | // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 | ||||
1139 | // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 | ||||
1140 | // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 | ||||
1141 | // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 | ||||
1142 | // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 | ||||
1143 | |||||
1144 | assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) ? static_cast<void> (0) : __assert_fail ("Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1144, __PRETTY_FUNCTION__)); | ||||
1145 | return new ICmpInst(ICmpInst::ICMP_SLT, X, | ||||
1146 | ConstantInt::get(X->getType(), SMax - (C - 1))); | ||||
1147 | } | ||||
1148 | |||||
1149 | /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" -> | ||||
1150 | /// (icmp eq/ne A, Log2(AP2/AP1)) -> | ||||
1151 | /// (icmp eq/ne A, Log2(AP2) - Log2(AP1)). | ||||
1152 | Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A, | ||||
1153 | const APInt &AP1, | ||||
1154 | const APInt &AP2) { | ||||
1155 | assert(I.isEquality() && "Cannot fold icmp gt/lt")((I.isEquality() && "Cannot fold icmp gt/lt") ? static_cast <void> (0) : __assert_fail ("I.isEquality() && \"Cannot fold icmp gt/lt\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1155, __PRETTY_FUNCTION__)); | ||||
1156 | |||||
1157 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { | ||||
1158 | if (I.getPredicate() == I.ICMP_NE) | ||||
1159 | Pred = CmpInst::getInversePredicate(Pred); | ||||
1160 | return new ICmpInst(Pred, LHS, RHS); | ||||
1161 | }; | ||||
1162 | |||||
1163 | // Don't bother doing any work for cases which InstSimplify handles. | ||||
1164 | if (AP2.isNullValue()) | ||||
1165 | return nullptr; | ||||
1166 | |||||
1167 | bool IsAShr = isa<AShrOperator>(I.getOperand(0)); | ||||
1168 | if (IsAShr) { | ||||
1169 | if (AP2.isAllOnesValue()) | ||||
1170 | return nullptr; | ||||
1171 | if (AP2.isNegative() != AP1.isNegative()) | ||||
1172 | return nullptr; | ||||
1173 | if (AP2.sgt(AP1)) | ||||
1174 | return nullptr; | ||||
1175 | } | ||||
1176 | |||||
1177 | if (!AP1) | ||||
1178 | // 'A' must be large enough to shift out the highest set bit. | ||||
1179 | return getICmp(I.ICMP_UGT, A, | ||||
1180 | ConstantInt::get(A->getType(), AP2.logBase2())); | ||||
1181 | |||||
1182 | if (AP1 == AP2) | ||||
1183 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); | ||||
1184 | |||||
1185 | int Shift; | ||||
1186 | if (IsAShr && AP1.isNegative()) | ||||
1187 | Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes(); | ||||
1188 | else | ||||
1189 | Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros(); | ||||
1190 | |||||
1191 | if (Shift > 0) { | ||||
1192 | if (IsAShr && AP1 == AP2.ashr(Shift)) { | ||||
1193 | // There are multiple solutions if we are comparing against -1 and the LHS | ||||
1194 | // of the ashr is not a power of two. | ||||
1195 | if (AP1.isAllOnesValue() && !AP2.isPowerOf2()) | ||||
1196 | return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift)); | ||||
1197 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); | ||||
1198 | } else if (AP1 == AP2.lshr(Shift)) { | ||||
1199 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); | ||||
1200 | } | ||||
1201 | } | ||||
1202 | |||||
1203 | // Shifting const2 will never be equal to const1. | ||||
1204 | // FIXME: This should always be handled by InstSimplify? | ||||
1205 | auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE); | ||||
1206 | return replaceInstUsesWith(I, TorF); | ||||
1207 | } | ||||
1208 | |||||
1209 | /// Handle "(icmp eq/ne (shl AP2, A), AP1)" -> | ||||
1210 | /// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)). | ||||
1211 | Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A, | ||||
1212 | const APInt &AP1, | ||||
1213 | const APInt &AP2) { | ||||
1214 | assert(I.isEquality() && "Cannot fold icmp gt/lt")((I.isEquality() && "Cannot fold icmp gt/lt") ? static_cast <void> (0) : __assert_fail ("I.isEquality() && \"Cannot fold icmp gt/lt\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1214, __PRETTY_FUNCTION__)); | ||||
1215 | |||||
1216 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { | ||||
1217 | if (I.getPredicate() == I.ICMP_NE) | ||||
1218 | Pred = CmpInst::getInversePredicate(Pred); | ||||
1219 | return new ICmpInst(Pred, LHS, RHS); | ||||
1220 | }; | ||||
1221 | |||||
1222 | // Don't bother doing any work for cases which InstSimplify handles. | ||||
1223 | if (AP2.isNullValue()) | ||||
1224 | return nullptr; | ||||
1225 | |||||
1226 | unsigned AP2TrailingZeros = AP2.countTrailingZeros(); | ||||
1227 | |||||
1228 | if (!AP1 && AP2TrailingZeros != 0) | ||||
1229 | return getICmp( | ||||
1230 | I.ICMP_UGE, A, | ||||
1231 | ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); | ||||
1232 | |||||
1233 | if (AP1 == AP2) | ||||
1234 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); | ||||
1235 | |||||
1236 | // Get the distance between the lowest bits that are set. | ||||
1237 | int Shift = AP1.countTrailingZeros() - AP2TrailingZeros; | ||||
1238 | |||||
1239 | if (Shift > 0 && AP2.shl(Shift) == AP1) | ||||
1240 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); | ||||
1241 | |||||
1242 | // Shifting const2 will never be equal to const1. | ||||
1243 | // FIXME: This should always be handled by InstSimplify? | ||||
1244 | auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE); | ||||
1245 | return replaceInstUsesWith(I, TorF); | ||||
1246 | } | ||||
1247 | |||||
1248 | /// The caller has matched a pattern of the form: | ||||
1249 | /// I = icmp ugt (add (add A, B), CI2), CI1 | ||||
1250 | /// If this is of the form: | ||||
1251 | /// sum = a + b | ||||
1252 | /// if (sum+128 >u 255) | ||||
1253 | /// Then replace it with llvm.sadd.with.overflow.i8. | ||||
1254 | /// | ||||
1255 | static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, | ||||
1256 | ConstantInt *CI2, ConstantInt *CI1, | ||||
1257 | InstCombiner &IC) { | ||||
1258 | // The transformation we're trying to do here is to transform this into an | ||||
1259 | // llvm.sadd.with.overflow. To do this, we have to replace the original add | ||||
1260 | // with a narrower add, and discard the add-with-constant that is part of the | ||||
1261 | // range check (if we can't eliminate it, this isn't profitable). | ||||
1262 | |||||
1263 | // In order to eliminate the add-with-constant, the compare can be its only | ||||
1264 | // use. | ||||
1265 | Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); | ||||
1266 | if (!AddWithCst->hasOneUse()) | ||||
1267 | return nullptr; | ||||
1268 | |||||
1269 | // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. | ||||
1270 | if (!CI2->getValue().isPowerOf2()) | ||||
1271 | return nullptr; | ||||
1272 | unsigned NewWidth = CI2->getValue().countTrailingZeros(); | ||||
1273 | if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) | ||||
1274 | return nullptr; | ||||
1275 | |||||
1276 | // The width of the new add formed is 1 more than the bias. | ||||
1277 | ++NewWidth; | ||||
1278 | |||||
1279 | // Check to see that CI1 is an all-ones value with NewWidth bits. | ||||
1280 | if (CI1->getBitWidth() == NewWidth || | ||||
1281 | CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) | ||||
1282 | return nullptr; | ||||
1283 | |||||
1284 | // This is only really a signed overflow check if the inputs have been | ||||
1285 | // sign-extended; check for that condition. For example, if CI2 is 2^31 and | ||||
1286 | // the operands of the add are 64 bits wide, we need at least 33 sign bits. | ||||
1287 | unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; | ||||
1288 | if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits || | ||||
1289 | IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits) | ||||
1290 | return nullptr; | ||||
1291 | |||||
1292 | // In order to replace the original add with a narrower | ||||
1293 | // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant | ||||
1294 | // and truncates that discard the high bits of the add. Verify that this is | ||||
1295 | // the case. | ||||
1296 | Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); | ||||
1297 | for (User *U : OrigAdd->users()) { | ||||
1298 | if (U == AddWithCst) | ||||
1299 | continue; | ||||
1300 | |||||
1301 | // Only accept truncates for now. We would really like a nice recursive | ||||
1302 | // predicate like SimplifyDemandedBits, but which goes downwards the use-def | ||||
1303 | // chain to see which bits of a value are actually demanded. If the | ||||
1304 | // original add had another add which was then immediately truncated, we | ||||
1305 | // could still do the transformation. | ||||
1306 | TruncInst *TI = dyn_cast<TruncInst>(U); | ||||
1307 | if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) | ||||
1308 | return nullptr; | ||||
1309 | } | ||||
1310 | |||||
1311 | // If the pattern matches, truncate the inputs to the narrower type and | ||||
1312 | // use the sadd_with_overflow intrinsic to efficiently compute both the | ||||
1313 | // result and the overflow bit. | ||||
1314 | Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); | ||||
1315 | Function *F = Intrinsic::getDeclaration( | ||||
1316 | I.getModule(), Intrinsic::sadd_with_overflow, NewType); | ||||
1317 | |||||
1318 | InstCombiner::BuilderTy &Builder = IC.Builder; | ||||
1319 | |||||
1320 | // Put the new code above the original add, in case there are any uses of the | ||||
1321 | // add between the add and the compare. | ||||
1322 | Builder.SetInsertPoint(OrigAdd); | ||||
1323 | |||||
1324 | Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); | ||||
1325 | Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); | ||||
1326 | CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); | ||||
1327 | Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); | ||||
1328 | Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); | ||||
1329 | |||||
1330 | // The inner add was the result of the narrow add, zero extended to the | ||||
1331 | // wider type. Replace it with the result computed by the intrinsic. | ||||
1332 | IC.replaceInstUsesWith(*OrigAdd, ZExt); | ||||
1333 | |||||
1334 | // The original icmp gets replaced with the overflow value. | ||||
1335 | return ExtractValueInst::Create(Call, 1, "sadd.overflow"); | ||||
1336 | } | ||||
1337 | |||||
1338 | /// If we have: | ||||
1339 | /// icmp eq/ne (urem/srem %x, %y), 0 | ||||
1340 | /// iff %y is a power-of-two, we can replace this with a bit test: | ||||
1341 | /// icmp eq/ne (and %x, (add %y, -1)), 0 | ||||
1342 | Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { | ||||
1343 | // This fold is only valid for equality predicates. | ||||
1344 | if (!I.isEquality()) | ||||
1345 | return nullptr; | ||||
1346 | ICmpInst::Predicate Pred; | ||||
1347 | Value *X, *Y, *Zero; | ||||
1348 | if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))), | ||||
1349 | m_CombineAnd(m_Zero(), m_Value(Zero))))) | ||||
1350 | return nullptr; | ||||
1351 | if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I)) | ||||
1352 | return nullptr; | ||||
1353 | // This may increase instruction count, we don't enforce that Y is a constant. | ||||
1354 | Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType())); | ||||
1355 | Value *Masked = Builder.CreateAnd(X, Mask); | ||||
1356 | return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); | ||||
1357 | } | ||||
1358 | |||||
1359 | /// Fold equality-comparison between zero and any (maybe truncated) right-shift | ||||
1360 | /// by one-less-than-bitwidth into a sign test on the original value. | ||||
1361 | Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) { | ||||
1362 | Instruction *Val; | ||||
1363 | ICmpInst::Predicate Pred; | ||||
1364 | if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) | ||||
1365 | return nullptr; | ||||
1366 | |||||
1367 | Value *X; | ||||
1368 | Type *XTy; | ||||
1369 | |||||
1370 | Constant *C; | ||||
1371 | if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) { | ||||
1372 | XTy = X->getType(); | ||||
1373 | unsigned XBitWidth = XTy->getScalarSizeInBits(); | ||||
1374 | if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, | ||||
1375 | APInt(XBitWidth, XBitWidth - 1)))) | ||||
1376 | return nullptr; | ||||
1377 | } else if (isa<BinaryOperator>(Val) && | ||||
1378 | (X = reassociateShiftAmtsOfTwoSameDirectionShifts( | ||||
1379 | cast<BinaryOperator>(Val), SQ.getWithInstruction(Val), | ||||
1380 | /*AnalyzeForSignBitExtraction=*/true))) { | ||||
1381 | XTy = X->getType(); | ||||
1382 | } else | ||||
1383 | return nullptr; | ||||
1384 | |||||
1385 | return ICmpInst::Create(Instruction::ICmp, | ||||
1386 | Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE | ||||
1387 | : ICmpInst::ICMP_SLT, | ||||
1388 | X, ConstantInt::getNullValue(XTy)); | ||||
1389 | } | ||||
1390 | |||||
1391 | // Handle icmp pred X, 0 | ||||
1392 | Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { | ||||
1393 | CmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1394 | if (!match(Cmp.getOperand(1), m_Zero())) | ||||
1395 | return nullptr; | ||||
1396 | |||||
1397 | // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) | ||||
1398 | if (Pred == ICmpInst::ICMP_SGT) { | ||||
1399 | Value *A, *B; | ||||
1400 | SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B); | ||||
1401 | if (SPR.Flavor == SPF_SMIN) { | ||||
1402 | if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) | ||||
1403 | return new ICmpInst(Pred, B, Cmp.getOperand(1)); | ||||
1404 | if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) | ||||
1405 | return new ICmpInst(Pred, A, Cmp.getOperand(1)); | ||||
1406 | } | ||||
1407 | } | ||||
1408 | |||||
1409 | if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp)) | ||||
1410 | return New; | ||||
1411 | |||||
1412 | // Given: | ||||
1413 | // icmp eq/ne (urem %x, %y), 0 | ||||
1414 | // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': | ||||
1415 | // icmp eq/ne %x, 0 | ||||
1416 | Value *X, *Y; | ||||
1417 | if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) && | ||||
1418 | ICmpInst::isEquality(Pred)) { | ||||
1419 | KnownBits XKnown = computeKnownBits(X, 0, &Cmp); | ||||
1420 | KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); | ||||
1421 | if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) | ||||
1422 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); | ||||
1423 | } | ||||
1424 | |||||
1425 | return nullptr; | ||||
1426 | } | ||||
1427 | |||||
1428 | /// Fold icmp Pred X, C. | ||||
1429 | /// TODO: This code structure does not make sense. The saturating add fold | ||||
1430 | /// should be moved to some other helper and extended as noted below (it is also | ||||
1431 | /// possible that code has been made unnecessary - do we canonicalize IR to | ||||
1432 | /// overflow/saturating intrinsics or not?). | ||||
1433 | Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { | ||||
1434 | // Match the following pattern, which is a common idiom when writing | ||||
1435 | // overflow-safe integer arithmetic functions. The source performs an addition | ||||
1436 | // in wider type and explicitly checks for overflow using comparisons against | ||||
1437 | // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic. | ||||
1438 | // | ||||
1439 | // TODO: This could probably be generalized to handle other overflow-safe | ||||
1440 | // operations if we worked out the formulas to compute the appropriate magic | ||||
1441 | // constants. | ||||
1442 | // | ||||
1443 | // sum = a + b | ||||
1444 | // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 | ||||
1445 | CmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1446 | Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1); | ||||
1447 | Value *A, *B; | ||||
1448 | ConstantInt *CI, *CI2; // I = icmp ugt (add (add A, B), CI2), CI | ||||
1449 | if (Pred == ICmpInst::ICMP_UGT && match(Op1, m_ConstantInt(CI)) && | ||||
1450 | match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) | ||||
1451 | if (Instruction *Res = processUGT_ADDCST_ADD(Cmp, A, B, CI2, CI, *this)) | ||||
1452 | return Res; | ||||
1453 | |||||
1454 | return nullptr; | ||||
1455 | } | ||||
1456 | |||||
1457 | /// Canonicalize icmp instructions based on dominating conditions. | ||||
1458 | Instruction *InstCombiner::foldICmpWithDominatingICmp(ICmpInst &Cmp) { | ||||
1459 | // This is a cheap/incomplete check for dominance - just match a single | ||||
1460 | // predecessor with a conditional branch. | ||||
1461 | BasicBlock *CmpBB = Cmp.getParent(); | ||||
1462 | BasicBlock *DomBB = CmpBB->getSinglePredecessor(); | ||||
1463 | if (!DomBB) | ||||
1464 | return nullptr; | ||||
1465 | |||||
1466 | Value *DomCond; | ||||
1467 | BasicBlock *TrueBB, *FalseBB; | ||||
1468 | if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) | ||||
1469 | return nullptr; | ||||
1470 | |||||
1471 | assert((TrueBB == CmpBB || FalseBB == CmpBB) &&(((TrueBB == CmpBB || FalseBB == CmpBB) && "Predecessor block does not point to successor?" ) ? static_cast<void> (0) : __assert_fail ("(TrueBB == CmpBB || FalseBB == CmpBB) && \"Predecessor block does not point to successor?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1472, __PRETTY_FUNCTION__)) | ||||
1472 | "Predecessor block does not point to successor?")(((TrueBB == CmpBB || FalseBB == CmpBB) && "Predecessor block does not point to successor?" ) ? static_cast<void> (0) : __assert_fail ("(TrueBB == CmpBB || FalseBB == CmpBB) && \"Predecessor block does not point to successor?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 1472, __PRETTY_FUNCTION__)); | ||||
1473 | |||||
1474 | // The branch should get simplified. Don't bother simplifying this condition. | ||||
1475 | if (TrueBB == FalseBB) | ||||
1476 | return nullptr; | ||||
1477 | |||||
1478 | // Try to simplify this compare to T/F based on the dominating condition. | ||||
1479 | Optional<bool> Imp = isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB); | ||||
1480 | if (Imp) | ||||
1481 | return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp)); | ||||
1482 | |||||
1483 | CmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1484 | Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1); | ||||
1485 | ICmpInst::Predicate DomPred; | ||||
1486 | const APInt *C, *DomC; | ||||
1487 | if (match(DomCond, m_ICmp(DomPred, m_Specific(X), m_APInt(DomC))) && | ||||
1488 | match(Y, m_APInt(C))) { | ||||
1489 | // We have 2 compares of a variable with constants. Calculate the constant | ||||
1490 | // ranges of those compares to see if we can transform the 2nd compare: | ||||
1491 | // DomBB: | ||||
1492 | // DomCond = icmp DomPred X, DomC | ||||
1493 | // br DomCond, CmpBB, FalseBB | ||||
1494 | // CmpBB: | ||||
1495 | // Cmp = icmp Pred X, C | ||||
1496 | ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred, *C); | ||||
1497 | ConstantRange DominatingCR = | ||||
1498 | (CmpBB == TrueBB) ? ConstantRange::makeExactICmpRegion(DomPred, *DomC) | ||||
1499 | : ConstantRange::makeExactICmpRegion( | ||||
1500 | CmpInst::getInversePredicate(DomPred), *DomC); | ||||
1501 | ConstantRange Intersection = DominatingCR.intersectWith(CR); | ||||
1502 | ConstantRange Difference = DominatingCR.difference(CR); | ||||
1503 | if (Intersection.isEmptySet()) | ||||
1504 | return replaceInstUsesWith(Cmp, Builder.getFalse()); | ||||
1505 | if (Difference.isEmptySet()) | ||||
1506 | return replaceInstUsesWith(Cmp, Builder.getTrue()); | ||||
1507 | |||||
1508 | // Canonicalizing a sign bit comparison that gets used in a branch, | ||||
1509 | // pessimizes codegen by generating branch on zero instruction instead | ||||
1510 | // of a test and branch. So we avoid canonicalizing in such situations | ||||
1511 | // because test and branch instruction has better branch displacement | ||||
1512 | // than compare and branch instruction. | ||||
1513 | bool UnusedBit; | ||||
1514 | bool IsSignBit = isSignBitCheck(Pred, *C, UnusedBit); | ||||
1515 | if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp))) | ||||
1516 | return nullptr; | ||||
1517 | |||||
1518 | if (const APInt *EqC = Intersection.getSingleElement()) | ||||
1519 | return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*EqC)); | ||||
1520 | if (const APInt *NeC = Difference.getSingleElement()) | ||||
1521 | return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*NeC)); | ||||
1522 | } | ||||
1523 | |||||
1524 | return nullptr; | ||||
1525 | } | ||||
1526 | |||||
1527 | /// Fold icmp (trunc X, Y), C. | ||||
1528 | Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, | ||||
1529 | TruncInst *Trunc, | ||||
1530 | const APInt &C) { | ||||
1531 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1532 | Value *X = Trunc->getOperand(0); | ||||
1533 | if (C.isOneValue() && C.getBitWidth() > 1) { | ||||
1534 | // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 | ||||
1535 | Value *V = nullptr; | ||||
1536 | if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) | ||||
1537 | return new ICmpInst(ICmpInst::ICMP_SLT, V, | ||||
1538 | ConstantInt::get(V->getType(), 1)); | ||||
1539 | } | ||||
1540 | |||||
1541 | if (Cmp.isEquality() && Trunc->hasOneUse()) { | ||||
1542 | // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all | ||||
1543 | // of the high bits truncated out of x are known. | ||||
1544 | unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), | ||||
1545 | SrcBits = X->getType()->getScalarSizeInBits(); | ||||
1546 | KnownBits Known = computeKnownBits(X, 0, &Cmp); | ||||
1547 | |||||
1548 | // If all the high bits are known, we can do this xform. | ||||
1549 | if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { | ||||
1550 | // Pull in the high bits from known-ones set. | ||||
1551 | APInt NewRHS = C.zext(SrcBits); | ||||
1552 | NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); | ||||
1553 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS)); | ||||
1554 | } | ||||
1555 | } | ||||
1556 | |||||
1557 | return nullptr; | ||||
1558 | } | ||||
1559 | |||||
1560 | /// Fold icmp (xor X, Y), C. | ||||
1561 | Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, | ||||
1562 | BinaryOperator *Xor, | ||||
1563 | const APInt &C) { | ||||
1564 | Value *X = Xor->getOperand(0); | ||||
1565 | Value *Y = Xor->getOperand(1); | ||||
1566 | const APInt *XorC; | ||||
1567 | if (!match(Y, m_APInt(XorC))) | ||||
1568 | return nullptr; | ||||
1569 | |||||
1570 | // If this is a comparison that tests the signbit (X < 0) or (x > -1), | ||||
1571 | // fold the xor. | ||||
1572 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1573 | bool TrueIfSigned = false; | ||||
1574 | if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) { | ||||
1575 | |||||
1576 | // If the sign bit of the XorCst is not set, there is no change to | ||||
1577 | // the operation, just stop using the Xor. | ||||
1578 | if (!XorC->isNegative()) { | ||||
1579 | Cmp.setOperand(0, X); | ||||
1580 | Worklist.Add(Xor); | ||||
1581 | return &Cmp; | ||||
1582 | } | ||||
1583 | |||||
1584 | // Emit the opposite comparison. | ||||
1585 | if (TrueIfSigned) | ||||
1586 | return new ICmpInst(ICmpInst::ICMP_SGT, X, | ||||
1587 | ConstantInt::getAllOnesValue(X->getType())); | ||||
1588 | else | ||||
1589 | return new ICmpInst(ICmpInst::ICMP_SLT, X, | ||||
1590 | ConstantInt::getNullValue(X->getType())); | ||||
1591 | } | ||||
1592 | |||||
1593 | if (Xor->hasOneUse()) { | ||||
1594 | // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) | ||||
1595 | if (!Cmp.isEquality() && XorC->isSignMask()) { | ||||
1596 | Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() | ||||
1597 | : Cmp.getSignedPredicate(); | ||||
1598 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); | ||||
1599 | } | ||||
1600 | |||||
1601 | // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) | ||||
1602 | if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { | ||||
1603 | Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() | ||||
1604 | : Cmp.getSignedPredicate(); | ||||
1605 | Pred = Cmp.getSwappedPredicate(Pred); | ||||
1606 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); | ||||
1607 | } | ||||
1608 | } | ||||
1609 | |||||
1610 | // Mask constant magic can eliminate an 'xor' with unsigned compares. | ||||
1611 | if (Pred == ICmpInst::ICMP_UGT) { | ||||
1612 | // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2) | ||||
1613 | if (*XorC == ~C && (C + 1).isPowerOf2()) | ||||
1614 | return new ICmpInst(ICmpInst::ICMP_ULT, X, Y); | ||||
1615 | // (xor X, C) >u C --> X >u C (when C+1 is a power of 2) | ||||
1616 | if (*XorC == C && (C + 1).isPowerOf2()) | ||||
1617 | return new ICmpInst(ICmpInst::ICMP_UGT, X, Y); | ||||
1618 | } | ||||
1619 | if (Pred == ICmpInst::ICMP_ULT) { | ||||
1620 | // (xor X, -C) <u C --> X >u ~C (when C is a power of 2) | ||||
1621 | if (*XorC == -C && C.isPowerOf2()) | ||||
1622 | return new ICmpInst(ICmpInst::ICMP_UGT, X, | ||||
1623 | ConstantInt::get(X->getType(), ~C)); | ||||
1624 | // (xor X, C) <u C --> X >u ~C (when -C is a power of 2) | ||||
1625 | if (*XorC == C && (-C).isPowerOf2()) | ||||
1626 | return new ICmpInst(ICmpInst::ICMP_UGT, X, | ||||
1627 | ConstantInt::get(X->getType(), ~C)); | ||||
1628 | } | ||||
1629 | return nullptr; | ||||
1630 | } | ||||
1631 | |||||
1632 | /// Fold icmp (and (sh X, Y), C2), C1. | ||||
1633 | Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, | ||||
1634 | const APInt &C1, const APInt &C2) { | ||||
1635 | BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0)); | ||||
1636 | if (!Shift || !Shift->isShift()) | ||||
1637 | return nullptr; | ||||
1638 | |||||
1639 | // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could | ||||
1640 | // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in | ||||
1641 | // code produced by the clang front-end, for bitfield access. | ||||
1642 | // This seemingly simple opportunity to fold away a shift turns out to be | ||||
1643 | // rather complicated. See PR17827 for details. | ||||
1644 | unsigned ShiftOpcode = Shift->getOpcode(); | ||||
1645 | bool IsShl = ShiftOpcode == Instruction::Shl; | ||||
1646 | const APInt *C3; | ||||
1647 | if (match(Shift->getOperand(1), m_APInt(C3))) { | ||||
1648 | bool CanFold = false; | ||||
1649 | if (ShiftOpcode == Instruction::Shl) { | ||||
1650 | // For a left shift, we can fold if the comparison is not signed. We can | ||||
1651 | // also fold a signed comparison if the mask value and comparison value | ||||
1652 | // are not negative. These constraints may not be obvious, but we can | ||||
1653 | // prove that they are correct using an SMT solver. | ||||
1654 | if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative())) | ||||
1655 | CanFold = true; | ||||
1656 | } else { | ||||
1657 | bool IsAshr = ShiftOpcode == Instruction::AShr; | ||||
1658 | // For a logical right shift, we can fold if the comparison is not signed. | ||||
1659 | // We can also fold a signed comparison if the shifted mask value and the | ||||
1660 | // shifted comparison value are not negative. These constraints may not be | ||||
1661 | // obvious, but we can prove that they are correct using an SMT solver. | ||||
1662 | // For an arithmetic shift right we can do the same, if we ensure | ||||
1663 | // the And doesn't use any bits being shifted in. Normally these would | ||||
1664 | // be turned into lshr by SimplifyDemandedBits, but not if there is an | ||||
1665 | // additional user. | ||||
1666 | if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) { | ||||
1667 | if (!Cmp.isSigned() || | ||||
1668 | (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative())) | ||||
1669 | CanFold = true; | ||||
1670 | } | ||||
1671 | } | ||||
1672 | |||||
1673 | if (CanFold) { | ||||
1674 | APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3); | ||||
1675 | APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3); | ||||
1676 | // Check to see if we are shifting out any of the bits being compared. | ||||
1677 | if (SameAsC1 != C1) { | ||||
1678 | // If we shifted bits out, the fold is not going to work out. As a | ||||
1679 | // special case, check to see if this means that the result is always | ||||
1680 | // true or false now. | ||||
1681 | if (Cmp.getPredicate() == ICmpInst::ICMP_EQ) | ||||
1682 | return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType())); | ||||
1683 | if (Cmp.getPredicate() == ICmpInst::ICMP_NE) | ||||
1684 | return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType())); | ||||
1685 | } else { | ||||
1686 | Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst)); | ||||
1687 | APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3); | ||||
1688 | And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst)); | ||||
1689 | And->setOperand(0, Shift->getOperand(0)); | ||||
1690 | Worklist.Add(Shift); // Shift is dead. | ||||
1691 | return &Cmp; | ||||
1692 | } | ||||
1693 | } | ||||
1694 | } | ||||
1695 | |||||
1696 | // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is | ||||
1697 | // preferable because it allows the C2 << Y expression to be hoisted out of a | ||||
1698 | // loop if Y is invariant and X is not. | ||||
1699 | if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() && | ||||
1700 | !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) { | ||||
1701 | // Compute C2 << Y. | ||||
1702 | Value *NewShift = | ||||
1703 | IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) | ||||
1704 | : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); | ||||
1705 | |||||
1706 | // Compute X & (C2 << Y). | ||||
1707 | Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); | ||||
1708 | Cmp.setOperand(0, NewAnd); | ||||
1709 | return &Cmp; | ||||
1710 | } | ||||
1711 | |||||
1712 | return nullptr; | ||||
1713 | } | ||||
1714 | |||||
1715 | /// Fold icmp (and X, C2), C1. | ||||
1716 | Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, | ||||
1717 | BinaryOperator *And, | ||||
1718 | const APInt &C1) { | ||||
1719 | bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE; | ||||
1720 | |||||
1721 | // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1 | ||||
1722 | // TODO: We canonicalize to the longer form for scalars because we have | ||||
1723 | // better analysis/folds for icmp, and codegen may be better with icmp. | ||||
1724 | if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() && | ||||
1725 | match(And->getOperand(1), m_One())) | ||||
1726 | return new TruncInst(And->getOperand(0), Cmp.getType()); | ||||
1727 | |||||
1728 | const APInt *C2; | ||||
1729 | Value *X; | ||||
1730 | if (!match(And, m_And(m_Value(X), m_APInt(C2)))) | ||||
1731 | return nullptr; | ||||
1732 | |||||
1733 | // Don't perform the following transforms if the AND has multiple uses | ||||
1734 | if (!And->hasOneUse()) | ||||
1735 | return nullptr; | ||||
1736 | |||||
1737 | if (Cmp.isEquality() && C1.isNullValue()) { | ||||
1738 | // Restrict this fold to single-use 'and' (PR10267). | ||||
1739 | // Replace (and X, (1 << size(X)-1) != 0) with X s< 0 | ||||
1740 | if (C2->isSignMask()) { | ||||
1741 | Constant *Zero = Constant::getNullValue(X->getType()); | ||||
1742 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; | ||||
1743 | return new ICmpInst(NewPred, X, Zero); | ||||
1744 | } | ||||
1745 | |||||
1746 | // Restrict this fold only for single-use 'and' (PR10267). | ||||
1747 | // ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two. | ||||
1748 | if ((~(*C2) + 1).isPowerOf2()) { | ||||
1749 | Constant *NegBOC = | ||||
1750 | ConstantExpr::getNeg(cast<Constant>(And->getOperand(1))); | ||||
1751 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; | ||||
1752 | return new ICmpInst(NewPred, X, NegBOC); | ||||
1753 | } | ||||
1754 | } | ||||
1755 | |||||
1756 | // If the LHS is an 'and' of a truncate and we can widen the and/compare to | ||||
1757 | // the input width without changing the value produced, eliminate the cast: | ||||
1758 | // | ||||
1759 | // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1' | ||||
1760 | // | ||||
1761 | // We can do this transformation if the constants do not have their sign bits | ||||
1762 | // set or if it is an equality comparison. Extending a relational comparison | ||||
1763 | // when we're checking the sign bit would not work. | ||||
1764 | Value *W; | ||||
1765 | if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) && | ||||
1766 | (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) { | ||||
1767 | // TODO: Is this a good transform for vectors? Wider types may reduce | ||||
1768 | // throughput. Should this transform be limited (even for scalars) by using | ||||
1769 | // shouldChangeType()? | ||||
1770 | if (!Cmp.getType()->isVectorTy()) { | ||||
1771 | Type *WideType = W->getType(); | ||||
1772 | unsigned WideScalarBits = WideType->getScalarSizeInBits(); | ||||
1773 | Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits)); | ||||
1774 | Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); | ||||
1775 | Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); | ||||
1776 | return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); | ||||
1777 | } | ||||
1778 | } | ||||
1779 | |||||
1780 | if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2)) | ||||
1781 | return I; | ||||
1782 | |||||
1783 | // (icmp pred (and (or (lshr A, B), A), 1), 0) --> | ||||
1784 | // (icmp pred (and A, (or (shl 1, B), 1), 0)) | ||||
1785 | // | ||||
1786 | // iff pred isn't signed | ||||
1787 | if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() && | ||||
1788 | match(And->getOperand(1), m_One())) { | ||||
1789 | Constant *One = cast<Constant>(And->getOperand(1)); | ||||
1790 | Value *Or = And->getOperand(0); | ||||
1791 | Value *A, *B, *LShr; | ||||
1792 | if (match(Or, m_Or(m_Value(LShr), m_Value(A))) && | ||||
1793 | match(LShr, m_LShr(m_Specific(A), m_Value(B)))) { | ||||
1794 | unsigned UsesRemoved = 0; | ||||
1795 | if (And->hasOneUse()) | ||||
1796 | ++UsesRemoved; | ||||
1797 | if (Or->hasOneUse()) | ||||
1798 | ++UsesRemoved; | ||||
1799 | if (LShr->hasOneUse()) | ||||
1800 | ++UsesRemoved; | ||||
1801 | |||||
1802 | // Compute A & ((1 << B) | 1) | ||||
1803 | Value *NewOr = nullptr; | ||||
1804 | if (auto *C = dyn_cast<Constant>(B)) { | ||||
1805 | if (UsesRemoved >= 1) | ||||
1806 | NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); | ||||
1807 | } else { | ||||
1808 | if (UsesRemoved >= 3) | ||||
1809 | NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), | ||||
1810 | /*HasNUW=*/true), | ||||
1811 | One, Or->getName()); | ||||
1812 | } | ||||
1813 | if (NewOr) { | ||||
1814 | Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); | ||||
1815 | Cmp.setOperand(0, NewAnd); | ||||
1816 | return &Cmp; | ||||
1817 | } | ||||
1818 | } | ||||
1819 | } | ||||
1820 | |||||
1821 | return nullptr; | ||||
1822 | } | ||||
1823 | |||||
1824 | /// Fold icmp (and X, Y), C. | ||||
1825 | Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, | ||||
1826 | BinaryOperator *And, | ||||
1827 | const APInt &C) { | ||||
1828 | if (Instruction *I = foldICmpAndConstConst(Cmp, And, C)) | ||||
1829 | return I; | ||||
1830 | |||||
1831 | // TODO: These all require that Y is constant too, so refactor with the above. | ||||
1832 | |||||
1833 | // Try to optimize things like "A[i] & 42 == 0" to index computations. | ||||
1834 | Value *X = And->getOperand(0); | ||||
1835 | Value *Y = And->getOperand(1); | ||||
1836 | if (auto *LI = dyn_cast<LoadInst>(X)) | ||||
1837 | if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) | ||||
1838 | if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | ||||
1839 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | ||||
1840 | !LI->isVolatile() && isa<ConstantInt>(Y)) { | ||||
1841 | ConstantInt *C2 = cast<ConstantInt>(Y); | ||||
1842 | if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2)) | ||||
1843 | return Res; | ||||
1844 | } | ||||
1845 | |||||
1846 | if (!Cmp.isEquality()) | ||||
1847 | return nullptr; | ||||
1848 | |||||
1849 | // X & -C == -C -> X > u ~C | ||||
1850 | // X & -C != -C -> X <= u ~C | ||||
1851 | // iff C is a power of 2 | ||||
1852 | if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) { | ||||
1853 | auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT | ||||
1854 | : CmpInst::ICMP_ULE; | ||||
1855 | return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1)))); | ||||
1856 | } | ||||
1857 | |||||
1858 | // (X & C2) == 0 -> (trunc X) >= 0 | ||||
1859 | // (X & C2) != 0 -> (trunc X) < 0 | ||||
1860 | // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. | ||||
1861 | const APInt *C2; | ||||
1862 | if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) { | ||||
1863 | int32_t ExactLogBase2 = C2->exactLogBase2(); | ||||
1864 | if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { | ||||
1865 | Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); | ||||
1866 | if (And->getType()->isVectorTy()) | ||||
1867 | NTy = VectorType::get(NTy, And->getType()->getVectorNumElements()); | ||||
1868 | Value *Trunc = Builder.CreateTrunc(X, NTy); | ||||
1869 | auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE | ||||
1870 | : CmpInst::ICMP_SLT; | ||||
1871 | return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); | ||||
1872 | } | ||||
1873 | } | ||||
1874 | |||||
1875 | return nullptr; | ||||
1876 | } | ||||
1877 | |||||
1878 | /// Fold icmp (or X, Y), C. | ||||
1879 | Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, | ||||
1880 | const APInt &C) { | ||||
1881 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1882 | if (C.isOneValue()) { | ||||
1883 | // icmp slt signum(V) 1 --> icmp slt V, 1 | ||||
1884 | Value *V = nullptr; | ||||
1885 | if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) | ||||
1886 | return new ICmpInst(ICmpInst::ICMP_SLT, V, | ||||
1887 | ConstantInt::get(V->getType(), 1)); | ||||
1888 | } | ||||
1889 | |||||
1890 | Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1); | ||||
1891 | if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) { | ||||
1892 | // X | C == C --> X <=u C | ||||
1893 | // X | C != C --> X >u C | ||||
1894 | // iff C+1 is a power of 2 (C is a bitmask of the low bits) | ||||
1895 | if ((C + 1).isPowerOf2()) { | ||||
1896 | Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; | ||||
1897 | return new ICmpInst(Pred, OrOp0, OrOp1); | ||||
1898 | } | ||||
1899 | // More general: are all bits outside of a mask constant set or not set? | ||||
1900 | // X | C == C --> (X & ~C) == 0 | ||||
1901 | // X | C != C --> (X & ~C) != 0 | ||||
1902 | if (Or->hasOneUse()) { | ||||
1903 | Value *A = Builder.CreateAnd(OrOp0, ~C); | ||||
1904 | return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType())); | ||||
1905 | } | ||||
1906 | } | ||||
1907 | |||||
1908 | if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse()) | ||||
1909 | return nullptr; | ||||
1910 | |||||
1911 | Value *P, *Q; | ||||
1912 | if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { | ||||
1913 | // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 | ||||
1914 | // -> and (icmp eq P, null), (icmp eq Q, null). | ||||
1915 | Value *CmpP = | ||||
1916 | Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); | ||||
1917 | Value *CmpQ = | ||||
1918 | Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); | ||||
1919 | auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; | ||||
1920 | return BinaryOperator::Create(BOpc, CmpP, CmpQ); | ||||
1921 | } | ||||
1922 | |||||
1923 | // Are we using xors to bitwise check for a pair of (in)equalities? Convert to | ||||
1924 | // a shorter form that has more potential to be folded even further. | ||||
1925 | Value *X1, *X2, *X3, *X4; | ||||
1926 | if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) && | ||||
1927 | match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) { | ||||
1928 | // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4) | ||||
1929 | // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4) | ||||
1930 | Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2); | ||||
1931 | Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4); | ||||
1932 | auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; | ||||
1933 | return BinaryOperator::Create(BOpc, Cmp12, Cmp34); | ||||
1934 | } | ||||
1935 | |||||
1936 | return nullptr; | ||||
1937 | } | ||||
1938 | |||||
1939 | /// Fold icmp (mul X, Y), C. | ||||
1940 | Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp, | ||||
1941 | BinaryOperator *Mul, | ||||
1942 | const APInt &C) { | ||||
1943 | const APInt *MulC; | ||||
1944 | if (!match(Mul->getOperand(1), m_APInt(MulC))) | ||||
1945 | return nullptr; | ||||
1946 | |||||
1947 | // If this is a test of the sign bit and the multiply is sign-preserving with | ||||
1948 | // a constant operand, use the multiply LHS operand instead. | ||||
1949 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1950 | if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) { | ||||
1951 | if (MulC->isNegative()) | ||||
1952 | Pred = ICmpInst::getSwappedPredicate(Pred); | ||||
1953 | return new ICmpInst(Pred, Mul->getOperand(0), | ||||
1954 | Constant::getNullValue(Mul->getType())); | ||||
1955 | } | ||||
1956 | |||||
1957 | return nullptr; | ||||
1958 | } | ||||
1959 | |||||
1960 | /// Fold icmp (shl 1, Y), C. | ||||
1961 | static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, | ||||
1962 | const APInt &C) { | ||||
1963 | Value *Y; | ||||
1964 | if (!match(Shl, m_Shl(m_One(), m_Value(Y)))) | ||||
1965 | return nullptr; | ||||
1966 | |||||
1967 | Type *ShiftType = Shl->getType(); | ||||
1968 | unsigned TypeBits = C.getBitWidth(); | ||||
1969 | bool CIsPowerOf2 = C.isPowerOf2(); | ||||
1970 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
1971 | if (Cmp.isUnsigned()) { | ||||
1972 | // (1 << Y) pred C -> Y pred Log2(C) | ||||
1973 | if (!CIsPowerOf2) { | ||||
1974 | // (1 << Y) < 30 -> Y <= 4 | ||||
1975 | // (1 << Y) <= 30 -> Y <= 4 | ||||
1976 | // (1 << Y) >= 30 -> Y > 4 | ||||
1977 | // (1 << Y) > 30 -> Y > 4 | ||||
1978 | if (Pred == ICmpInst::ICMP_ULT) | ||||
1979 | Pred = ICmpInst::ICMP_ULE; | ||||
1980 | else if (Pred == ICmpInst::ICMP_UGE) | ||||
1981 | Pred = ICmpInst::ICMP_UGT; | ||||
1982 | } | ||||
1983 | |||||
1984 | // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31 | ||||
1985 | // (1 << Y) < 2147483648 -> Y < 31 -> Y != 31 | ||||
1986 | unsigned CLog2 = C.logBase2(); | ||||
1987 | if (CLog2 == TypeBits - 1) { | ||||
1988 | if (Pred == ICmpInst::ICMP_UGE) | ||||
1989 | Pred = ICmpInst::ICMP_EQ; | ||||
1990 | else if (Pred == ICmpInst::ICMP_ULT) | ||||
1991 | Pred = ICmpInst::ICMP_NE; | ||||
1992 | } | ||||
1993 | return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2)); | ||||
1994 | } else if (Cmp.isSigned()) { | ||||
1995 | Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1); | ||||
1996 | if (C.isAllOnesValue()) { | ||||
1997 | // (1 << Y) <= -1 -> Y == 31 | ||||
1998 | if (Pred == ICmpInst::ICMP_SLE) | ||||
1999 | return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne); | ||||
2000 | |||||
2001 | // (1 << Y) > -1 -> Y != 31 | ||||
2002 | if (Pred == ICmpInst::ICMP_SGT) | ||||
2003 | return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne); | ||||
2004 | } else if (!C) { | ||||
2005 | // (1 << Y) < 0 -> Y == 31 | ||||
2006 | // (1 << Y) <= 0 -> Y == 31 | ||||
2007 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | ||||
2008 | return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne); | ||||
2009 | |||||
2010 | // (1 << Y) >= 0 -> Y != 31 | ||||
2011 | // (1 << Y) > 0 -> Y != 31 | ||||
2012 | if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) | ||||
2013 | return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne); | ||||
2014 | } | ||||
2015 | } else if (Cmp.isEquality() && CIsPowerOf2) { | ||||
2016 | return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2())); | ||||
2017 | } | ||||
2018 | |||||
2019 | return nullptr; | ||||
2020 | } | ||||
2021 | |||||
2022 | /// Fold icmp (shl X, Y), C. | ||||
2023 | Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, | ||||
2024 | BinaryOperator *Shl, | ||||
2025 | const APInt &C) { | ||||
2026 | const APInt *ShiftVal; | ||||
2027 | if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal))) | ||||
2028 | return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal); | ||||
2029 | |||||
2030 | const APInt *ShiftAmt; | ||||
2031 | if (!match(Shl->getOperand(1), m_APInt(ShiftAmt))) | ||||
2032 | return foldICmpShlOne(Cmp, Shl, C); | ||||
2033 | |||||
2034 | // Check that the shift amount is in range. If not, don't perform undefined | ||||
2035 | // shifts. When the shift is visited, it will be simplified. | ||||
2036 | unsigned TypeBits = C.getBitWidth(); | ||||
2037 | if (ShiftAmt->uge(TypeBits)) | ||||
2038 | return nullptr; | ||||
2039 | |||||
2040 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2041 | Value *X = Shl->getOperand(0); | ||||
2042 | Type *ShType = Shl->getType(); | ||||
2043 | |||||
2044 | // NSW guarantees that we are only shifting out sign bits from the high bits, | ||||
2045 | // so we can ASHR the compare constant without needing a mask and eliminate | ||||
2046 | // the shift. | ||||
2047 | if (Shl->hasNoSignedWrap()) { | ||||
2048 | if (Pred == ICmpInst::ICMP_SGT) { | ||||
2049 | // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt) | ||||
2050 | APInt ShiftedC = C.ashr(*ShiftAmt); | ||||
2051 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2052 | } | ||||
2053 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && | ||||
2054 | C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) { | ||||
2055 | APInt ShiftedC = C.ashr(*ShiftAmt); | ||||
2056 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2057 | } | ||||
2058 | if (Pred == ICmpInst::ICMP_SLT) { | ||||
2059 | // SLE is the same as above, but SLE is canonicalized to SLT, so convert: | ||||
2060 | // (X << S) <=s C is equiv to X <=s (C >> S) for all C | ||||
2061 | // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX | ||||
2062 | // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN | ||||
2063 | assert(!C.isMinSignedValue() && "Unexpected icmp slt")((!C.isMinSignedValue() && "Unexpected icmp slt") ? static_cast <void> (0) : __assert_fail ("!C.isMinSignedValue() && \"Unexpected icmp slt\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2063, __PRETTY_FUNCTION__)); | ||||
2064 | APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1; | ||||
2065 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2066 | } | ||||
2067 | // If this is a signed comparison to 0 and the shift is sign preserving, | ||||
2068 | // use the shift LHS operand instead; isSignTest may change 'Pred', so only | ||||
2069 | // do that if we're sure to not continue on in this function. | ||||
2070 | if (isSignTest(Pred, C)) | ||||
2071 | return new ICmpInst(Pred, X, Constant::getNullValue(ShType)); | ||||
2072 | } | ||||
2073 | |||||
2074 | // NUW guarantees that we are only shifting out zero bits from the high bits, | ||||
2075 | // so we can LSHR the compare constant without needing a mask and eliminate | ||||
2076 | // the shift. | ||||
2077 | if (Shl->hasNoUnsignedWrap()) { | ||||
2078 | if (Pred == ICmpInst::ICMP_UGT) { | ||||
2079 | // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt) | ||||
2080 | APInt ShiftedC = C.lshr(*ShiftAmt); | ||||
2081 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2082 | } | ||||
2083 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && | ||||
2084 | C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) { | ||||
2085 | APInt ShiftedC = C.lshr(*ShiftAmt); | ||||
2086 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2087 | } | ||||
2088 | if (Pred == ICmpInst::ICMP_ULT) { | ||||
2089 | // ULE is the same as above, but ULE is canonicalized to ULT, so convert: | ||||
2090 | // (X << S) <=u C is equiv to X <=u (C >> S) for all C | ||||
2091 | // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u | ||||
2092 | // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0 | ||||
2093 | assert(C.ugt(0) && "ult 0 should have been eliminated")((C.ugt(0) && "ult 0 should have been eliminated") ? static_cast <void> (0) : __assert_fail ("C.ugt(0) && \"ult 0 should have been eliminated\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2093, __PRETTY_FUNCTION__)); | ||||
2094 | APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1; | ||||
2095 | return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); | ||||
2096 | } | ||||
2097 | } | ||||
2098 | |||||
2099 | if (Cmp.isEquality() && Shl->hasOneUse()) { | ||||
2100 | // Strength-reduce the shift into an 'and'. | ||||
2101 | Constant *Mask = ConstantInt::get( | ||||
2102 | ShType, | ||||
2103 | APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); | ||||
2104 | Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); | ||||
2105 | Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt)); | ||||
2106 | return new ICmpInst(Pred, And, LShrC); | ||||
2107 | } | ||||
2108 | |||||
2109 | // Otherwise, if this is a comparison of the sign bit, simplify to and/test. | ||||
2110 | bool TrueIfSigned = false; | ||||
2111 | if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) { | ||||
2112 | // (X << 31) <s 0 --> (X & 1) != 0 | ||||
2113 | Constant *Mask = ConstantInt::get( | ||||
2114 | ShType, | ||||
2115 | APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); | ||||
2116 | Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); | ||||
2117 | return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, | ||||
2118 | And, Constant::getNullValue(ShType)); | ||||
2119 | } | ||||
2120 | |||||
2121 | // Simplify 'shl' inequality test into 'and' equality test. | ||||
2122 | if (Cmp.isUnsigned() && Shl->hasOneUse()) { | ||||
2123 | // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0 | ||||
2124 | if ((C + 1).isPowerOf2() && | ||||
2125 | (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) { | ||||
2126 | Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue())); | ||||
2127 | return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ | ||||
2128 | : ICmpInst::ICMP_NE, | ||||
2129 | And, Constant::getNullValue(ShType)); | ||||
2130 | } | ||||
2131 | // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0 | ||||
2132 | if (C.isPowerOf2() && | ||||
2133 | (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) { | ||||
2134 | Value *And = | ||||
2135 | Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue())); | ||||
2136 | return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ | ||||
2137 | : ICmpInst::ICMP_NE, | ||||
2138 | And, Constant::getNullValue(ShType)); | ||||
2139 | } | ||||
2140 | } | ||||
2141 | |||||
2142 | // Transform (icmp pred iM (shl iM %v, N), C) | ||||
2143 | // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)) | ||||
2144 | // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N. | ||||
2145 | // This enables us to get rid of the shift in favor of a trunc that may be | ||||
2146 | // free on the target. It has the additional benefit of comparing to a | ||||
2147 | // smaller constant that may be more target-friendly. | ||||
2148 | unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1); | ||||
2149 | if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt && | ||||
2150 | DL.isLegalInteger(TypeBits - Amt)) { | ||||
2151 | Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt); | ||||
2152 | if (ShType->isVectorTy()) | ||||
2153 | TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); | ||||
2154 | Constant *NewC = | ||||
2155 | ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt)); | ||||
2156 | return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); | ||||
2157 | } | ||||
2158 | |||||
2159 | return nullptr; | ||||
2160 | } | ||||
2161 | |||||
2162 | /// Fold icmp ({al}shr X, Y), C. | ||||
2163 | Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, | ||||
2164 | BinaryOperator *Shr, | ||||
2165 | const APInt &C) { | ||||
2166 | // An exact shr only shifts out zero bits, so: | ||||
2167 | // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 | ||||
2168 | Value *X = Shr->getOperand(0); | ||||
2169 | CmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2170 | if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && | ||||
2171 | C.isNullValue()) | ||||
2172 | return new ICmpInst(Pred, X, Cmp.getOperand(1)); | ||||
2173 | |||||
2174 | const APInt *ShiftVal; | ||||
2175 | if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal))) | ||||
2176 | return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal); | ||||
2177 | |||||
2178 | const APInt *ShiftAmt; | ||||
2179 | if (!match(Shr->getOperand(1), m_APInt(ShiftAmt))) | ||||
2180 | return nullptr; | ||||
2181 | |||||
2182 | // Check that the shift amount is in range. If not, don't perform undefined | ||||
2183 | // shifts. When the shift is visited it will be simplified. | ||||
2184 | unsigned TypeBits = C.getBitWidth(); | ||||
2185 | unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits); | ||||
2186 | if (ShAmtVal >= TypeBits || ShAmtVal == 0) | ||||
2187 | return nullptr; | ||||
2188 | |||||
2189 | bool IsAShr = Shr->getOpcode() == Instruction::AShr; | ||||
2190 | bool IsExact = Shr->isExact(); | ||||
2191 | Type *ShrTy = Shr->getType(); | ||||
2192 | // TODO: If we could guarantee that InstSimplify would handle all of the | ||||
2193 | // constant-value-based preconditions in the folds below, then we could assert | ||||
2194 | // those conditions rather than checking them. This is difficult because of | ||||
2195 | // undef/poison (PR34838). | ||||
2196 | if (IsAShr) { | ||||
2197 | if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) { | ||||
2198 | // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC) | ||||
2199 | // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC) | ||||
2200 | APInt ShiftedC = C.shl(ShAmtVal); | ||||
2201 | if (ShiftedC.ashr(ShAmtVal) == C) | ||||
2202 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); | ||||
2203 | } | ||||
2204 | if (Pred == CmpInst::ICMP_SGT) { | ||||
2205 | // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1 | ||||
2206 | APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; | ||||
2207 | if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() && | ||||
2208 | (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) | ||||
2209 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); | ||||
2210 | } | ||||
2211 | } else { | ||||
2212 | if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { | ||||
2213 | // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) | ||||
2214 | // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC) | ||||
2215 | APInt ShiftedC = C.shl(ShAmtVal); | ||||
2216 | if (ShiftedC.lshr(ShAmtVal) == C) | ||||
2217 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); | ||||
2218 | } | ||||
2219 | if (Pred == CmpInst::ICMP_UGT) { | ||||
2220 | // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 | ||||
2221 | APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; | ||||
2222 | if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1)) | ||||
2223 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); | ||||
2224 | } | ||||
2225 | } | ||||
2226 | |||||
2227 | if (!Cmp.isEquality()) | ||||
2228 | return nullptr; | ||||
2229 | |||||
2230 | // Handle equality comparisons of shift-by-constant. | ||||
2231 | |||||
2232 | // If the comparison constant changes with the shift, the comparison cannot | ||||
2233 | // succeed (bits of the comparison constant cannot match the shifted value). | ||||
2234 | // This should be known by InstSimplify and already be folded to true/false. | ||||
2235 | assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||((((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && "Expected icmp+shr simplify did not occur.") ? static_cast< void> (0) : __assert_fail ("((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && \"Expected icmp+shr simplify did not occur.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2237, __PRETTY_FUNCTION__)) | ||||
2236 | (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&((((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && "Expected icmp+shr simplify did not occur.") ? static_cast< void> (0) : __assert_fail ("((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && \"Expected icmp+shr simplify did not occur.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2237, __PRETTY_FUNCTION__)) | ||||
2237 | "Expected icmp+shr simplify did not occur.")((((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && "Expected icmp+shr simplify did not occur.") ? static_cast< void> (0) : __assert_fail ("((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && \"Expected icmp+shr simplify did not occur.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2237, __PRETTY_FUNCTION__)); | ||||
2238 | |||||
2239 | // If the bits shifted out are known zero, compare the unshifted value: | ||||
2240 | // (X & 4) >> 1 == 2 --> (X & 4) == 4. | ||||
2241 | if (Shr->isExact()) | ||||
2242 | return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal)); | ||||
2243 | |||||
2244 | if (Shr->hasOneUse()) { | ||||
2245 | // Canonicalize the shift into an 'and': | ||||
2246 | // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt) | ||||
2247 | APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); | ||||
2248 | Constant *Mask = ConstantInt::get(ShrTy, Val); | ||||
2249 | Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); | ||||
2250 | return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal)); | ||||
2251 | } | ||||
2252 | |||||
2253 | return nullptr; | ||||
2254 | } | ||||
2255 | |||||
2256 | Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp, | ||||
2257 | BinaryOperator *SRem, | ||||
2258 | const APInt &C) { | ||||
2259 | // Match an 'is positive' or 'is negative' comparison of remainder by a | ||||
2260 | // constant power-of-2 value: | ||||
2261 | // (X % pow2C) sgt/slt 0 | ||||
2262 | const ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2263 | if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT) | ||||
2264 | return nullptr; | ||||
2265 | |||||
2266 | // TODO: The one-use check is standard because we do not typically want to | ||||
2267 | // create longer instruction sequences, but this might be a special-case | ||||
2268 | // because srem is not good for analysis or codegen. | ||||
2269 | if (!SRem->hasOneUse()) | ||||
2270 | return nullptr; | ||||
2271 | |||||
2272 | const APInt *DivisorC; | ||||
2273 | if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC))) | ||||
2274 | return nullptr; | ||||
2275 | |||||
2276 | // Mask off the sign bit and the modulo bits (low-bits). | ||||
2277 | Type *Ty = SRem->getType(); | ||||
2278 | APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits()); | ||||
2279 | Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1)); | ||||
2280 | Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC); | ||||
2281 | |||||
2282 | // For 'is positive?' check that the sign-bit is clear and at least 1 masked | ||||
2283 | // bit is set. Example: | ||||
2284 | // (i8 X % 32) s> 0 --> (X & 159) s> 0 | ||||
2285 | if (Pred == ICmpInst::ICMP_SGT) | ||||
2286 | return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty)); | ||||
2287 | |||||
2288 | // For 'is negative?' check that the sign-bit is set and at least 1 masked | ||||
2289 | // bit is set. Example: | ||||
2290 | // (i16 X % 4) s< 0 --> (X & 32771) u> 32768 | ||||
2291 | return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask)); | ||||
2292 | } | ||||
2293 | |||||
2294 | /// Fold icmp (udiv X, Y), C. | ||||
2295 | Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, | ||||
2296 | BinaryOperator *UDiv, | ||||
2297 | const APInt &C) { | ||||
2298 | const APInt *C2; | ||||
2299 | if (!match(UDiv->getOperand(0), m_APInt(C2))) | ||||
2300 | return nullptr; | ||||
2301 | |||||
2302 | assert(*C2 != 0 && "udiv 0, X should have been simplified already.")((*C2 != 0 && "udiv 0, X should have been simplified already." ) ? static_cast<void> (0) : __assert_fail ("*C2 != 0 && \"udiv 0, X should have been simplified already.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2302, __PRETTY_FUNCTION__)); | ||||
2303 | |||||
2304 | // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) | ||||
2305 | Value *Y = UDiv->getOperand(1); | ||||
2306 | if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) { | ||||
2307 | assert(!C.isMaxValue() &&((!C.isMaxValue() && "icmp ugt X, UINT_MAX should have been simplified already." ) ? static_cast<void> (0) : __assert_fail ("!C.isMaxValue() && \"icmp ugt X, UINT_MAX should have been simplified already.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2308, __PRETTY_FUNCTION__)) | ||||
2308 | "icmp ugt X, UINT_MAX should have been simplified already.")((!C.isMaxValue() && "icmp ugt X, UINT_MAX should have been simplified already." ) ? static_cast<void> (0) : __assert_fail ("!C.isMaxValue() && \"icmp ugt X, UINT_MAX should have been simplified already.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2308, __PRETTY_FUNCTION__)); | ||||
2309 | return new ICmpInst(ICmpInst::ICMP_ULE, Y, | ||||
2310 | ConstantInt::get(Y->getType(), C2->udiv(C + 1))); | ||||
2311 | } | ||||
2312 | |||||
2313 | // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) | ||||
2314 | if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { | ||||
2315 | assert(C != 0 && "icmp ult X, 0 should have been simplified already.")((C != 0 && "icmp ult X, 0 should have been simplified already." ) ? static_cast<void> (0) : __assert_fail ("C != 0 && \"icmp ult X, 0 should have been simplified already.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2315, __PRETTY_FUNCTION__)); | ||||
2316 | return new ICmpInst(ICmpInst::ICMP_UGT, Y, | ||||
2317 | ConstantInt::get(Y->getType(), C2->udiv(C))); | ||||
2318 | } | ||||
2319 | |||||
2320 | return nullptr; | ||||
2321 | } | ||||
2322 | |||||
2323 | /// Fold icmp ({su}div X, Y), C. | ||||
2324 | Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, | ||||
2325 | BinaryOperator *Div, | ||||
2326 | const APInt &C) { | ||||
2327 | // Fold: icmp pred ([us]div X, C2), C -> range test | ||||
2328 | // Fold this div into the comparison, producing a range check. | ||||
2329 | // Determine, based on the divide type, what the range is being | ||||
2330 | // checked. If there is an overflow on the low or high side, remember | ||||
2331 | // it, otherwise compute the range [low, hi) bounding the new value. | ||||
2332 | // See: InsertRangeTest above for the kinds of replacements possible. | ||||
2333 | const APInt *C2; | ||||
2334 | if (!match(Div->getOperand(1), m_APInt(C2))) | ||||
2335 | return nullptr; | ||||
2336 | |||||
2337 | // FIXME: If the operand types don't match the type of the divide | ||||
2338 | // then don't attempt this transform. The code below doesn't have the | ||||
2339 | // logic to deal with a signed divide and an unsigned compare (and | ||||
2340 | // vice versa). This is because (x /s C2) <s C produces different | ||||
2341 | // results than (x /s C2) <u C or (x /u C2) <s C or even | ||||
2342 | // (x /u C2) <u C. Simply casting the operands and result won't | ||||
2343 | // work. :( The if statement below tests that condition and bails | ||||
2344 | // if it finds it. | ||||
2345 | bool DivIsSigned = Div->getOpcode() == Instruction::SDiv; | ||||
2346 | if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned()) | ||||
2347 | return nullptr; | ||||
2348 | |||||
2349 | // The ProdOV computation fails on divide by 0 and divide by -1. Cases with | ||||
2350 | // INT_MIN will also fail if the divisor is 1. Although folds of all these | ||||
2351 | // division-by-constant cases should be present, we can not assert that they | ||||
2352 | // have happened before we reach this icmp instruction. | ||||
2353 | if (C2->isNullValue() || C2->isOneValue() || | ||||
2354 | (DivIsSigned && C2->isAllOnesValue())) | ||||
2355 | return nullptr; | ||||
2356 | |||||
2357 | // Compute Prod = C * C2. We are essentially solving an equation of | ||||
2358 | // form X / C2 = C. We solve for X by multiplying C2 and C. | ||||
2359 | // By solving for X, we can turn this into a range check instead of computing | ||||
2360 | // a divide. | ||||
2361 | APInt Prod = C * *C2; | ||||
2362 | |||||
2363 | // Determine if the product overflows by seeing if the product is not equal to | ||||
2364 | // the divide. Make sure we do the same kind of divide as in the LHS | ||||
2365 | // instruction that we're folding. | ||||
2366 | bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C; | ||||
2367 | |||||
2368 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2369 | |||||
2370 | // If the division is known to be exact, then there is no remainder from the | ||||
2371 | // divide, so the covered range size is unit, otherwise it is the divisor. | ||||
2372 | APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2; | ||||
2373 | |||||
2374 | // Figure out the interval that is being checked. For example, a comparison | ||||
2375 | // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). | ||||
2376 | // Compute this interval based on the constants involved and the signedness of | ||||
2377 | // the compare/divide. This computes a half-open interval, keeping track of | ||||
2378 | // whether either value in the interval overflows. After analysis each | ||||
2379 | // overflow variable is set to 0 if its corresponding bound variable is valid, | ||||
2380 | // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. | ||||
2381 | int LoOverflow = 0, HiOverflow = 0; | ||||
2382 | APInt LoBound, HiBound; | ||||
2383 | |||||
2384 | if (!DivIsSigned) { // udiv | ||||
2385 | // e.g. X/5 op 3 --> [15, 20) | ||||
2386 | LoBound = Prod; | ||||
2387 | HiOverflow = LoOverflow = ProdOV; | ||||
2388 | if (!HiOverflow) { | ||||
2389 | // If this is not an exact divide, then many values in the range collapse | ||||
2390 | // to the same result value. | ||||
2391 | HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); | ||||
2392 | } | ||||
2393 | } else if (C2->isStrictlyPositive()) { // Divisor is > 0. | ||||
2394 | if (C.isNullValue()) { // (X / pos) op 0 | ||||
2395 | // Can't overflow. e.g. X/2 op 0 --> [-1, 2) | ||||
2396 | LoBound = -(RangeSize - 1); | ||||
2397 | HiBound = RangeSize; | ||||
2398 | } else if (C.isStrictlyPositive()) { // (X / pos) op pos | ||||
2399 | LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) | ||||
2400 | HiOverflow = LoOverflow = ProdOV; | ||||
2401 | if (!HiOverflow) | ||||
2402 | HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true); | ||||
2403 | } else { // (X / pos) op neg | ||||
2404 | // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) | ||||
2405 | HiBound = Prod + 1; | ||||
2406 | LoOverflow = HiOverflow = ProdOV ? -1 : 0; | ||||
2407 | if (!LoOverflow) { | ||||
2408 | APInt DivNeg = -RangeSize; | ||||
2409 | LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; | ||||
2410 | } | ||||
2411 | } | ||||
2412 | } else if (C2->isNegative()) { // Divisor is < 0. | ||||
2413 | if (Div->isExact()) | ||||
2414 | RangeSize.negate(); | ||||
2415 | if (C.isNullValue()) { // (X / neg) op 0 | ||||
2416 | // e.g. X/-5 op 0 --> [-4, 5) | ||||
2417 | LoBound = RangeSize + 1; | ||||
2418 | HiBound = -RangeSize; | ||||
2419 | if (HiBound == *C2) { // -INTMIN = INTMIN | ||||
2420 | HiOverflow = 1; // [INTMIN+1, overflow) | ||||
2421 | HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN | ||||
2422 | } | ||||
2423 | } else if (C.isStrictlyPositive()) { // (X / neg) op pos | ||||
2424 | // e.g. X/-5 op 3 --> [-19, -14) | ||||
2425 | HiBound = Prod + 1; | ||||
2426 | HiOverflow = LoOverflow = ProdOV ? -1 : 0; | ||||
2427 | if (!LoOverflow) | ||||
2428 | LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; | ||||
2429 | } else { // (X / neg) op neg | ||||
2430 | LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) | ||||
2431 | LoOverflow = HiOverflow = ProdOV; | ||||
2432 | if (!HiOverflow) | ||||
2433 | HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true); | ||||
2434 | } | ||||
2435 | |||||
2436 | // Dividing by a negative swaps the condition. LT <-> GT | ||||
2437 | Pred = ICmpInst::getSwappedPredicate(Pred); | ||||
2438 | } | ||||
2439 | |||||
2440 | Value *X = Div->getOperand(0); | ||||
2441 | switch (Pred) { | ||||
2442 | default: llvm_unreachable("Unhandled icmp opcode!")::llvm::llvm_unreachable_internal("Unhandled icmp opcode!", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2442); | ||||
2443 | case ICmpInst::ICMP_EQ: | ||||
2444 | if (LoOverflow && HiOverflow) | ||||
2445 | return replaceInstUsesWith(Cmp, Builder.getFalse()); | ||||
2446 | if (HiOverflow) | ||||
2447 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | ||||
2448 | ICmpInst::ICMP_UGE, X, | ||||
2449 | ConstantInt::get(Div->getType(), LoBound)); | ||||
2450 | if (LoOverflow) | ||||
2451 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | ||||
2452 | ICmpInst::ICMP_ULT, X, | ||||
2453 | ConstantInt::get(Div->getType(), HiBound)); | ||||
2454 | return replaceInstUsesWith( | ||||
2455 | Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true)); | ||||
2456 | case ICmpInst::ICMP_NE: | ||||
2457 | if (LoOverflow && HiOverflow) | ||||
2458 | return replaceInstUsesWith(Cmp, Builder.getTrue()); | ||||
2459 | if (HiOverflow) | ||||
2460 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | ||||
2461 | ICmpInst::ICMP_ULT, X, | ||||
2462 | ConstantInt::get(Div->getType(), LoBound)); | ||||
2463 | if (LoOverflow) | ||||
2464 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | ||||
2465 | ICmpInst::ICMP_UGE, X, | ||||
2466 | ConstantInt::get(Div->getType(), HiBound)); | ||||
2467 | return replaceInstUsesWith(Cmp, | ||||
2468 | insertRangeTest(X, LoBound, HiBound, | ||||
2469 | DivIsSigned, false)); | ||||
2470 | case ICmpInst::ICMP_ULT: | ||||
2471 | case ICmpInst::ICMP_SLT: | ||||
2472 | if (LoOverflow == +1) // Low bound is greater than input range. | ||||
2473 | return replaceInstUsesWith(Cmp, Builder.getTrue()); | ||||
2474 | if (LoOverflow == -1) // Low bound is less than input range. | ||||
2475 | return replaceInstUsesWith(Cmp, Builder.getFalse()); | ||||
2476 | return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound)); | ||||
2477 | case ICmpInst::ICMP_UGT: | ||||
2478 | case ICmpInst::ICMP_SGT: | ||||
2479 | if (HiOverflow == +1) // High bound greater than input range. | ||||
2480 | return replaceInstUsesWith(Cmp, Builder.getFalse()); | ||||
2481 | if (HiOverflow == -1) // High bound less than input range. | ||||
2482 | return replaceInstUsesWith(Cmp, Builder.getTrue()); | ||||
2483 | if (Pred == ICmpInst::ICMP_UGT) | ||||
2484 | return new ICmpInst(ICmpInst::ICMP_UGE, X, | ||||
2485 | ConstantInt::get(Div->getType(), HiBound)); | ||||
2486 | return new ICmpInst(ICmpInst::ICMP_SGE, X, | ||||
2487 | ConstantInt::get(Div->getType(), HiBound)); | ||||
2488 | } | ||||
2489 | |||||
2490 | return nullptr; | ||||
2491 | } | ||||
2492 | |||||
2493 | /// Fold icmp (sub X, Y), C. | ||||
2494 | Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, | ||||
2495 | BinaryOperator *Sub, | ||||
2496 | const APInt &C) { | ||||
2497 | Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1); | ||||
2498 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2499 | const APInt *C2; | ||||
2500 | APInt SubResult; | ||||
2501 | |||||
2502 | // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0 | ||||
2503 | if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality()) | ||||
2504 | return new ICmpInst(Cmp.getPredicate(), Y, | ||||
2505 | ConstantInt::get(Y->getType(), 0)); | ||||
2506 | |||||
2507 | // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C) | ||||
2508 | if (match(X, m_APInt(C2)) && | ||||
2509 | ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) || | ||||
2510 | (Cmp.isSigned() && Sub->hasNoSignedWrap())) && | ||||
2511 | !subWithOverflow(SubResult, *C2, C, Cmp.isSigned())) | ||||
2512 | return new ICmpInst(Cmp.getSwappedPredicate(), Y, | ||||
2513 | ConstantInt::get(Y->getType(), SubResult)); | ||||
2514 | |||||
2515 | // The following transforms are only worth it if the only user of the subtract | ||||
2516 | // is the icmp. | ||||
2517 | if (!Sub->hasOneUse()) | ||||
2518 | return nullptr; | ||||
2519 | |||||
2520 | if (Sub->hasNoSignedWrap()) { | ||||
2521 | // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y) | ||||
2522 | if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue()) | ||||
2523 | return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); | ||||
2524 | |||||
2525 | // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) | ||||
2526 | if (Pred == ICmpInst::ICMP_SGT && C.isNullValue()) | ||||
2527 | return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); | ||||
2528 | |||||
2529 | // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) | ||||
2530 | if (Pred == ICmpInst::ICMP_SLT && C.isNullValue()) | ||||
2531 | return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); | ||||
2532 | |||||
2533 | // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) | ||||
2534 | if (Pred == ICmpInst::ICMP_SLT && C.isOneValue()) | ||||
2535 | return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); | ||||
2536 | } | ||||
2537 | |||||
2538 | if (!match(X, m_APInt(C2))) | ||||
2539 | return nullptr; | ||||
2540 | |||||
2541 | // C2 - Y <u C -> (Y | (C - 1)) == C2 | ||||
2542 | // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 | ||||
2543 | if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && | ||||
2544 | (*C2 & (C - 1)) == (C - 1)) | ||||
2545 | return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X); | ||||
2546 | |||||
2547 | // C2 - Y >u C -> (Y | C) != C2 | ||||
2548 | // iff C2 & C == C and C + 1 is a power of 2 | ||||
2549 | if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C) | ||||
2550 | return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X); | ||||
2551 | |||||
2552 | return nullptr; | ||||
2553 | } | ||||
2554 | |||||
2555 | /// Fold icmp (add X, Y), C. | ||||
2556 | Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, | ||||
2557 | BinaryOperator *Add, | ||||
2558 | const APInt &C) { | ||||
2559 | Value *Y = Add->getOperand(1); | ||||
2560 | const APInt *C2; | ||||
2561 | if (Cmp.isEquality() || !match(Y, m_APInt(C2))) | ||||
2562 | return nullptr; | ||||
2563 | |||||
2564 | // Fold icmp pred (add X, C2), C. | ||||
2565 | Value *X = Add->getOperand(0); | ||||
2566 | Type *Ty = Add->getType(); | ||||
2567 | CmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2568 | |||||
2569 | // If the add does not wrap, we can always adjust the compare by subtracting | ||||
2570 | // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE | ||||
2571 | // are canonicalized to SGT/SLT/UGT/ULT. | ||||
2572 | if ((Add->hasNoSignedWrap() && | ||||
2573 | (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) || | ||||
2574 | (Add->hasNoUnsignedWrap() && | ||||
2575 | (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) { | ||||
2576 | bool Overflow; | ||||
2577 | APInt NewC = | ||||
2578 | Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow); | ||||
2579 | // If there is overflow, the result must be true or false. | ||||
2580 | // TODO: Can we assert there is no overflow because InstSimplify always | ||||
2581 | // handles those cases? | ||||
2582 | if (!Overflow) | ||||
2583 | // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2) | ||||
2584 | return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC)); | ||||
2585 | } | ||||
2586 | |||||
2587 | auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2); | ||||
2588 | const APInt &Upper = CR.getUpper(); | ||||
2589 | const APInt &Lower = CR.getLower(); | ||||
2590 | if (Cmp.isSigned()) { | ||||
2591 | if (Lower.isSignMask()) | ||||
2592 | return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); | ||||
2593 | if (Upper.isSignMask()) | ||||
2594 | return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); | ||||
2595 | } else { | ||||
2596 | if (Lower.isMinValue()) | ||||
2597 | return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper)); | ||||
2598 | if (Upper.isMinValue()) | ||||
2599 | return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower)); | ||||
2600 | } | ||||
2601 | |||||
2602 | if (!Add->hasOneUse()) | ||||
2603 | return nullptr; | ||||
2604 | |||||
2605 | // X+C <u C2 -> (X & -C2) == C | ||||
2606 | // iff C & (C2-1) == 0 | ||||
2607 | // C2 is a power of 2 | ||||
2608 | if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0) | ||||
2609 | return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C), | ||||
2610 | ConstantExpr::getNeg(cast<Constant>(Y))); | ||||
2611 | |||||
2612 | // X+C >u C2 -> (X & ~C2) != C | ||||
2613 | // iff C & C2 == 0 | ||||
2614 | // C2+1 is a power of 2 | ||||
2615 | if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0) | ||||
2616 | return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C), | ||||
2617 | ConstantExpr::getNeg(cast<Constant>(Y))); | ||||
2618 | |||||
2619 | return nullptr; | ||||
2620 | } | ||||
2621 | |||||
2622 | bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, | ||||
2623 | Value *&RHS, ConstantInt *&Less, | ||||
2624 | ConstantInt *&Equal, | ||||
2625 | ConstantInt *&Greater) { | ||||
2626 | // TODO: Generalize this to work with other comparison idioms or ensure | ||||
2627 | // they get canonicalized into this form. | ||||
2628 | |||||
2629 | // select i1 (a == b), | ||||
2630 | // i32 Equal, | ||||
2631 | // i32 (select i1 (a < b), i32 Less, i32 Greater) | ||||
2632 | // where Equal, Less and Greater are placeholders for any three constants. | ||||
2633 | ICmpInst::Predicate PredA; | ||||
2634 | if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) || | ||||
2635 | !ICmpInst::isEquality(PredA)) | ||||
2636 | return false; | ||||
2637 | Value *EqualVal = SI->getTrueValue(); | ||||
2638 | Value *UnequalVal = SI->getFalseValue(); | ||||
2639 | // We still can get non-canonical predicate here, so canonicalize. | ||||
2640 | if (PredA == ICmpInst::ICMP_NE) | ||||
2641 | std::swap(EqualVal, UnequalVal); | ||||
2642 | if (!match(EqualVal, m_ConstantInt(Equal))) | ||||
2643 | return false; | ||||
2644 | ICmpInst::Predicate PredB; | ||||
2645 | Value *LHS2, *RHS2; | ||||
2646 | if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)), | ||||
2647 | m_ConstantInt(Less), m_ConstantInt(Greater)))) | ||||
2648 | return false; | ||||
2649 | // We can get predicate mismatch here, so canonicalize if possible: | ||||
2650 | // First, ensure that 'LHS' match. | ||||
2651 | if (LHS2 != LHS) { | ||||
2652 | // x sgt y <--> y slt x | ||||
2653 | std::swap(LHS2, RHS2); | ||||
2654 | PredB = ICmpInst::getSwappedPredicate(PredB); | ||||
2655 | } | ||||
2656 | if (LHS2 != LHS) | ||||
2657 | return false; | ||||
2658 | // We also need to canonicalize 'RHS'. | ||||
2659 | if (PredB == ICmpInst::ICMP_SGT && isa<Constant>(RHS2)) { | ||||
2660 | // x sgt C-1 <--> x sge C <--> not(x slt C) | ||||
2661 | auto FlippedStrictness = | ||||
2662 | getFlippedStrictnessPredicateAndConstant(PredB, cast<Constant>(RHS2)); | ||||
2663 | if (!FlippedStrictness) | ||||
2664 | return false; | ||||
2665 | assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check")((FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check") ? static_cast<void> (0) : __assert_fail ("FlippedStrictness->first == ICmpInst::ICMP_SGE && \"Sanity check\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2665, __PRETTY_FUNCTION__)); | ||||
2666 | RHS2 = FlippedStrictness->second; | ||||
2667 | // And kind-of perform the result swap. | ||||
2668 | std::swap(Less, Greater); | ||||
2669 | PredB = ICmpInst::ICMP_SLT; | ||||
2670 | } | ||||
2671 | return PredB == ICmpInst::ICMP_SLT && RHS == RHS2; | ||||
2672 | } | ||||
2673 | |||||
2674 | Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, | ||||
2675 | SelectInst *Select, | ||||
2676 | ConstantInt *C) { | ||||
2677 | |||||
2678 | assert(C && "Cmp RHS should be a constant int!")((C && "Cmp RHS should be a constant int!") ? static_cast <void> (0) : __assert_fail ("C && \"Cmp RHS should be a constant int!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2678, __PRETTY_FUNCTION__)); | ||||
2679 | // If we're testing a constant value against the result of a three way | ||||
2680 | // comparison, the result can be expressed directly in terms of the | ||||
2681 | // original values being compared. Note: We could possibly be more | ||||
2682 | // aggressive here and remove the hasOneUse test. The original select is | ||||
2683 | // really likely to simplify or sink when we remove a test of the result. | ||||
2684 | Value *OrigLHS, *OrigRHS; | ||||
2685 | ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan; | ||||
2686 | if (Cmp.hasOneUse() && | ||||
2687 | matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal, | ||||
2688 | C3GreaterThan)) { | ||||
2689 | assert(C1LessThan && C2Equal && C3GreaterThan)((C1LessThan && C2Equal && C3GreaterThan) ? static_cast <void> (0) : __assert_fail ("C1LessThan && C2Equal && C3GreaterThan" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 2689, __PRETTY_FUNCTION__)); | ||||
2690 | |||||
2691 | bool TrueWhenLessThan = | ||||
2692 | ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C) | ||||
2693 | ->isAllOnesValue(); | ||||
2694 | bool TrueWhenEqual = | ||||
2695 | ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C) | ||||
2696 | ->isAllOnesValue(); | ||||
2697 | bool TrueWhenGreaterThan = | ||||
2698 | ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C) | ||||
2699 | ->isAllOnesValue(); | ||||
2700 | |||||
2701 | // This generates the new instruction that will replace the original Cmp | ||||
2702 | // Instruction. Instead of enumerating the various combinations when | ||||
2703 | // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus | ||||
2704 | // false, we rely on chaining of ORs and future passes of InstCombine to | ||||
2705 | // simplify the OR further (i.e. a s< b || a == b becomes a s<= b). | ||||
2706 | |||||
2707 | // When none of the three constants satisfy the predicate for the RHS (C), | ||||
2708 | // the entire original Cmp can be simplified to a false. | ||||
2709 | Value *Cond = Builder.getFalse(); | ||||
2710 | if (TrueWhenLessThan) | ||||
2711 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, | ||||
2712 | OrigLHS, OrigRHS)); | ||||
2713 | if (TrueWhenEqual) | ||||
2714 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, | ||||
2715 | OrigLHS, OrigRHS)); | ||||
2716 | if (TrueWhenGreaterThan) | ||||
2717 | Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, | ||||
2718 | OrigLHS, OrigRHS)); | ||||
2719 | |||||
2720 | return replaceInstUsesWith(Cmp, Cond); | ||||
2721 | } | ||||
2722 | return nullptr; | ||||
2723 | } | ||||
2724 | |||||
2725 | static Instruction *foldICmpBitCast(ICmpInst &Cmp, | ||||
2726 | InstCombiner::BuilderTy &Builder) { | ||||
2727 | auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0)); | ||||
2728 | if (!Bitcast) | ||||
2729 | return nullptr; | ||||
2730 | |||||
2731 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2732 | Value *Op1 = Cmp.getOperand(1); | ||||
2733 | Value *BCSrcOp = Bitcast->getOperand(0); | ||||
2734 | |||||
2735 | // Make sure the bitcast doesn't change the number of vector elements. | ||||
2736 | if (Bitcast->getSrcTy()->getScalarSizeInBits() == | ||||
2737 | Bitcast->getDestTy()->getScalarSizeInBits()) { | ||||
2738 | // Zero-equality and sign-bit checks are preserved through sitofp + bitcast. | ||||
2739 | Value *X; | ||||
2740 | if (match(BCSrcOp, m_SIToFP(m_Value(X)))) { | ||||
2741 | // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0 | ||||
2742 | // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0 | ||||
2743 | // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0 | ||||
2744 | // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0 | ||||
2745 | if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT || | ||||
2746 | Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) && | ||||
2747 | match(Op1, m_Zero())) | ||||
2748 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType())); | ||||
2749 | |||||
2750 | // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1 | ||||
2751 | if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One())) | ||||
2752 | return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1)); | ||||
2753 | |||||
2754 | // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1 | ||||
2755 | if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes())) | ||||
2756 | return new ICmpInst(Pred, X, | ||||
2757 | ConstantInt::getAllOnesValue(X->getType())); | ||||
2758 | } | ||||
2759 | |||||
2760 | // Zero-equality checks are preserved through unsigned floating-point casts: | ||||
2761 | // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0 | ||||
2762 | // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0 | ||||
2763 | if (match(BCSrcOp, m_UIToFP(m_Value(X)))) | ||||
2764 | if (Cmp.isEquality() && match(Op1, m_Zero())) | ||||
2765 | return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType())); | ||||
2766 | } | ||||
2767 | |||||
2768 | // Test to see if the operands of the icmp are casted versions of other | ||||
2769 | // values. If the ptr->ptr cast can be stripped off both arguments, do so. | ||||
2770 | if (Bitcast->getType()->isPointerTy() && | ||||
2771 | (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { | ||||
2772 | // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast | ||||
2773 | // so eliminate it as well. | ||||
2774 | if (auto *BC2 = dyn_cast<BitCastInst>(Op1)) | ||||
2775 | Op1 = BC2->getOperand(0); | ||||
2776 | |||||
2777 | Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType()); | ||||
2778 | return new ICmpInst(Pred, BCSrcOp, Op1); | ||||
2779 | } | ||||
2780 | |||||
2781 | // Folding: icmp <pred> iN X, C | ||||
2782 | // where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN | ||||
2783 | // and C is a splat of a K-bit pattern | ||||
2784 | // and SC is a constant vector = <C', C', C', ..., C'> | ||||
2785 | // Into: | ||||
2786 | // %E = extractelement <M x iK> %vec, i32 C' | ||||
2787 | // icmp <pred> iK %E, trunc(C) | ||||
2788 | const APInt *C; | ||||
2789 | if (!match(Cmp.getOperand(1), m_APInt(C)) || | ||||
2790 | !Bitcast->getType()->isIntegerTy() || | ||||
2791 | !Bitcast->getSrcTy()->isIntOrIntVectorTy()) | ||||
2792 | return nullptr; | ||||
2793 | |||||
2794 | Value *Vec; | ||||
2795 | Constant *Mask; | ||||
2796 | if (match(BCSrcOp, | ||||
2797 | m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) { | ||||
2798 | // Check whether every element of Mask is the same constant | ||||
2799 | if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) { | ||||
2800 | auto *VecTy = cast<VectorType>(BCSrcOp->getType()); | ||||
2801 | auto *EltTy = cast<IntegerType>(VecTy->getElementType()); | ||||
2802 | if (C->isSplat(EltTy->getBitWidth())) { | ||||
2803 | // Fold the icmp based on the value of C | ||||
2804 | // If C is M copies of an iK sized bit pattern, | ||||
2805 | // then: | ||||
2806 | // => %E = extractelement <N x iK> %vec, i32 Elem | ||||
2807 | // icmp <pred> iK %SplatVal, <pattern> | ||||
2808 | Value *Extract = Builder.CreateExtractElement(Vec, Elem); | ||||
2809 | Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth())); | ||||
2810 | return new ICmpInst(Pred, Extract, NewC); | ||||
2811 | } | ||||
2812 | } | ||||
2813 | } | ||||
2814 | return nullptr; | ||||
2815 | } | ||||
2816 | |||||
2817 | /// Try to fold integer comparisons with a constant operand: icmp Pred X, C | ||||
2818 | /// where X is some kind of instruction. | ||||
2819 | Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { | ||||
2820 | const APInt *C; | ||||
2821 | if (!match(Cmp.getOperand(1), m_APInt(C))) | ||||
2822 | return nullptr; | ||||
2823 | |||||
2824 | if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) { | ||||
2825 | switch (BO->getOpcode()) { | ||||
2826 | case Instruction::Xor: | ||||
2827 | if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C)) | ||||
2828 | return I; | ||||
2829 | break; | ||||
2830 | case Instruction::And: | ||||
2831 | if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C)) | ||||
2832 | return I; | ||||
2833 | break; | ||||
2834 | case Instruction::Or: | ||||
2835 | if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C)) | ||||
2836 | return I; | ||||
2837 | break; | ||||
2838 | case Instruction::Mul: | ||||
2839 | if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C)) | ||||
2840 | return I; | ||||
2841 | break; | ||||
2842 | case Instruction::Shl: | ||||
2843 | if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C)) | ||||
2844 | return I; | ||||
2845 | break; | ||||
2846 | case Instruction::LShr: | ||||
2847 | case Instruction::AShr: | ||||
2848 | if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C)) | ||||
2849 | return I; | ||||
2850 | break; | ||||
2851 | case Instruction::SRem: | ||||
2852 | if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C)) | ||||
2853 | return I; | ||||
2854 | break; | ||||
2855 | case Instruction::UDiv: | ||||
2856 | if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C)) | ||||
2857 | return I; | ||||
2858 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
2859 | case Instruction::SDiv: | ||||
2860 | if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C)) | ||||
2861 | return I; | ||||
2862 | break; | ||||
2863 | case Instruction::Sub: | ||||
2864 | if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C)) | ||||
2865 | return I; | ||||
2866 | break; | ||||
2867 | case Instruction::Add: | ||||
2868 | if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C)) | ||||
2869 | return I; | ||||
2870 | break; | ||||
2871 | default: | ||||
2872 | break; | ||||
2873 | } | ||||
2874 | // TODO: These folds could be refactored to be part of the above calls. | ||||
2875 | if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C)) | ||||
2876 | return I; | ||||
2877 | } | ||||
2878 | |||||
2879 | // Match against CmpInst LHS being instructions other than binary operators. | ||||
2880 | |||||
2881 | if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) { | ||||
2882 | // For now, we only support constant integers while folding the | ||||
2883 | // ICMP(SELECT)) pattern. We can extend this to support vector of integers | ||||
2884 | // similar to the cases handled by binary ops above. | ||||
2885 | if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1))) | ||||
2886 | if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS)) | ||||
2887 | return I; | ||||
2888 | } | ||||
2889 | |||||
2890 | if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) { | ||||
2891 | if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C)) | ||||
2892 | return I; | ||||
2893 | } | ||||
2894 | |||||
2895 | if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) | ||||
2896 | if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) | ||||
2897 | return I; | ||||
2898 | |||||
2899 | return nullptr; | ||||
2900 | } | ||||
2901 | |||||
2902 | /// Fold an icmp equality instruction with binary operator LHS and constant RHS: | ||||
2903 | /// icmp eq/ne BO, C. | ||||
2904 | Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, | ||||
2905 | BinaryOperator *BO, | ||||
2906 | const APInt &C) { | ||||
2907 | // TODO: Some of these folds could work with arbitrary constants, but this | ||||
2908 | // function is limited to scalar and vector splat constants. | ||||
2909 | if (!Cmp.isEquality()) | ||||
2910 | return nullptr; | ||||
2911 | |||||
2912 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
2913 | bool isICMP_NE = Pred == ICmpInst::ICMP_NE; | ||||
2914 | Constant *RHS = cast<Constant>(Cmp.getOperand(1)); | ||||
2915 | Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); | ||||
2916 | |||||
2917 | switch (BO->getOpcode()) { | ||||
2918 | case Instruction::SRem: | ||||
2919 | // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. | ||||
2920 | if (C.isNullValue() && BO->hasOneUse()) { | ||||
2921 | const APInt *BOC; | ||||
2922 | if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { | ||||
2923 | Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); | ||||
2924 | return new ICmpInst(Pred, NewRem, | ||||
2925 | Constant::getNullValue(BO->getType())); | ||||
2926 | } | ||||
2927 | } | ||||
2928 | break; | ||||
2929 | case Instruction::Add: { | ||||
2930 | // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. | ||||
2931 | const APInt *BOC; | ||||
2932 | if (match(BOp1, m_APInt(BOC))) { | ||||
2933 | if (BO->hasOneUse()) { | ||||
2934 | Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1)); | ||||
2935 | return new ICmpInst(Pred, BOp0, SubC); | ||||
2936 | } | ||||
2937 | } else if (C.isNullValue()) { | ||||
2938 | // Replace ((add A, B) != 0) with (A != -B) if A or B is | ||||
2939 | // efficiently invertible, or if the add has just this one use. | ||||
2940 | if (Value *NegVal = dyn_castNegVal(BOp1)) | ||||
2941 | return new ICmpInst(Pred, BOp0, NegVal); | ||||
2942 | if (Value *NegVal = dyn_castNegVal(BOp0)) | ||||
2943 | return new ICmpInst(Pred, NegVal, BOp1); | ||||
2944 | if (BO->hasOneUse()) { | ||||
2945 | Value *Neg = Builder.CreateNeg(BOp1); | ||||
2946 | Neg->takeName(BO); | ||||
2947 | return new ICmpInst(Pred, BOp0, Neg); | ||||
2948 | } | ||||
2949 | } | ||||
2950 | break; | ||||
2951 | } | ||||
2952 | case Instruction::Xor: | ||||
2953 | if (BO->hasOneUse()) { | ||||
2954 | if (Constant *BOC = dyn_cast<Constant>(BOp1)) { | ||||
2955 | // For the xor case, we can xor two constants together, eliminating | ||||
2956 | // the explicit xor. | ||||
2957 | return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); | ||||
2958 | } else if (C.isNullValue()) { | ||||
2959 | // Replace ((xor A, B) != 0) with (A != B) | ||||
2960 | return new ICmpInst(Pred, BOp0, BOp1); | ||||
2961 | } | ||||
2962 | } | ||||
2963 | break; | ||||
2964 | case Instruction::Sub: | ||||
2965 | if (BO->hasOneUse()) { | ||||
2966 | const APInt *BOC; | ||||
2967 | if (match(BOp0, m_APInt(BOC))) { | ||||
2968 | // Replace ((sub BOC, B) != C) with (B != BOC-C). | ||||
2969 | Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS); | ||||
2970 | return new ICmpInst(Pred, BOp1, SubC); | ||||
2971 | } else if (C.isNullValue()) { | ||||
2972 | // Replace ((sub A, B) != 0) with (A != B). | ||||
2973 | return new ICmpInst(Pred, BOp0, BOp1); | ||||
2974 | } | ||||
2975 | } | ||||
2976 | break; | ||||
2977 | case Instruction::Or: { | ||||
2978 | const APInt *BOC; | ||||
2979 | if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) { | ||||
2980 | // Comparing if all bits outside of a constant mask are set? | ||||
2981 | // Replace (X | C) == -1 with (X & ~C) == ~C. | ||||
2982 | // This removes the -1 constant. | ||||
2983 | Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1)); | ||||
2984 | Value *And = Builder.CreateAnd(BOp0, NotBOC); | ||||
2985 | return new ICmpInst(Pred, And, NotBOC); | ||||
2986 | } | ||||
2987 | break; | ||||
2988 | } | ||||
2989 | case Instruction::And: { | ||||
2990 | const APInt *BOC; | ||||
2991 | if (match(BOp1, m_APInt(BOC))) { | ||||
2992 | // If we have ((X & C) == C), turn it into ((X & C) != 0). | ||||
2993 | if (C == *BOC && C.isPowerOf2()) | ||||
2994 | return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, | ||||
2995 | BO, Constant::getNullValue(RHS->getType())); | ||||
2996 | } | ||||
2997 | break; | ||||
2998 | } | ||||
2999 | case Instruction::Mul: | ||||
3000 | if (C.isNullValue() && BO->hasNoSignedWrap()) { | ||||
3001 | const APInt *BOC; | ||||
3002 | if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) { | ||||
3003 | // The trivial case (mul X, 0) is handled by InstSimplify. | ||||
3004 | // General case : (mul X, C) != 0 iff X != 0 | ||||
3005 | // (mul X, C) == 0 iff X == 0 | ||||
3006 | return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType())); | ||||
3007 | } | ||||
3008 | } | ||||
3009 | break; | ||||
3010 | case Instruction::UDiv: | ||||
3011 | if (C.isNullValue()) { | ||||
3012 | // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A) | ||||
3013 | auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; | ||||
3014 | return new ICmpInst(NewPred, BOp1, BOp0); | ||||
3015 | } | ||||
3016 | break; | ||||
3017 | default: | ||||
3018 | break; | ||||
3019 | } | ||||
3020 | return nullptr; | ||||
3021 | } | ||||
3022 | |||||
3023 | /// Fold an equality icmp with LLVM intrinsic and constant operand: icmp eq/ne II, C. Handles bswap (compare the un-swapped argument against byteSwap(C)), ctlz/cttz, ctpop, uadd.sat and usub.sat. Returns &Cmp when the compare's operands were rewritten in place, otherwise the instruction produced by the individual fold, or nullptr if no fold applies. | ||||
3024 | Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp, | ||||
3025 | IntrinsicInst *II, | ||||
3026 | const APInt &C) { | ||||
3027 | Type *Ty = II->getType(); | ||||
3028 | unsigned BitWidth = C.getBitWidth(); | ||||
3029 | switch (II->getIntrinsicID()) { | ||||
3030 | case Intrinsic::bswap: | ||||
3031 | Worklist.Add(II); | ||||
3032 | Cmp.setOperand(0, II->getArgOperand(0)); | ||||
3033 | Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap())); | ||||
3034 | return &Cmp; | ||||
3035 | |||||
3036 | case Intrinsic::ctlz: | ||||
3037 | case Intrinsic::cttz: { | ||||
3038 | // ctlz/cttz(A) == bitwidth(A) -> A == 0 and likewise for != | ||||
3039 | if (C == BitWidth) { | ||||
3040 | Worklist.Add(II); | ||||
3041 | Cmp.setOperand(0, II->getArgOperand(0)); | ||||
3042 | Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); | ||||
3043 | return &Cmp; | ||||
3044 | } | ||||
3045 | |||||
3046 | // cttz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set | ||||
3047 | // and Mask1 has the low C+1 bits (bits 0..C) set. Similar for ctlz, but with the high C+1 bits and bit (BitWidth - C - 1). | ||||
3048 | // Limit to one use to ensure we don't increase instruction count. | ||||
3049 | unsigned Num = C.getLimitedValue(BitWidth); | ||||
3050 | if (Num != BitWidth && II->hasOneUse()) { | ||||
3051 | bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz; | ||||
3052 | APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1) | ||||
3053 | : APInt::getHighBitsSet(BitWidth, Num + 1); | ||||
3054 | APInt Mask2 = IsTrailing | ||||
3055 | ? APInt::getOneBitSet(BitWidth, Num) | ||||
3056 | : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); | ||||
3057 | Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1)); | ||||
3058 | Cmp.setOperand(1, ConstantInt::get(Ty, Mask2)); | ||||
3059 | Worklist.Add(II); | ||||
3060 | return &Cmp; | ||||
3061 | } | ||||
3062 | break; | ||||
3063 | } | ||||
3064 | |||||
3065 | case Intrinsic::ctpop: { | ||||
3066 | // popcount(A) == 0 -> A == 0 and likewise for != | ||||
3067 | // popcount(A) == bitwidth(A) -> A == -1 and likewise for != | ||||
3068 | bool IsZero = C.isNullValue(); | ||||
3069 | if (IsZero || C == BitWidth) { | ||||
3070 | Worklist.Add(II); | ||||
3071 | Cmp.setOperand(0, II->getArgOperand(0)); | ||||
3072 | auto *NewOp = | ||||
3073 | IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty); | ||||
3074 | Cmp.setOperand(1, NewOp); | ||||
3075 | return &Cmp; | ||||
3076 | } | ||||
3077 | break; | ||||
3078 | } | ||||
3079 | |||||
3080 | case Intrinsic::uadd_sat: { | ||||
3081 | // uadd.sat(a, b) == 0 -> (a | b) == 0 and likewise for != (the original predicate is reused) | ||||
3082 | if (C.isNullValue()) { | ||||
3083 | Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1)); | ||||
3084 | return replaceInstUsesWith(Cmp, Builder.CreateICmp( | ||||
3085 | Cmp.getPredicate(), Or, Constant::getNullValue(Ty))); | ||||
3086 | |||||
3087 | } | ||||
3088 | break; | ||||
3089 | } | ||||
3090 | |||||
3091 | case Intrinsic::usub_sat: { | ||||
3092 | // usub.sat(a, b) == 0 -> a u<= b, and usub.sat(a, b) != 0 -> a u> b | ||||
3093 | if (C.isNullValue()) { | ||||
3094 | ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ | ||||
3095 | ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; | ||||
3096 | return ICmpInst::Create(Instruction::ICmp, NewPred, | ||||
3097 | II->getArgOperand(0), II->getArgOperand(1)); | ||||
3098 | } | ||||
3099 | break; | ||||
3100 | } | ||||
3101 | default: | ||||
3102 | break; | ||||
3103 | } | ||||
3104 | |||||
3105 | return nullptr; | ||||
3106 | } | ||||
3107 | |||||
3108 | /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. Equality predicates are delegated to foldICmpEqIntrinsicWithConstant; otherwise only ugt/ult compares of ctlz and cttz are folded. Returns a newly created compare, or nullptr if no fold applies. | ||||
3109 | Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, | ||||
3110 | IntrinsicInst *II, | ||||
3111 | const APInt &C) { | ||||
3112 | if (Cmp.isEquality()) | ||||
3113 | return foldICmpEqIntrinsicWithConstant(Cmp, II, C); | ||||
3114 | |||||
3115 | Type *Ty = II->getType(); | ||||
3116 | unsigned BitWidth = C.getBitWidth(); | ||||
3117 | switch (II->getIntrinsicID()) { | ||||
3118 | case Intrinsic::ctlz: { | ||||
3119 | // ctlz(0bXXXXXXXX) u> 3 -> 0bXXXXXXXX u< 0b00010000 (requires C u< BitWidth) | ||||
3120 | if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { | ||||
3121 | unsigned Num = C.getLimitedValue(); | ||||
3122 | APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); | ||||
3123 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT, | ||||
3124 | II->getArgOperand(0), ConstantInt::get(Ty, Limit)); | ||||
3125 | } | ||||
3126 | |||||
3127 | // ctlz(0bXXXXXXXX) u< 3 -> 0bXXXXXXXX u> 0b00011111 (requires 1 u<= C u<= BitWidth) | ||||
3128 | if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && | ||||
3129 | C.uge(1) && C.ule(BitWidth)) { | ||||
3130 | unsigned Num = C.getLimitedValue(); | ||||
3131 | APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num); | ||||
3132 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT, | ||||
3133 | II->getArgOperand(0), ConstantInt::get(Ty, Limit)); | ||||
3134 | } | ||||
3135 | break; | ||||
3136 | } | ||||
3137 | case Intrinsic::cttz: { | ||||
3138 | // Limit to one use to ensure we don't increase instruction count (both folds below create an extra 'and'). | ||||
3139 | if (!II->hasOneUse()) | ||||
3140 | return nullptr; | ||||
3141 | |||||
3142 | // cttz(0bXXXXXXXX) u> 3 -> 0bXXXXXXXX & 0b00001111 == 0 (mask has the low C+1 bits set) | ||||
3143 | if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) { | ||||
3144 | APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1); | ||||
3145 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, | ||||
3146 | Builder.CreateAnd(II->getArgOperand(0), Mask), | ||||
3147 | ConstantInt::getNullValue(Ty)); | ||||
3148 | } | ||||
3149 | |||||
3150 | // cttz(0bXXXXXXXX) u< 3 -> 0bXXXXXXXX & 0b00000111 != 0 (mask has the low C bits set) | ||||
3151 | if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && | ||||
3152 | C.uge(1) && C.ule(BitWidth)) { | ||||
3153 | APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue()); | ||||
3154 | return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, | ||||
3155 | Builder.CreateAnd(II->getArgOperand(0), Mask), | ||||
3156 | ConstantInt::getNullValue(Ty)); | ||||
3157 | } | ||||
3158 | break; | ||||
3159 | } | ||||
3160 | default: | ||||
3161 | break; | ||||
3162 | } | ||||
3163 | |||||
3164 | return nullptr; | ||||
3165 | } | ||||
3166 | |||||
3167 | /// Handle icmp with constant (but not simple integer constant) RHS. Bails out with nullptr unless operand 1 is a Constant and operand 0 is an Instruction, then dispatches on the LHS opcode (GetElementPtr, PHI, Select, IntToPtr, Load). Returns a replacement instruction, or nullptr if no fold applies. | ||||
3168 | Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { | ||||
3169 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
3170 | Constant *RHSC = dyn_cast<Constant>(Op1); | ||||
3171 | Instruction *LHSI = dyn_cast<Instruction>(Op0); | ||||
3172 | if (!RHSC || !LHSI) | ||||
3173 | return nullptr; | ||||
3174 | |||||
3175 | switch (LHSI->getOpcode()) { | ||||
3176 | case Instruction::GetElementPtr: | ||||
3177 | // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null | ||||
3178 | if (RHSC->isNullValue() && | ||||
3179 | cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) | ||||
3180 | return new ICmpInst( | ||||
3181 | I.getPredicate(), LHSI->getOperand(0), | ||||
3182 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | ||||
3183 | break; | ||||
3184 | case Instruction::PHI: | ||||
3185 | // Only fold icmp into the PHI if the phi and icmp are in the same | ||||
3186 | // block. If in the same block, we're encouraging jump threading. If | ||||
3187 | // not, we are just pessimizing the code by making an i1 phi. | ||||
3188 | if (LHSI->getParent() == I.getParent()) | ||||
3189 | if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) | ||||
3190 | return NV; | ||||
3191 | break; | ||||
3192 | case Instruction::Select: { | ||||
3193 | // If either operand of the select is a constant, we can fold the | ||||
3194 | // comparison into the select arms, which will cause one to be | ||||
3195 | // constant folded and the select turned into a bitwise or. NOTE: the Op1/Op2 declared below hold the folded compares of the select arms; this Op1 shadows the icmp operand Op1 declared at function entry. | ||||
3196 | Value *Op1 = nullptr, *Op2 = nullptr; | ||||
3197 | ConstantInt *CI = nullptr; | ||||
3198 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { | ||||
3199 | Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); | ||||
3200 | CI = dyn_cast<ConstantInt>(Op1); | ||||
3201 | } | ||||
3202 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { | ||||
3203 | Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); | ||||
3204 | CI = dyn_cast<ConstantInt>(Op2); | ||||
3205 | } | ||||
3206 | |||||
3207 | // We only want to perform this transformation if it will not lead to | ||||
3208 | // additional code. This is true if either both sides of the select | ||||
3209 | // fold to a constant (in which case the icmp is replaced with a select | ||||
3210 | // which will usually simplify) or this is the only user of the | ||||
3211 | // select (in which case we are trading a select+icmp for a simpler | ||||
3212 | // select+icmp) or all uses of the select can be replaced based on | ||||
3213 | // dominance information ("Global cases"). | ||||
3214 | bool Transform = false; | ||||
3215 | if (Op1 && Op2) | ||||
3216 | Transform = true; | ||||
3217 | else if (Op1 || Op2) { | ||||
3218 | // Local case | ||||
3219 | if (LHSI->hasOneUse()) | ||||
3220 | Transform = true; | ||||
3221 | // Global cases | ||||
3222 | else if (CI && !CI->isZero()) | ||||
3223 | // When Op1 is constant try replacing select with second operand. | ||||
3224 | // Otherwise Op2 is constant and try replacing select with first | ||||
3225 | // operand. | ||||
3226 | Transform = | ||||
3227 | replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1); | ||||
3228 | } | ||||
3229 | if (Transform) { | ||||
3230 | if (!Op1) | ||||
3231 | Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, | ||||
3232 | I.getName()); | ||||
3233 | if (!Op2) | ||||
3234 | Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, | ||||
3235 | I.getName()); | ||||
3236 | return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); | ||||
3237 | } | ||||
3238 | break; | ||||
3239 | } | ||||
3240 | case Instruction::IntToPtr: | ||||
3241 | // icmp pred inttoptr(X), null -> icmp pred X, 0 (only when X already has the pointer-sized integer type reported by DL.getIntPtrType) | ||||
3242 | if (RHSC->isNullValue() && | ||||
3243 | DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) | ||||
3244 | return new ICmpInst( | ||||
3245 | I.getPredicate(), LHSI->getOperand(0), | ||||
3246 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | ||||
3247 | break; | ||||
3248 | |||||
3249 | case Instruction::Load: | ||||
3250 | // Try to optimize things like "A[i] > 4" to index computations. Only attempted for non-volatile loads through a GEP of a constant global with a definitive initializer. | ||||
3251 | if (GetElementPtrInst *GEP = | ||||
3252 | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { | ||||
3253 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | ||||
3254 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | ||||
3255 | !cast<LoadInst>(LHSI)->isVolatile()) | ||||
3256 | if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I)) | ||||
3257 | return Res; | ||||
3258 | } | ||||
3259 | break; | ||||
3260 | } | ||||
3261 | |||||
3262 | return nullptr; | ||||
3263 | } | ||||
3264 | |||||
3265 | /// Some comparisons can be simplified. | ||||
3266 | /// In this case, we are looking for comparisons that look like | ||||
3267 | /// a check for a lossy truncation. | ||||
3268 | /// Folds: | ||||
3269 | /// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask | ||||
3270 | /// Where Mask is some pattern that produces all-ones in low bits: | ||||
3271 | /// (-1 >> y) | ||||
3272 | /// ((-1 << y) >> y) <- non-canonical, has extra uses | ||||
3273 | /// ~(-1 << y) | ||||
3274 | /// ((1 << y) + (-1)) <- non-canonical, has extra uses | ||||
3275 | /// The Mask can be a constant, too. | ||||
3276 | /// For some predicates, the operands are commutative. | ||||
3277 | /// For others, x can only be on a specific side. Signed predicates are folded only when the mask is a constant that matches m_NonNegative. Returns the new compare created through Builder, or nullptr if the pattern does not match or the fold is not allowed. | ||||
3278 | static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, | ||||
3279 | InstCombiner::BuilderTy &Builder) { | ||||
3280 | ICmpInst::Predicate SrcPred; | ||||
3281 | Value *X, *M, *Y; | ||||
3282 | auto m_VariableMask = m_CombineOr( | ||||
3283 | m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())), | ||||
3284 | m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())), | ||||
3285 | m_CombineOr(m_LShr(m_AllOnes(), m_Value()), | ||||
3286 | m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y)))); | ||||
3287 | auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask()); | ||||
3288 | if (!match(&I, m_c_ICmp(SrcPred, | ||||
3289 | m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)), | ||||
3290 | m_Deferred(X)))) | ||||
3291 | return nullptr; | ||||
3292 | |||||
3293 | ICmpInst::Predicate DstPred; | ||||
3294 | switch (SrcPred) { | ||||
3295 | case ICmpInst::Predicate::ICMP_EQ: | ||||
3296 | // x & (-1 >> y) == x -> x u<= (-1 >> y) | ||||
3297 | DstPred = ICmpInst::Predicate::ICMP_ULE; | ||||
3298 | break; | ||||
3299 | case ICmpInst::Predicate::ICMP_NE: | ||||
3300 | // x & (-1 >> y) != x -> x u> (-1 >> y) | ||||
3301 | DstPred = ICmpInst::Predicate::ICMP_UGT; | ||||
3302 | break; | ||||
3303 | case ICmpInst::Predicate::ICMP_UGT: | ||||
3304 | // x u> x & (-1 >> y) -> x u> (-1 >> y) | ||||
3305 | assert(X == I.getOperand(0) && "instsimplify took care of commut. variant")((X == I.getOperand(0) && "instsimplify took care of commut. variant" ) ? static_cast<void> (0) : __assert_fail ("X == I.getOperand(0) && \"instsimplify took care of commut. variant\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3305, __PRETTY_FUNCTION__)); | ||||
3306 | DstPred = ICmpInst::Predicate::ICMP_UGT; | ||||
3307 | break; | ||||
3308 | case ICmpInst::Predicate::ICMP_UGE: | ||||
3309 | // x & (-1 >> y) u>= x -> x u<= (-1 >> y) | ||||
3310 | assert(X == I.getOperand(1) && "instsimplify took care of commut. variant")((X == I.getOperand(1) && "instsimplify took care of commut. variant" ) ? static_cast<void> (0) : __assert_fail ("X == I.getOperand(1) && \"instsimplify took care of commut. variant\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3310, __PRETTY_FUNCTION__)); | ||||
3311 | DstPred = ICmpInst::Predicate::ICMP_ULE; | ||||
3312 | break; | ||||
3313 | case ICmpInst::Predicate::ICMP_ULT: | ||||
3314 | // x & (-1 >> y) u< x -> x u> (-1 >> y) | ||||
3315 | assert(X == I.getOperand(1) && "instsimplify took care of commut. variant")((X == I.getOperand(1) && "instsimplify took care of commut. variant" ) ? static_cast<void> (0) : __assert_fail ("X == I.getOperand(1) && \"instsimplify took care of commut. variant\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3315, __PRETTY_FUNCTION__)); | ||||
3316 | DstPred = ICmpInst::Predicate::ICMP_UGT; | ||||
3317 | break; | ||||
3318 | case ICmpInst::Predicate::ICMP_ULE: | ||||
3319 | // x u<= x & (-1 >> y) -> x u<= (-1 >> y) | ||||
3320 | assert(X == I.getOperand(0) && "instsimplify took care of commut. variant")((X == I.getOperand(0) && "instsimplify took care of commut. variant" ) ? static_cast<void> (0) : __assert_fail ("X == I.getOperand(0) && \"instsimplify took care of commut. variant\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3320, __PRETTY_FUNCTION__)); | ||||
3321 | DstPred = ICmpInst::Predicate::ICMP_ULE; | ||||
3322 | break; | ||||
3323 | case ICmpInst::Predicate::ICMP_SGT: | ||||
3324 | // x s> x & (-1 >> y) -> x s> (-1 >> y) | ||||
3325 | if (X != I.getOperand(0)) // X must be on LHS of comparison! | ||||
3326 | return nullptr; // Ignore the other case. | ||||
3327 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. | ||||
3328 | return nullptr; | ||||
3329 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. | ||||
3330 | return nullptr; | ||||
3331 | DstPred = ICmpInst::Predicate::ICMP_SGT; | ||||
3332 | break; | ||||
3333 | case ICmpInst::Predicate::ICMP_SGE: | ||||
3334 | // x & (-1 >> y) s>= x -> x s<= (-1 >> y) | ||||
3335 | if (X != I.getOperand(1)) // X must be on RHS of comparison! | ||||
3336 | return nullptr; // Ignore the other case. | ||||
3337 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. | ||||
3338 | return nullptr; | ||||
3339 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. | ||||
3340 | return nullptr; | ||||
3341 | DstPred = ICmpInst::Predicate::ICMP_SLE; | ||||
3342 | break; | ||||
3343 | case ICmpInst::Predicate::ICMP_SLT: | ||||
3344 | // x & (-1 >> y) s< x -> x s> (-1 >> y) | ||||
3345 | if (X != I.getOperand(1)) // X must be on RHS of comparison! | ||||
3346 | return nullptr; // Ignore the other case. | ||||
3347 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. | ||||
3348 | return nullptr; | ||||
3349 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. | ||||
3350 | return nullptr; | ||||
3351 | DstPred = ICmpInst::Predicate::ICMP_SGT; | ||||
3352 | break; | ||||
3353 | case ICmpInst::Predicate::ICMP_SLE: | ||||
3354 | // x s<= x & (-1 >> y) -> x s<= (-1 >> y) | ||||
3355 | if (X != I.getOperand(0)) // X must be on LHS of comparison! | ||||
3356 | return nullptr; // Ignore the other case. | ||||
3357 | if (!match(M, m_Constant())) // Can not do this fold with non-constant. | ||||
3358 | return nullptr; | ||||
3359 | if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. | ||||
3360 | return nullptr; | ||||
3361 | DstPred = ICmpInst::Predicate::ICMP_SLE; | ||||
3362 | break; | ||||
3363 | default: | ||||
3364 | llvm_unreachable("All possible folds are handled.")::llvm::llvm_unreachable_internal("All possible folds are handled." , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3364); | ||||
3365 | } | ||||
3366 | |||||
3367 | return Builder.CreateICmp(DstPred, X, M); | ||||
3368 | } | ||||
3369 | |||||
3370 | /// Some comparisons can be simplified. | ||||
3371 | /// In this case, we are looking for comparisons that look like | ||||
3372 | /// a check for a lossy signed truncation. | ||||
3373 | /// Folds: (MaskedBits is a constant.) | ||||
3374 | /// ((%x << MaskedBits) a>> MaskedBits) SrcPred %x | ||||
3375 | /// Into: | ||||
3376 | /// (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits) | ||||
3377 | /// Where KeptBits = bitwidth(%x) - MaskedBits. Only eq/ne source predicates are folded (eq -> u<, ne -> u>=). Returns the new compare created through Builder, or nullptr if the pattern does not match. | ||||
3378 | static Value * | ||||
3379 | foldICmpWithTruncSignExtendedVal(ICmpInst &I, | ||||
3380 | InstCombiner::BuilderTy &Builder) { | ||||
3381 | ICmpInst::Predicate SrcPred; | ||||
3382 | Value *X; | ||||
3383 | const APInt *C0, *C1; // FIXME: non-splats, potentially with undef. | ||||
3384 | // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use. | ||||
3385 | if (!match(&I, m_c_ICmp(SrcPred, | ||||
3386 | m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)), | ||||
3387 | m_APInt(C1))), | ||||
3388 | m_Deferred(X)))) | ||||
3389 | return nullptr; | ||||
3390 | |||||
3391 | // Potential handling of non-splats: for each element: | ||||
3392 | // * if both are undef, replace with constant 0. | ||||
3393 | // Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0. | ||||
3394 | // * if both are not undef, and are different, bailout. | ||||
3395 | // * else, only one is undef, then pick the non-undef one. | ||||
3396 | |||||
3397 | // The two shift amounts (shl and ashr) must be equal. | ||||
3398 | if (*C0 != *C1) | ||||
3399 | return nullptr; | ||||
3400 | const APInt &MaskedBits = *C0; | ||||
3401 | assert(MaskedBits != 0 && "shift by zero should be folded away already.")((MaskedBits != 0 && "shift by zero should be folded away already." ) ? static_cast<void> (0) : __assert_fail ("MaskedBits != 0 && \"shift by zero should be folded away already.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3401, __PRETTY_FUNCTION__)); | ||||
3402 | |||||
3403 | ICmpInst::Predicate DstPred; | ||||
3404 | switch (SrcPred) { | ||||
3405 | case ICmpInst::Predicate::ICMP_EQ: | ||||
3406 | // ((%x << MaskedBits) a>> MaskedBits) == %x | ||||
3407 | // => | ||||
3408 | // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits) | ||||
3409 | DstPred = ICmpInst::Predicate::ICMP_ULT; | ||||
3410 | break; | ||||
3411 | case ICmpInst::Predicate::ICMP_NE: | ||||
3412 | // ((%x << MaskedBits) a>> MaskedBits) != %x | ||||
3413 | // => | ||||
3414 | // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits) | ||||
3415 | DstPred = ICmpInst::Predicate::ICMP_UGE; | ||||
3416 | break; | ||||
3417 | // FIXME: are more folds possible? | ||||
3418 | default: | ||||
3419 | return nullptr; | ||||
3420 | } | ||||
3421 | |||||
3422 | auto *XType = X->getType(); | ||||
3423 | const unsigned XBitWidth = XType->getScalarSizeInBits(); | ||||
3424 | const APInt BitWidth = APInt(XBitWidth, XBitWidth); | ||||
3425 | assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched")((BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched" ) ? static_cast<void> (0) : __assert_fail ("BitWidth.ugt(MaskedBits) && \"shifts should leave some bits untouched\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3425, __PRETTY_FUNCTION__)); | ||||
3426 | |||||
3427 | // KeptBits = bitwidth(%x) - MaskedBits | ||||
3428 | const APInt KeptBits = BitWidth - MaskedBits; | ||||
3429 | assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable")((KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable") ? static_cast<void> (0) : __assert_fail ("KeptBits.ugt(0) && KeptBits.ult(BitWidth) && \"unreachable\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3429, __PRETTY_FUNCTION__)); | ||||
3430 | // ICmpCst = (1 << KeptBits) | ||||
3431 | const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits); | ||||
3432 | assert(ICmpCst.isPowerOf2())((ICmpCst.isPowerOf2()) ? static_cast<void> (0) : __assert_fail ("ICmpCst.isPowerOf2()", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3432, __PRETTY_FUNCTION__)); | ||||
3433 | // AddCst = (1 << (KeptBits-1)), computed as ICmpCst >> 1 | ||||
3434 | const APInt AddCst = ICmpCst.lshr(1); | ||||
3435 | assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2())((AddCst.ult(ICmpCst) && AddCst.isPowerOf2()) ? static_cast <void> (0) : __assert_fail ("AddCst.ult(ICmpCst) && AddCst.isPowerOf2()" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3435, __PRETTY_FUNCTION__)); | ||||
3436 | |||||
3437 | // T0 = add %x, AddCst | ||||
3438 | Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst)); | ||||
3439 | // T1 = T0 DstPred ICmpCst | ||||
3440 | Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst)); | ||||
3441 | |||||
3442 | return T1; | ||||
3443 | } | ||||
3444 | |||||
3445 | // Given pattern: | ||||
3446 | // icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0 | ||||
3447 | // we should move shifts to the same hand of 'and', i.e. rewrite as | ||||
3448 | // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x) | ||||
3449 | // We are only interested in opposite logical shifts here. | ||||
3450 | // One of the shifts can be truncated. | ||||
3451 | // If we can, we want to end up creating 'lshr' shift. Requires an equality compare against zero whose LHS has one use. Returns the new compare created through Builder, or nullptr if the pattern does not match or the fold is not known to be legal/profitable. | ||||
3452 | static Value * | ||||
3453 | foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, | ||||
3454 | InstCombiner::BuilderTy &Builder) { | ||||
3455 | if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) || | ||||
3456 | !I.getOperand(0)->hasOneUse()) | ||||
3457 | return nullptr; | ||||
3458 | |||||
3459 | auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value()); | ||||
3460 | |||||
3461 | // Look for an 'and' of two logical shifts, one of which may be truncated. | ||||
3462 | // We use m_TruncOrSelf() on the RHS to correctly handle commutative case. | ||||
3463 | Instruction *XShift, *MaybeTruncation, *YShift; | ||||
3464 | if (!match( | ||||
3465 | I.getOperand(0), | ||||
3466 | m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)), | ||||
3467 | m_CombineAnd(m_TruncOrSelf(m_CombineAnd( | ||||
3468 | m_AnyLogicalShift, m_Instruction(YShift))), | ||||
3469 | m_Instruction(MaybeTruncation))))) | ||||
3470 | return nullptr; | ||||
3471 | |||||
3472 | // We potentially looked past 'trunc', but only when matching YShift, | ||||
3473 | // therefore YShift must have the widest type. | ||||
3474 | Instruction *WidestShift = YShift; | ||||
3475 | // Therefore XShift must have the narrowest type. | ||||
3476 | // Or they both have identical types if there was no truncation. | ||||
3477 | Instruction *NarrowestShift = XShift; | ||||
3478 | |||||
3479 | Type *WidestTy = WidestShift->getType(); | ||||
3480 | assert(NarrowestShift->getType() == I.getOperand(0)->getType() &&((NarrowestShift->getType() == I.getOperand(0)->getType () && "We did not look past any shifts while matching XShift though." ) ? static_cast<void> (0) : __assert_fail ("NarrowestShift->getType() == I.getOperand(0)->getType() && \"We did not look past any shifts while matching XShift though.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3481, __PRETTY_FUNCTION__)) | ||||
3481 | "We did not look past any shifts while matching XShift though.")((NarrowestShift->getType() == I.getOperand(0)->getType () && "We did not look past any shifts while matching XShift though." ) ? static_cast<void> (0) : __assert_fail ("NarrowestShift->getType() == I.getOperand(0)->getType() && \"We did not look past any shifts while matching XShift though.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3481, __PRETTY_FUNCTION__)); | ||||
3482 | bool HadTrunc = WidestTy != I.getOperand(0)->getType(); | ||||
3483 | |||||
3484 | // If YShift is a 'lshr', swap the shifts around. Only XShift/YShift are swapped; WidestShift and NarrowestShift keep referring to the instructions as originally matched. | ||||
3485 | if (match(YShift, m_LShr(m_Value(), m_Value()))) | ||||
3486 | std::swap(XShift, YShift); | ||||
3487 | |||||
3488 | // The shifts must be in opposite directions. | ||||
3489 | auto XShiftOpcode = XShift->getOpcode(); | ||||
3490 | if (XShiftOpcode == YShift->getOpcode()) | ||||
3491 | return nullptr; // Do not care about same-direction shifts here. | ||||
3492 | |||||
3493 | Value *X, *XShAmt, *Y, *YShAmt; | ||||
3494 | match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt)))); | ||||
3495 | match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt)))); | ||||
3496 | |||||
3497 | // If one of the values being shifted is a constant, then we will end with | ||||
3498 | // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not, | ||||
3499 | // however, we will need to ensure that we won't increase instruction count. | ||||
3500 | if (!isa<Constant>(X) && !isa<Constant>(Y)) { | ||||
3501 | // At least one of the hands of the 'and' should be one-use shift. | ||||
3502 | if (!match(I.getOperand(0), | ||||
3503 | m_c_And(m_OneUse(m_AnyLogicalShift), m_Value()))) | ||||
3504 | return nullptr; | ||||
3505 | if (HadTrunc) { | ||||
3506 | // Due to the 'trunc', we will need to widen X. For that either the old | ||||
3507 | // 'trunc' or the shift amt in the non-truncated shift should be one-use. | ||||
3508 | if (!MaybeTruncation->hasOneUse() && | ||||
3509 | !NarrowestShift->getOperand(1)->hasOneUse()) | ||||
3510 | return nullptr; | ||||
3511 | } | ||||
3512 | } | ||||
3513 | |||||
3514 | // We have two shift amounts from two different shifts. The types of those | ||||
3515 | // shift amounts may not match. If that's the case let's bailout now. | ||||
3516 | if (XShAmt->getType() != YShAmt->getType()) | ||||
3517 | return nullptr; | ||||
3518 | |||||
3519 | // Can we fold (XShAmt+YShAmt) ? We only proceed when the sum simplifies to a Constant. | ||||
3520 | auto *NewShAmt = dyn_cast_or_null<Constant>( | ||||
3521 | SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false, | ||||
3522 | /*isNUW=*/false, SQ.getWithInstruction(&I))); | ||||
3523 | if (!NewShAmt) | ||||
3524 | return nullptr; | ||||
3525 | NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); | ||||
3526 | unsigned WidestBitWidth = WidestTy->getScalarSizeInBits(); | ||||
3527 | |||||
3528 | // Is the new shift amount smaller than the bit width? | ||||
3529 | // FIXME: could also rely on ConstantRange. | ||||
3530 | if (!match(NewShAmt, | ||||
3531 | m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, | ||||
3532 | APInt(WidestBitWidth, WidestBitWidth)))) | ||||
3533 | return nullptr; | ||||
3534 | |||||
3535 | // An extra legality check is needed if we had trunc-of-lshr. | ||||
3536 | if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) { | ||||
3537 | auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ, | ||||
3538 | WidestShift]() { | ||||
3539 | // It isn't obvious whether it's worth it to analyze non-constants here. | ||||
3540 | // Also, let's basically give up on non-splat cases, pessimizing vectors. | ||||
3541 | // If *any* of these preconditions matches we can perform the fold. | ||||
3542 | Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy() | ||||
3543 | ? NewShAmt->getSplatValue() | ||||
3544 | : NewShAmt; | ||||
3545 | // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold. | ||||
3546 | if (NewShAmtSplat && | ||||
3547 | (NewShAmtSplat->isNullValue() || | ||||
3548 | NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1)) | ||||
3549 | return true; | ||||
3550 | // We consider *min* leading zeros so a single outlier | ||||
3551 | // blocks the transform as opposed to allowing it. | ||||
3552 | if (auto *C = dyn_cast<Constant>(NarrowestShift->getOperand(0))) { | ||||
3553 | KnownBits Known = computeKnownBits(C, SQ.DL); | ||||
3554 | unsigned MinLeadZero = Known.countMinLeadingZeros(); | ||||
3555 | // If the value being shifted has at most lowest bit set we can fold. | ||||
3556 | unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; | ||||
3557 | if (MaxActiveBits <= 1) | ||||
3558 | return true; | ||||
3559 | // Precondition: NewShAmt u<= countLeadingZeros(C) | ||||
3560 | if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero)) | ||||
3561 | return true; | ||||
3562 | } | ||||
3563 | if (auto *C = dyn_cast<Constant>(WidestShift->getOperand(0))) { | ||||
3564 | KnownBits Known = computeKnownBits(C, SQ.DL); | ||||
3565 | unsigned MinLeadZero = Known.countMinLeadingZeros(); | ||||
3566 | // If the value being shifted has at most lowest bit set we can fold. | ||||
3567 | unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; | ||||
3568 | if (MaxActiveBits <= 1) | ||||
3569 | return true; | ||||
3570 | // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C) | ||||
3571 | if (NewShAmtSplat) { | ||||
3572 | APInt AdjNewShAmt = | ||||
3573 | (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger(); | ||||
3574 | if (AdjNewShAmt.ule(MinLeadZero)) | ||||
3575 | return true; | ||||
3576 | } | ||||
3577 | } | ||||
3578 | return false; // Can't tell if it's ok. | ||||
3579 | }; | ||||
3580 | if (!CanFold()) | ||||
3581 | return nullptr; | ||||
3582 | } | ||||
3583 | |||||
3584 | // All good, we can do this fold. Both shifted values are zero-extended to the widest type first. | ||||
3585 | X = Builder.CreateZExt(X, WidestTy); | ||||
3586 | Y = Builder.CreateZExt(Y, WidestTy); | ||||
3587 | // The shift is the same that was for X. | ||||
3588 | Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr | ||||
3589 | ? Builder.CreateLShr(X, NewShAmt) | ||||
3590 | : Builder.CreateShl(X, NewShAmt); | ||||
3591 | Value *T1 = Builder.CreateAnd(T0, Y); | ||||
3592 | return Builder.CreateICmp(I.getPredicate(), T1, | ||||
3593 | Constant::getNullValue(WidestTy)); | ||||
3594 | } | ||||
3595 | |||||
3596 | /// Fold | ||||
3597 | /// (-1 u/ x) u< y | ||||
3598 | /// ((x * y) u/ x) != y | ||||
3599 | /// to | ||||
3600 | /// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit | ||||
3601 | /// Note that the comparison is commutative, while inverted (u>=, ==) predicate | ||||
3602 | /// will mean that we are looking for the opposite answer. | ||||
3603 | Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { | ||||
3604 | ICmpInst::Predicate Pred; | ||||
3605 | Value *X, *Y; | ||||
3606 | Instruction *Mul; | ||||
3607 | bool NeedNegation; | ||||
3608 | // Look for: (-1 u/ x) u</u>= y | ||||
3609 | if (!I.isEquality() && | ||||
3610 | match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))), | ||||
3611 | m_Value(Y)))) { | ||||
3612 | Mul = nullptr; | ||||
3613 | // Canonicalize as-if y was on RHS. | ||||
3614 | if (I.getOperand(1) != Y) | ||||
3615 | Pred = I.getSwappedPredicate(); | ||||
3616 | |||||
3617 | // Are we checking that overflow does not happen, or does happen? | ||||
3618 | switch (Pred) { | ||||
3619 | case ICmpInst::Predicate::ICMP_ULT: | ||||
3620 | NeedNegation = false; | ||||
3621 | break; // OK | ||||
3622 | case ICmpInst::Predicate::ICMP_UGE: | ||||
3623 | NeedNegation = true; | ||||
3624 | break; // OK | ||||
3625 | default: | ||||
3626 | return nullptr; // Wrong predicate. | ||||
3627 | } | ||||
3628 | } else // Look for: ((x * y) u/ x) !=/== y | ||||
3629 | if (I.isEquality() && | ||||
3630 | match(&I, m_c_ICmp(Pred, m_Value(Y), | ||||
3631 | m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y), | ||||
3632 | m_Value(X)), | ||||
3633 | m_Instruction(Mul)), | ||||
3634 | m_Deferred(X)))))) { | ||||
3635 | NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ; | ||||
3636 | } else | ||||
3637 | return nullptr; | ||||
3638 | |||||
3639 | BuilderTy::InsertPointGuard Guard(Builder); | ||||
3640 | // If the pattern included (x * y), we'll want to insert new instructions | ||||
3641 | // right before that original multiplication so that we can replace it. | ||||
3642 | bool MulHadOtherUses = Mul && !Mul->hasOneUse(); | ||||
3643 | if (MulHadOtherUses) | ||||
3644 | Builder.SetInsertPoint(Mul); | ||||
3645 | |||||
3646 | Function *F = Intrinsic::getDeclaration( | ||||
3647 | I.getModule(), Intrinsic::umul_with_overflow, X->getType()); | ||||
3648 | CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul"); | ||||
3649 | |||||
3650 | // If the multiplication was used elsewhere, to ensure that we don't leave | ||||
3651 | // "duplicate" instructions, replace uses of that original multiplication | ||||
3652 | // with the multiplication result from the with.overflow intrinsic. | ||||
3653 | if (MulHadOtherUses) | ||||
3654 | replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val")); | ||||
3655 | |||||
3656 | Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov"); | ||||
3657 | if (NeedNegation) // This technically increases instruction count. | ||||
3658 | Res = Builder.CreateNot(Res, "umul.not.ov"); | ||||
3659 | |||||
3660 | return Res; | ||||
3661 | } | ||||
3662 | |||||
3663 | /// Try to fold icmp (binop), X or icmp X, (binop). | ||||
3664 | /// TODO: A large part of this logic is duplicated in InstSimplify's | ||||
3665 | /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code | ||||
3666 | /// duplication. | ||||
3667 | Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) { | ||||
3668 | const SimplifyQuery Q = SQ.getWithInstruction(&I); | ||||
3669 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
3670 | |||||
3671 | // Special logic for binary operators. | ||||
3672 | BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); | ||||
3673 | BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); | ||||
3674 | if (!BO0 && !BO1) | ||||
3675 | return nullptr; | ||||
3676 | |||||
3677 | const CmpInst::Predicate Pred = I.getPredicate(); | ||||
3678 | Value *X; | ||||
3679 | |||||
3680 | // Convert add-with-unsigned-overflow comparisons into a 'not' with compare. | ||||
3681 | // (Op1 + X) u</u>= Op1 --> ~Op1 u</u>= X | ||||
3682 | if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) && | ||||
3683 | (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) | ||||
3684 | return new ICmpInst(Pred, Builder.CreateNot(Op1), X); | ||||
3685 | // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0 | ||||
3686 | if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) && | ||||
3687 | (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) | ||||
3688 | return new ICmpInst(Pred, X, Builder.CreateNot(Op0)); | ||||
3689 | |||||
3690 | bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; | ||||
3691 | if (BO0 && isa<OverflowingBinaryOperator>(BO0)) | ||||
3692 | NoOp0WrapProblem = | ||||
3693 | ICmpInst::isEquality(Pred) || | ||||
3694 | (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || | ||||
3695 | (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); | ||||
3696 | if (BO1 && isa<OverflowingBinaryOperator>(BO1)) | ||||
3697 | NoOp1WrapProblem = | ||||
3698 | ICmpInst::isEquality(Pred) || | ||||
3699 | (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || | ||||
3700 | (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); | ||||
3701 | |||||
3702 | // Analyze the case when either Op0 or Op1 is an add instruction. | ||||
3703 | // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). | ||||
3704 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; | ||||
3705 | if (BO0 && BO0->getOpcode() == Instruction::Add) { | ||||
3706 | A = BO0->getOperand(0); | ||||
3707 | B = BO0->getOperand(1); | ||||
3708 | } | ||||
3709 | if (BO1 && BO1->getOpcode() == Instruction::Add) { | ||||
3710 | C = BO1->getOperand(0); | ||||
3711 | D = BO1->getOperand(1); | ||||
3712 | } | ||||
3713 | |||||
3714 | // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. | ||||
3715 | // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow. | ||||
3716 | if ((A == Op1 || B == Op1) && NoOp0WrapProblem) | ||||
3717 | return new ICmpInst(Pred, A == Op1 ? B : A, | ||||
3718 | Constant::getNullValue(Op1->getType())); | ||||
3719 | |||||
3720 | // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow. | ||||
3721 | // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow. | ||||
3722 | if ((C == Op0 || D == Op0) && NoOp1WrapProblem) | ||||
3723 | return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), | ||||
3724 | C == Op0 ? D : C); | ||||
3725 | |||||
3726 | // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow. | ||||
3727 | if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem && | ||||
3728 | NoOp1WrapProblem) { | ||||
3729 | // Determine Y and Z in the form icmp (X+Y), (X+Z). | ||||
3730 | Value *Y, *Z; | ||||
3731 | if (A == C) { | ||||
3732 | // C + B == C + D -> B == D | ||||
3733 | Y = B; | ||||
3734 | Z = D; | ||||
3735 | } else if (A == D) { | ||||
3736 | // D + B == C + D -> B == C | ||||
3737 | Y = B; | ||||
3738 | Z = C; | ||||
3739 | } else if (B == C) { | ||||
3740 | // A + C == C + D -> A == D | ||||
3741 | Y = A; | ||||
3742 | Z = D; | ||||
3743 | } else { | ||||
3744 | assert(B == D)((B == D) ? static_cast<void> (0) : __assert_fail ("B == D" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 3744, __PRETTY_FUNCTION__)); | ||||
3745 | // A + D == C + D -> A == C | ||||
3746 | Y = A; | ||||
3747 | Z = C; | ||||
3748 | } | ||||
3749 | return new ICmpInst(Pred, Y, Z); | ||||
3750 | } | ||||
3751 | |||||
3752 | // icmp slt (A + -1), Op1 -> icmp sle A, Op1 | ||||
3753 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && | ||||
3754 | match(B, m_AllOnes())) | ||||
3755 | return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); | ||||
3756 | |||||
3757 | // icmp sge (A + -1), Op1 -> icmp sgt A, Op1 | ||||
3758 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && | ||||
3759 | match(B, m_AllOnes())) | ||||
3760 | return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); | ||||
3761 | |||||
3762 | // icmp sle (A + 1), Op1 -> icmp slt A, Op1 | ||||
3763 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One())) | ||||
3764 | return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); | ||||
3765 | |||||
3766 | // icmp sgt (A + 1), Op1 -> icmp sge A, Op1 | ||||
3767 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One())) | ||||
3768 | return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); | ||||
3769 | |||||
3770 | // icmp sgt Op0, (C + -1) -> icmp sge Op0, C | ||||
3771 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT && | ||||
3772 | match(D, m_AllOnes())) | ||||
3773 | return new ICmpInst(CmpInst::ICMP_SGE, Op0, C); | ||||
3774 | |||||
3775 | // icmp sle Op0, (C + -1) -> icmp slt Op0, C | ||||
3776 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE && | ||||
3777 | match(D, m_AllOnes())) | ||||
3778 | return new ICmpInst(CmpInst::ICMP_SLT, Op0, C); | ||||
3779 | |||||
3780 | // icmp sge Op0, (C + 1) -> icmp sgt Op0, C | ||||
3781 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One())) | ||||
3782 | return new ICmpInst(CmpInst::ICMP_SGT, Op0, C); | ||||
3783 | |||||
3784 | // icmp slt Op0, (C + 1) -> icmp sle Op0, C | ||||
3785 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One())) | ||||
3786 | return new ICmpInst(CmpInst::ICMP_SLE, Op0, C); | ||||
3787 | |||||
3788 | // TODO: The subtraction-related identities shown below also hold, but | ||||
3789 | // canonicalization from (X -nuw 1) to (X + -1) means that the combinations | ||||
3790 | // wouldn't happen even if they were implemented. | ||||
3791 | // | ||||
3792 | // icmp ult (A - 1), Op1 -> icmp ule A, Op1 | ||||
3793 | // icmp uge (A - 1), Op1 -> icmp ugt A, Op1 | ||||
3794 | // icmp ugt Op0, (C - 1) -> icmp uge Op0, C | ||||
3795 | // icmp ule Op0, (C - 1) -> icmp ult Op0, C | ||||
3796 | |||||
3797 | // icmp ule (A + 1), Op0 -> icmp ult A, Op1 | ||||
3798 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One())) | ||||
3799 | return new ICmpInst(CmpInst::ICMP_ULT, A, Op1); | ||||
3800 | |||||
3801 | // icmp ugt (A + 1), Op0 -> icmp uge A, Op1 | ||||
3802 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One())) | ||||
3803 | return new ICmpInst(CmpInst::ICMP_UGE, A, Op1); | ||||
3804 | |||||
3805 | // icmp uge Op0, (C + 1) -> icmp ugt Op0, C | ||||
3806 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One())) | ||||
3807 | return new ICmpInst(CmpInst::ICMP_UGT, Op0, C); | ||||
3808 | |||||
3809 | // icmp ult Op0, (C + 1) -> icmp ule Op0, C | ||||
3810 | if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One())) | ||||
3811 | return new ICmpInst(CmpInst::ICMP_ULE, Op0, C); | ||||
3812 | |||||
3813 | // if C1 has greater magnitude than C2: | ||||
3814 | // icmp (A + C1), (C + C2) -> icmp (A + C3), C | ||||
3815 | // s.t. C3 = C1 - C2 | ||||
3816 | // | ||||
3817 | // if C2 has greater magnitude than C1: | ||||
3818 | // icmp (A + C1), (C + C2) -> icmp A, (C + C3) | ||||
3819 | // s.t. C3 = C2 - C1 | ||||
3820 | if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && | ||||
3821 | (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) | ||||
3822 | if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) | ||||
3823 | if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { | ||||
3824 | const APInt &AP1 = C1->getValue(); | ||||
3825 | const APInt &AP2 = C2->getValue(); | ||||
3826 | if (AP1.isNegative() == AP2.isNegative()) { | ||||
3827 | APInt AP1Abs = C1->getValue().abs(); | ||||
3828 | APInt AP2Abs = C2->getValue().abs(); | ||||
3829 | if (AP1Abs.uge(AP2Abs)) { | ||||
3830 | ConstantInt *C3 = Builder.getInt(AP1 - AP2); | ||||
3831 | Value *NewAdd = Builder.CreateNSWAdd(A, C3); | ||||
3832 | return new ICmpInst(Pred, NewAdd, C); | ||||
3833 | } else { | ||||
3834 | ConstantInt *C3 = Builder.getInt(AP2 - AP1); | ||||
3835 | Value *NewAdd = Builder.CreateNSWAdd(C, C3); | ||||
3836 | return new ICmpInst(Pred, A, NewAdd); | ||||
3837 | } | ||||
3838 | } | ||||
3839 | } | ||||
3840 | |||||
3841 | // Analyze the case when either Op0 or Op1 is a sub instruction. | ||||
3842 | // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). | ||||
3843 | A = nullptr; | ||||
3844 | B = nullptr; | ||||
3845 | C = nullptr; | ||||
3846 | D = nullptr; | ||||
3847 | if (BO0 && BO0->getOpcode() == Instruction::Sub) { | ||||
3848 | A = BO0->getOperand(0); | ||||
3849 | B = BO0->getOperand(1); | ||||
3850 | } | ||||
3851 | if (BO1 && BO1->getOpcode() == Instruction::Sub) { | ||||
3852 | C = BO1->getOperand(0); | ||||
3853 | D = BO1->getOperand(1); | ||||
3854 | } | ||||
3855 | |||||
3856 | // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow. | ||||
3857 | if (A == Op1 && NoOp0WrapProblem) | ||||
3858 | return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); | ||||
3859 | // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow. | ||||
3860 | if (C == Op0 && NoOp1WrapProblem) | ||||
3861 | return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); | ||||
3862 | |||||
3863 | // Convert sub-with-unsigned-overflow comparisons into a comparison of args. | ||||
3864 | // (A - B) u>/u<= A --> B u>/u<= A | ||||
3865 | if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) | ||||
3866 | return new ICmpInst(Pred, B, A); | ||||
3867 | // C u</u>= (C - D) --> C u</u>= D | ||||
3868 | if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) | ||||
3869 | return new ICmpInst(Pred, C, D); | ||||
3870 | // (A - B) u>=/u< A --> B u>/u<= A iff B != 0 | ||||
3871 | if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && | ||||
3872 | isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) | ||||
3873 | return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A); | ||||
3874 | // C u<=/u> (C - D) --> C u</u>= D iff B != 0 | ||||
3875 | if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && | ||||
3876 | isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) | ||||
3877 | return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D); | ||||
3878 | |||||
3879 | // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow. | ||||
3880 | if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem) | ||||
3881 | return new ICmpInst(Pred, A, C); | ||||
3882 | |||||
3883 | // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow. | ||||
3884 | if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem) | ||||
3885 | return new ICmpInst(Pred, D, B); | ||||
3886 | |||||
3887 | // icmp (0-X) < cst --> x > -cst | ||||
3888 | if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { | ||||
3889 | Value *X; | ||||
3890 | if (match(BO0, m_Neg(m_Value(X)))) | ||||
3891 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) | ||||
3892 | if (RHSC->isNotMinSignedValue()) | ||||
3893 | return new ICmpInst(I.getSwappedPredicate(), X, | ||||
3894 | ConstantExpr::getNeg(RHSC)); | ||||
3895 | } | ||||
3896 | |||||
3897 | BinaryOperator *SRem = nullptr; | ||||
3898 | // icmp (srem X, Y), Y | ||||
3899 | if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) | ||||
3900 | SRem = BO0; | ||||
3901 | // icmp Y, (srem X, Y) | ||||
3902 | else if (BO1 && BO1->getOpcode() == Instruction::SRem && | ||||
3903 | Op0 == BO1->getOperand(1)) | ||||
3904 | SRem = BO1; | ||||
3905 | if (SRem) { | ||||
3906 | // We don't check hasOneUse to avoid increasing register pressure because | ||||
3907 | // the value we use is the same value this instruction was already using. | ||||
3908 | switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { | ||||
3909 | default: | ||||
3910 | break; | ||||
3911 | case ICmpInst::ICMP_EQ: | ||||
3912 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
3913 | case ICmpInst::ICMP_NE: | ||||
3914 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
3915 | case ICmpInst::ICMP_SGT: | ||||
3916 | case ICmpInst::ICMP_SGE: | ||||
3917 | return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), | ||||
3918 | Constant::getAllOnesValue(SRem->getType())); | ||||
3919 | case ICmpInst::ICMP_SLT: | ||||
3920 | case ICmpInst::ICMP_SLE: | ||||
3921 | return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), | ||||
3922 | Constant::getNullValue(SRem->getType())); | ||||
3923 | } | ||||
3924 | } | ||||
3925 | |||||
3926 | if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() && | ||||
3927 | BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) { | ||||
3928 | switch (BO0->getOpcode()) { | ||||
3929 | default: | ||||
3930 | break; | ||||
3931 | case Instruction::Add: | ||||
3932 | case Instruction::Sub: | ||||
3933 | case Instruction::Xor: { | ||||
3934 | if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b | ||||
3935 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3936 | |||||
3937 | const APInt *C; | ||||
3938 | if (match(BO0->getOperand(1), m_APInt(C))) { | ||||
3939 | // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b | ||||
3940 | if (C->isSignMask()) { | ||||
3941 | ICmpInst::Predicate NewPred = | ||||
3942 | I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); | ||||
3943 | return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3944 | } | ||||
3945 | |||||
3946 | // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b | ||||
3947 | if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { | ||||
3948 | ICmpInst::Predicate NewPred = | ||||
3949 | I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); | ||||
3950 | NewPred = I.getSwappedPredicate(NewPred); | ||||
3951 | return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3952 | } | ||||
3953 | } | ||||
3954 | break; | ||||
3955 | } | ||||
3956 | case Instruction::Mul: { | ||||
3957 | if (!I.isEquality()) | ||||
3958 | break; | ||||
3959 | |||||
3960 | const APInt *C; | ||||
3961 | if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() && | ||||
3962 | !C->isOneValue()) { | ||||
3963 | // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) | ||||
3964 | // Mask = -1 >> count-trailing-zeros(C). | ||||
3965 | if (unsigned TZs = C->countTrailingZeros()) { | ||||
3966 | Constant *Mask = ConstantInt::get( | ||||
3967 | BO0->getType(), | ||||
3968 | APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); | ||||
3969 | Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); | ||||
3970 | Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); | ||||
3971 | return new ICmpInst(Pred, And1, And2); | ||||
3972 | } | ||||
3973 | // If there are no trailing zeros in the multiplier, just eliminate | ||||
3974 | // the multiplies (no masking is needed): | ||||
3975 | // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y | ||||
3976 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3977 | } | ||||
3978 | break; | ||||
3979 | } | ||||
3980 | case Instruction::UDiv: | ||||
3981 | case Instruction::LShr: | ||||
3982 | if (I.isSigned() || !BO0->isExact() || !BO1->isExact()) | ||||
3983 | break; | ||||
3984 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3985 | |||||
3986 | case Instruction::SDiv: | ||||
3987 | if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) | ||||
3988 | break; | ||||
3989 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3990 | |||||
3991 | case Instruction::AShr: | ||||
3992 | if (!BO0->isExact() || !BO1->isExact()) | ||||
3993 | break; | ||||
3994 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
3995 | |||||
3996 | case Instruction::Shl: { | ||||
3997 | bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); | ||||
3998 | bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); | ||||
3999 | if (!NUW && !NSW) | ||||
4000 | break; | ||||
4001 | if (!NSW && I.isSigned()) | ||||
4002 | break; | ||||
4003 | return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); | ||||
4004 | } | ||||
4005 | } | ||||
4006 | } | ||||
4007 | |||||
4008 | if (BO0) { | ||||
4009 | // Transform A & (L - 1) `ult` L --> L != 0 | ||||
4010 | auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes()); | ||||
4011 | auto BitwiseAnd = m_c_And(m_Value(), LSubOne); | ||||
4012 | |||||
4013 | if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) { | ||||
4014 | auto *Zero = Constant::getNullValue(BO0->getType()); | ||||
4015 | return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero); | ||||
4016 | } | ||||
4017 | } | ||||
4018 | |||||
4019 | if (Value *V = foldUnsignedMultiplicationOverflowCheck(I)) | ||||
4020 | return replaceInstUsesWith(I, V); | ||||
4021 | |||||
4022 | if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder)) | ||||
4023 | return replaceInstUsesWith(I, V); | ||||
4024 | |||||
4025 | if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder)) | ||||
4026 | return replaceInstUsesWith(I, V); | ||||
4027 | |||||
4028 | if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder)) | ||||
4029 | return replaceInstUsesWith(I, V); | ||||
4030 | |||||
4031 | return nullptr; | ||||
4032 | } | ||||
4033 | |||||
4034 | /// Fold icmp Pred min|max(X, Y), X. | ||||
4035 | static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) { | ||||
4036 | ICmpInst::Predicate Pred = Cmp.getPredicate(); | ||||
4037 | Value *Op0 = Cmp.getOperand(0); | ||||
4038 | Value *X = Cmp.getOperand(1); | ||||
4039 | |||||
4040 | // Canonicalize minimum or maximum operand to LHS of the icmp. | ||||
4041 | if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) || | ||||
4042 | match(X, m_c_SMax(m_Specific(Op0), m_Value())) || | ||||
4043 | match(X, m_c_UMin(m_Specific(Op0), m_Value())) || | ||||
4044 | match(X, m_c_UMax(m_Specific(Op0), m_Value()))) { | ||||
4045 | std::swap(Op0, X); | ||||
4046 | Pred = Cmp.getSwappedPredicate(); | ||||
4047 | } | ||||
4048 | |||||
4049 | Value *Y; | ||||
4050 | if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) { | ||||
4051 | // smin(X, Y) == X --> X s<= Y | ||||
4052 | // smin(X, Y) s>= X --> X s<= Y | ||||
4053 | if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE) | ||||
4054 | return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); | ||||
4055 | |||||
4056 | // smin(X, Y) != X --> X s> Y | ||||
4057 | // smin(X, Y) s< X --> X s> Y | ||||
4058 | if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT) | ||||
4059 | return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); | ||||
4060 | |||||
4061 | // These cases should be handled in InstSimplify: | ||||
4062 | // smin(X, Y) s<= X --> true | ||||
4063 | // smin(X, Y) s> X --> false | ||||
4064 | return nullptr; | ||||
4065 | } | ||||
4066 | |||||
4067 | if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) { | ||||
4068 | // smax(X, Y) == X --> X s>= Y | ||||
4069 | // smax(X, Y) s<= X --> X s>= Y | ||||
4070 | if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE) | ||||
4071 | return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); | ||||
4072 | |||||
4073 | // smax(X, Y) != X --> X s< Y | ||||
4074 | // smax(X, Y) s> X --> X s< Y | ||||
4075 | if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT) | ||||
4076 | return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); | ||||
4077 | |||||
4078 | // These cases should be handled in InstSimplify: | ||||
4079 | // smax(X, Y) s>= X --> true | ||||
4080 | // smax(X, Y) s< X --> false | ||||
4081 | return nullptr; | ||||
4082 | } | ||||
4083 | |||||
4084 | if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) { | ||||
4085 | // umin(X, Y) == X --> X u<= Y | ||||
4086 | // umin(X, Y) u>= X --> X u<= Y | ||||
4087 | if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE) | ||||
4088 | return new ICmpInst(ICmpInst::ICMP_ULE, X, Y); | ||||
4089 | |||||
4090 | // umin(X, Y) != X --> X u> Y | ||||
4091 | // umin(X, Y) u< X --> X u> Y | ||||
4092 | if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT) | ||||
4093 | return new ICmpInst(ICmpInst::ICMP_UGT, X, Y); | ||||
4094 | |||||
4095 | // These cases should be handled in InstSimplify: | ||||
4096 | // umin(X, Y) u<= X --> true | ||||
4097 | // umin(X, Y) u> X --> false | ||||
4098 | return nullptr; | ||||
4099 | } | ||||
4100 | |||||
4101 | if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) { | ||||
4102 | // umax(X, Y) == X --> X u>= Y | ||||
4103 | // umax(X, Y) u<= X --> X u>= Y | ||||
4104 | if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE) | ||||
4105 | return new ICmpInst(ICmpInst::ICMP_UGE, X, Y); | ||||
4106 | |||||
4107 | // umax(X, Y) != X --> X u< Y | ||||
4108 | // umax(X, Y) u> X --> X u< Y | ||||
4109 | if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT) | ||||
4110 | return new ICmpInst(ICmpInst::ICMP_ULT, X, Y); | ||||
4111 | |||||
4112 | // These cases should be handled in InstSimplify: | ||||
4113 | // umax(X, Y) u>= X --> true | ||||
4114 | // umax(X, Y) u< X --> false | ||||
4115 | return nullptr; | ||||
4116 | } | ||||
4117 | |||||
4118 | return nullptr; | ||||
4119 | } | ||||
4120 | |||||
4121 | Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { | ||||
4122 | if (!I.isEquality()) | ||||
4123 | return nullptr; | ||||
4124 | |||||
4125 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
4126 | const CmpInst::Predicate Pred = I.getPredicate(); | ||||
4127 | Value *A, *B, *C, *D; | ||||
4128 | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { | ||||
4129 | if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 | ||||
4130 | Value *OtherVal = A == Op1 ? B : A; | ||||
4131 | return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); | ||||
4132 | } | ||||
4133 | |||||
4134 | if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { | ||||
4135 | // A^c1 == C^c2 --> A == C^(c1^c2) | ||||
4136 | ConstantInt *C1, *C2; | ||||
4137 | if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && | ||||
4138 | Op1->hasOneUse()) { | ||||
4139 | Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); | ||||
4140 | Value *Xor = Builder.CreateXor(C, NC); | ||||
4141 | return new ICmpInst(Pred, A, Xor); | ||||
4142 | } | ||||
4143 | |||||
4144 | // A^B == A^D -> B == D | ||||
4145 | if (A == C) | ||||
4146 | return new ICmpInst(Pred, B, D); | ||||
4147 | if (A == D) | ||||
4148 | return new ICmpInst(Pred, B, C); | ||||
4149 | if (B == C) | ||||
4150 | return new ICmpInst(Pred, A, D); | ||||
4151 | if (B == D) | ||||
4152 | return new ICmpInst(Pred, A, C); | ||||
4153 | } | ||||
4154 | } | ||||
4155 | |||||
4156 | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { | ||||
4157 | // A == (A^B) -> B == 0 | ||||
4158 | Value *OtherVal = A == Op0 ? B : A; | ||||
4159 | return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType())); | ||||
4160 | } | ||||
4161 | |||||
4162 | // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 | ||||
4163 | if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && | ||||
4164 | match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { | ||||
4165 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; | ||||
4166 | |||||
4167 | if (A == C) { | ||||
4168 | X = B; | ||||
4169 | Y = D; | ||||
4170 | Z = A; | ||||
4171 | } else if (A == D) { | ||||
4172 | X = B; | ||||
4173 | Y = C; | ||||
4174 | Z = A; | ||||
4175 | } else if (B == C) { | ||||
4176 | X = A; | ||||
4177 | Y = D; | ||||
4178 | Z = B; | ||||
4179 | } else if (B == D) { | ||||
4180 | X = A; | ||||
4181 | Y = C; | ||||
4182 | Z = B; | ||||
4183 | } | ||||
4184 | |||||
4185 | if (X) { // Build (X^Y) & Z | ||||
4186 | Op1 = Builder.CreateXor(X, Y); | ||||
4187 | Op1 = Builder.CreateAnd(Op1, Z); | ||||
4188 | I.setOperand(0, Op1); | ||||
4189 | I.setOperand(1, Constant::getNullValue(Op1->getType())); | ||||
4190 | return &I; | ||||
4191 | } | ||||
4192 | } | ||||
4193 | |||||
4194 | // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) | ||||
4195 | // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) | ||||
4196 | ConstantInt *Cst1; | ||||
4197 | if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) && | ||||
4198 | match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || | ||||
4199 | (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && | ||||
4200 | match(Op1, m_ZExt(m_Value(A))))) { | ||||
4201 | APInt Pow2 = Cst1->getValue() + 1; | ||||
4202 | if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && | ||||
4203 | Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) | ||||
4204 | return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); | ||||
4205 | } | ||||
4206 | |||||
4207 | // (A >> C) == (B >> C) --> (A^B) u< (1 << C) | ||||
4208 | // For lshr and ashr pairs. | ||||
4209 | if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && | ||||
4210 | match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || | ||||
4211 | (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && | ||||
4212 | match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { | ||||
4213 | unsigned TypeBits = Cst1->getBitWidth(); | ||||
4214 | unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); | ||||
4215 | if (ShAmt < TypeBits && ShAmt != 0) { | ||||
4216 | ICmpInst::Predicate NewPred = | ||||
4217 | Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; | ||||
4218 | Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); | ||||
4219 | APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); | ||||
4220 | return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); | ||||
4221 | } | ||||
4222 | } | ||||
4223 | |||||
4224 | // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0 | ||||
4225 | if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) && | ||||
4226 | match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) { | ||||
4227 | unsigned TypeBits = Cst1->getBitWidth(); | ||||
4228 | unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); | ||||
4229 | if (ShAmt < TypeBits && ShAmt != 0) { | ||||
4230 | Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); | ||||
4231 | APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); | ||||
4232 | Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), | ||||
4233 | I.getName() + ".mask"); | ||||
4234 | return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); | ||||
4235 | } | ||||
4236 | } | ||||
4237 | |||||
4238 | // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to | ||||
4239 | // "icmp (and X, mask), cst" | ||||
4240 | uint64_t ShAmt = 0; | ||||
4241 | if (Op0->hasOneUse() && | ||||
4242 | match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) && | ||||
4243 | match(Op1, m_ConstantInt(Cst1)) && | ||||
4244 | // Only do this when A has multiple uses. This is most important to do | ||||
4245 | // when it exposes other optimizations. | ||||
4246 | !A->hasOneUse()) { | ||||
4247 | unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); | ||||
4248 | |||||
4249 | if (ShAmt < ASize) { | ||||
4250 | APInt MaskV = | ||||
4251 | APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); | ||||
4252 | MaskV <<= ShAmt; | ||||
4253 | |||||
4254 | APInt CmpV = Cst1->getValue().zext(ASize); | ||||
4255 | CmpV <<= ShAmt; | ||||
4256 | |||||
4257 | Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); | ||||
4258 | return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); | ||||
4259 | } | ||||
4260 | } | ||||
4261 | |||||
4262 | // If both operands are byte-swapped or bit-reversed, just compare the | ||||
4263 | // original values. | ||||
4264 | // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant() | ||||
4265 | // and handle more intrinsics. | ||||
4266 | if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) || | ||||
4267 | (match(Op0, m_BitReverse(m_Value(A))) && | ||||
4268 | match(Op1, m_BitReverse(m_Value(B))))) | ||||
4269 | return new ICmpInst(Pred, A, B); | ||||
4270 | |||||
4271 | // Canonicalize checking for a power-of-2-or-zero value: | ||||
4272 | // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants) | ||||
4273 | // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants) | ||||
4274 | if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()), | ||||
4275 | m_Deferred(A)))) || | ||||
4276 | !match(Op1, m_ZeroInt())) | ||||
4277 | A = nullptr; | ||||
4278 | |||||
4279 | // (A & -A) == A --> ctpop(A) < 2 (four commuted variants) | ||||
4280 | // (-A & A) != A --> ctpop(A) > 1 (four commuted variants) | ||||
4281 | if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1))))) | ||||
4282 | A = Op1; | ||||
4283 | else if (match(Op1, | ||||
4284 | m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0))))) | ||||
4285 | A = Op0; | ||||
4286 | |||||
4287 | if (A) { | ||||
4288 | Type *Ty = A->getType(); | ||||
4289 | CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A); | ||||
4290 | return Pred == ICmpInst::ICMP_EQ | ||||
4291 | ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2)) | ||||
4292 | : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1)); | ||||
4293 | } | ||||
4294 | |||||
4295 | return nullptr; | ||||
4296 | } | ||||
4297 | |||||
4298 | static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp, | ||||
4299 | InstCombiner::BuilderTy &Builder) { | ||||
4300 | assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0")((isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0" ) ? static_cast<void> (0) : __assert_fail ("isa<CastInst>(ICmp.getOperand(0)) && \"Expected cast for operand 0\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4300, __PRETTY_FUNCTION__)); | ||||
4301 | auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0)); | ||||
4302 | Value *X; | ||||
4303 | if (!match(CastOp0, m_ZExtOrSExt(m_Value(X)))) | ||||
4304 | return nullptr; | ||||
4305 | |||||
4306 | bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt; | ||||
4307 | bool IsSignedCmp = ICmp.isSigned(); | ||||
4308 | if (auto *CastOp1 = dyn_cast<CastInst>(ICmp.getOperand(1))) { | ||||
4309 | // If the signedness of the two casts doesn't agree (i.e. one is a sext | ||||
4310 | // and the other is a zext), then we can't handle this. | ||||
4311 | // TODO: This is too strict. We can handle some predicates (equality?). | ||||
4312 | if (CastOp0->getOpcode() != CastOp1->getOpcode()) | ||||
4313 | return nullptr; | ||||
4314 | |||||
4315 | // Not an extension from the same type? | ||||
4316 | Value *Y = CastOp1->getOperand(0); | ||||
4317 | Type *XTy = X->getType(), *YTy = Y->getType(); | ||||
4318 | if (XTy != YTy) { | ||||
4319 | // One of the casts must have one use because we are creating a new cast. | ||||
4320 | if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse()) | ||||
4321 | return nullptr; | ||||
4322 | // Extend the narrower operand to the type of the wider operand. | ||||
4323 | if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits()) | ||||
4324 | X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy); | ||||
4325 | else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits()) | ||||
4326 | Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy); | ||||
4327 | else | ||||
4328 | return nullptr; | ||||
4329 | } | ||||
4330 | |||||
4331 | // (zext X) == (zext Y) --> X == Y | ||||
4332 | // (sext X) == (sext Y) --> X == Y | ||||
4333 | if (ICmp.isEquality()) | ||||
4334 | return new ICmpInst(ICmp.getPredicate(), X, Y); | ||||
4335 | |||||
4336 | // A signed comparison of sign extended values simplifies into a | ||||
4337 | // signed comparison. | ||||
4338 | if (IsSignedCmp && IsSignedExt) | ||||
4339 | return new ICmpInst(ICmp.getPredicate(), X, Y); | ||||
4340 | |||||
4341 | // The other three cases all fold into an unsigned comparison. | ||||
4342 | return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y); | ||||
4343 | } | ||||
4344 | |||||
4345 | // Below here, we are only folding a compare with constant. | ||||
4346 | auto *C = dyn_cast<Constant>(ICmp.getOperand(1)); | ||||
4347 | if (!C) | ||||
4348 | return nullptr; | ||||
4349 | |||||
4350 | // Compute the constant that would happen if we truncated to SrcTy then | ||||
4351 | // re-extended to DestTy. | ||||
4352 | Type *SrcTy = CastOp0->getSrcTy(); | ||||
4353 | Type *DestTy = CastOp0->getDestTy(); | ||||
4354 | Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy); | ||||
4355 | Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy); | ||||
4356 | |||||
4357 | // If the re-extended constant didn't change... | ||||
4358 | if (Res2 == C) { | ||||
4359 | if (ICmp.isEquality()) | ||||
4360 | return new ICmpInst(ICmp.getPredicate(), X, Res1); | ||||
4361 | |||||
4362 | // A signed comparison of sign extended values simplifies into a | ||||
4363 | // signed comparison. | ||||
4364 | if (IsSignedExt && IsSignedCmp) | ||||
4365 | return new ICmpInst(ICmp.getPredicate(), X, Res1); | ||||
4366 | |||||
4367 | // The other three cases all fold into an unsigned comparison. | ||||
4368 | return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1); | ||||
4369 | } | ||||
4370 | |||||
4371 | // The re-extended constant changed, partly changed (in the case of a vector), | ||||
4372 | // or could not be determined to be equal (in the case of a constant | ||||
4373 | // expression), so the constant cannot be represented in the shorter type. | ||||
4374 | // All the cases that fold to true or false will have already been handled | ||||
4375 | // by SimplifyICmpInst, so only deal with the tricky case. | ||||
4376 | if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C)) | ||||
4377 | return nullptr; | ||||
4378 | |||||
4379 | // Is source op positive? | ||||
4380 | // icmp ult (sext X), C --> icmp sgt X, -1 | ||||
4381 | if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) | ||||
4382 | return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy)); | ||||
4383 | |||||
4384 | // Is source op negative? | ||||
4385 | // icmp ugt (sext X), C --> icmp slt X, 0 | ||||
4386 | assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!")((ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!" ) ? static_cast<void> (0) : __assert_fail ("ICmp.getPredicate() == ICmpInst::ICMP_UGT && \"ICmp should be folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4386, __PRETTY_FUNCTION__)); | ||||
4387 | return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy)); | ||||
4388 | } | ||||
4389 | |||||
4390 | /// Handle icmp (cast x), (cast or constant). | ||||
4391 | Instruction *InstCombiner::foldICmpWithCastOp(ICmpInst &ICmp) { | ||||
4392 | auto *CastOp0 = dyn_cast<CastInst>(ICmp.getOperand(0)); | ||||
4393 | if (!CastOp0) | ||||
4394 | return nullptr; | ||||
4395 | if (!isa<Constant>(ICmp.getOperand(1)) && !isa<CastInst>(ICmp.getOperand(1))) | ||||
4396 | return nullptr; | ||||
4397 | |||||
4398 | Value *Op0Src = CastOp0->getOperand(0); | ||||
4399 | Type *SrcTy = CastOp0->getSrcTy(); | ||||
4400 | Type *DestTy = CastOp0->getDestTy(); | ||||
4401 | |||||
4402 | // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the | ||||
4403 | // integer type is the same size as the pointer type. | ||||
4404 | auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) { | ||||
4405 | if (isa<VectorType>(SrcTy)) { | ||||
4406 | SrcTy = cast<VectorType>(SrcTy)->getElementType(); | ||||
4407 | DestTy = cast<VectorType>(DestTy)->getElementType(); | ||||
4408 | } | ||||
4409 | return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); | ||||
4410 | }; | ||||
4411 | if (CastOp0->getOpcode() == Instruction::PtrToInt && | ||||
4412 | CompatibleSizes(SrcTy, DestTy)) { | ||||
4413 | Value *NewOp1 = nullptr; | ||||
4414 | if (auto *PtrToIntOp1 = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) { | ||||
4415 | Value *PtrSrc = PtrToIntOp1->getOperand(0); | ||||
4416 | if (PtrSrc->getType()->getPointerAddressSpace() == | ||||
4417 | Op0Src->getType()->getPointerAddressSpace()) { | ||||
4418 | NewOp1 = PtrToIntOp1->getOperand(0); | ||||
4419 | // If the pointer types don't match, insert a bitcast. | ||||
4420 | if (Op0Src->getType() != NewOp1->getType()) | ||||
4421 | NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType()); | ||||
4422 | } | ||||
4423 | } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) { | ||||
4424 | NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy); | ||||
4425 | } | ||||
4426 | |||||
4427 | if (NewOp1) | ||||
4428 | return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1); | ||||
4429 | } | ||||
4430 | |||||
4431 | return foldICmpWithZextOrSext(ICmp, Builder); | ||||
4432 | } | ||||
4433 | |||||
4434 | static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) { | ||||
4435 | switch (BinaryOp) { | ||||
4436 | default: | ||||
4437 | llvm_unreachable("Unsupported binary op")::llvm::llvm_unreachable_internal("Unsupported binary op", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4437); | ||||
4438 | case Instruction::Add: | ||||
4439 | case Instruction::Sub: | ||||
4440 | return match(RHS, m_Zero()); | ||||
4441 | case Instruction::Mul: | ||||
4442 | return match(RHS, m_One()); | ||||
4443 | } | ||||
4444 | } | ||||
4445 | |||||
4446 | OverflowResult InstCombiner::computeOverflow( | ||||
4447 | Instruction::BinaryOps BinaryOp, bool IsSigned, | ||||
4448 | Value *LHS, Value *RHS, Instruction *CxtI) const { | ||||
4449 | switch (BinaryOp) { | ||||
4450 | default: | ||||
4451 | llvm_unreachable("Unsupported binary op")::llvm::llvm_unreachable_internal("Unsupported binary op", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4451); | ||||
4452 | case Instruction::Add: | ||||
4453 | if (IsSigned) | ||||
4454 | return computeOverflowForSignedAdd(LHS, RHS, CxtI); | ||||
4455 | else | ||||
4456 | return computeOverflowForUnsignedAdd(LHS, RHS, CxtI); | ||||
4457 | case Instruction::Sub: | ||||
4458 | if (IsSigned) | ||||
4459 | return computeOverflowForSignedSub(LHS, RHS, CxtI); | ||||
4460 | else | ||||
4461 | return computeOverflowForUnsignedSub(LHS, RHS, CxtI); | ||||
4462 | case Instruction::Mul: | ||||
4463 | if (IsSigned) | ||||
4464 | return computeOverflowForSignedMul(LHS, RHS, CxtI); | ||||
4465 | else | ||||
4466 | return computeOverflowForUnsignedMul(LHS, RHS, CxtI); | ||||
4467 | } | ||||
4468 | } | ||||
4469 | |||||
4470 | bool InstCombiner::OptimizeOverflowCheck( | ||||
4471 | Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, | ||||
4472 | Instruction &OrigI, Value *&Result, Constant *&Overflow) { | ||||
4473 | if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS)) | ||||
4474 | std::swap(LHS, RHS); | ||||
4475 | |||||
4476 | // If the overflow check was an add followed by a compare, the insertion point | ||||
4477 | // may be pointing to the compare. We want to insert the new instructions | ||||
4478 | // before the add in case there are uses of the add between the add and the | ||||
4479 | // compare. | ||||
4480 | Builder.SetInsertPoint(&OrigI); | ||||
4481 | |||||
4482 | if (isNeutralValue(BinaryOp, RHS)) { | ||||
4483 | Result = LHS; | ||||
4484 | Overflow = Builder.getFalse(); | ||||
4485 | return true; | ||||
4486 | } | ||||
4487 | |||||
4488 | switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) { | ||||
4489 | case OverflowResult::MayOverflow: | ||||
4490 | return false; | ||||
4491 | case OverflowResult::AlwaysOverflowsLow: | ||||
4492 | case OverflowResult::AlwaysOverflowsHigh: | ||||
4493 | Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); | ||||
4494 | Result->takeName(&OrigI); | ||||
4495 | Overflow = Builder.getTrue(); | ||||
4496 | return true; | ||||
4497 | case OverflowResult::NeverOverflows: | ||||
4498 | Result = Builder.CreateBinOp(BinaryOp, LHS, RHS); | ||||
4499 | Result->takeName(&OrigI); | ||||
4500 | Overflow = Builder.getFalse(); | ||||
4501 | if (auto *Inst = dyn_cast<Instruction>(Result)) { | ||||
4502 | if (IsSigned) | ||||
4503 | Inst->setHasNoSignedWrap(); | ||||
4504 | else | ||||
4505 | Inst->setHasNoUnsignedWrap(); | ||||
4506 | } | ||||
4507 | return true; | ||||
4508 | } | ||||
4509 | |||||
4510 | llvm_unreachable("Unexpected overflow result")::llvm::llvm_unreachable_internal("Unexpected overflow result" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4510); | ||||
4511 | } | ||||
4512 | |||||
4513 | /// Recognize and process idiom involving test for multiplication | ||||
4514 | /// overflow. | ||||
4515 | /// | ||||
4516 | /// The caller has matched a pattern of the form: | ||||
4517 | /// I = cmp u (mul(zext A, zext B), V | ||||
4518 | /// The function checks if this is a test for overflow and if so replaces | ||||
4519 | /// multiplication with call to 'mul.with.overflow' intrinsic. | ||||
4520 | /// | ||||
4521 | /// \param I Compare instruction. | ||||
4522 | /// \param MulVal Result of 'mult' instruction. It is one of the arguments of | ||||
4523 | /// the compare instruction. Must be of integer type. | ||||
4524 | /// \param OtherVal The other argument of compare instruction. | ||||
4525 | /// \returns Instruction which must replace the compare instruction, NULL if no | ||||
4526 | /// replacement required. | ||||
4527 | static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, | ||||
4528 | Value *OtherVal, InstCombiner &IC) { | ||||
4529 | // Don't bother doing this transformation for pointers, don't do it for | ||||
4530 | // vectors. | ||||
4531 | if (!isa<IntegerType>(MulVal->getType())) | ||||
4532 | return nullptr; | ||||
4533 | |||||
4534 | assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal)((I.getOperand(0) == MulVal || I.getOperand(1) == MulVal) ? static_cast <void> (0) : __assert_fail ("I.getOperand(0) == MulVal || I.getOperand(1) == MulVal" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4534, __PRETTY_FUNCTION__)); | ||||
4535 | assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal)((I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal) ? static_cast<void> (0) : __assert_fail ("I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4535, __PRETTY_FUNCTION__)); | ||||
4536 | auto *MulInstr = dyn_cast<Instruction>(MulVal); | ||||
4537 | if (!MulInstr) | ||||
4538 | return nullptr; | ||||
4539 | assert(MulInstr->getOpcode() == Instruction::Mul)((MulInstr->getOpcode() == Instruction::Mul) ? static_cast <void> (0) : __assert_fail ("MulInstr->getOpcode() == Instruction::Mul" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4539, __PRETTY_FUNCTION__)); | ||||
4540 | |||||
4541 | auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)), | ||||
4542 | *RHS = cast<ZExtOperator>(MulInstr->getOperand(1)); | ||||
4543 | assert(LHS->getOpcode() == Instruction::ZExt)((LHS->getOpcode() == Instruction::ZExt) ? static_cast< void> (0) : __assert_fail ("LHS->getOpcode() == Instruction::ZExt" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4543, __PRETTY_FUNCTION__)); | ||||
4544 | assert(RHS->getOpcode() == Instruction::ZExt)((RHS->getOpcode() == Instruction::ZExt) ? static_cast< void> (0) : __assert_fail ("RHS->getOpcode() == Instruction::ZExt" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4544, __PRETTY_FUNCTION__)); | ||||
4545 | Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); | ||||
4546 | |||||
4547 | // Calculate type and width of the result produced by mul.with.overflow. | ||||
4548 | Type *TyA = A->getType(), *TyB = B->getType(); | ||||
4549 | unsigned WidthA = TyA->getPrimitiveSizeInBits(), | ||||
4550 | WidthB = TyB->getPrimitiveSizeInBits(); | ||||
4551 | unsigned MulWidth; | ||||
4552 | Type *MulType; | ||||
4553 | if (WidthB > WidthA) { | ||||
4554 | MulWidth = WidthB; | ||||
4555 | MulType = TyB; | ||||
4556 | } else { | ||||
4557 | MulWidth = WidthA; | ||||
4558 | MulType = TyA; | ||||
4559 | } | ||||
4560 | |||||
4561 | // In order to replace the original mul with a narrower mul.with.overflow, | ||||
4562 | // all uses must ignore upper bits of the product. The number of used low | ||||
4563 | // bits must be not greater than the width of mul.with.overflow. | ||||
4564 | if (MulVal->hasNUsesOrMore(2)) | ||||
4565 | for (User *U : MulVal->users()) { | ||||
4566 | if (U == &I) | ||||
4567 | continue; | ||||
4568 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | ||||
4569 | // Check if truncation ignores bits above MulWidth. | ||||
4570 | unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); | ||||
4571 | if (TruncWidth > MulWidth) | ||||
4572 | return nullptr; | ||||
4573 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | ||||
4574 | // Check if AND ignores bits above MulWidth. | ||||
4575 | if (BO->getOpcode() != Instruction::And) | ||||
4576 | return nullptr; | ||||
4577 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { | ||||
4578 | const APInt &CVal = CI->getValue(); | ||||
4579 | if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) | ||||
4580 | return nullptr; | ||||
4581 | } else { | ||||
4582 | // In this case we could have the operand of the binary operation | ||||
4583 | // being defined in another block, and performing the replacement | ||||
4584 | // could break the dominance relation. | ||||
4585 | return nullptr; | ||||
4586 | } | ||||
4587 | } else { | ||||
4588 | // Other uses prohibit this transformation. | ||||
4589 | return nullptr; | ||||
4590 | } | ||||
4591 | } | ||||
4592 | |||||
4593 | // Recognize patterns | ||||
4594 | switch (I.getPredicate()) { | ||||
4595 | case ICmpInst::ICMP_EQ: | ||||
4596 | case ICmpInst::ICMP_NE: | ||||
4597 | // Recognize pattern: | ||||
4598 | // mulval = mul(zext A, zext B) | ||||
4599 | // cmp eq/neq mulval, zext trunc mulval | ||||
4600 | if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) | ||||
4601 | if (Zext->hasOneUse()) { | ||||
4602 | Value *ZextArg = Zext->getOperand(0); | ||||
4603 | if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) | ||||
4604 | if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) | ||||
4605 | break; //Recognized | ||||
4606 | } | ||||
4607 | |||||
4608 | // Recognize pattern: | ||||
4609 | // mulval = mul(zext A, zext B) | ||||
4610 | // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. | ||||
4611 | ConstantInt *CI; | ||||
4612 | Value *ValToMask; | ||||
4613 | if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { | ||||
4614 | if (ValToMask != MulVal) | ||||
4615 | return nullptr; | ||||
4616 | const APInt &CVal = CI->getValue() + 1; | ||||
4617 | if (CVal.isPowerOf2()) { | ||||
4618 | unsigned MaskWidth = CVal.logBase2(); | ||||
4619 | if (MaskWidth == MulWidth) | ||||
4620 | break; // Recognized | ||||
4621 | } | ||||
4622 | } | ||||
4623 | return nullptr; | ||||
4624 | |||||
4625 | case ICmpInst::ICMP_UGT: | ||||
4626 | // Recognize pattern: | ||||
4627 | // mulval = mul(zext A, zext B) | ||||
4628 | // cmp ugt mulval, max | ||||
4629 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | ||||
4630 | APInt MaxVal = APInt::getMaxValue(MulWidth); | ||||
4631 | MaxVal = MaxVal.zext(CI->getBitWidth()); | ||||
4632 | if (MaxVal.eq(CI->getValue())) | ||||
4633 | break; // Recognized | ||||
4634 | } | ||||
4635 | return nullptr; | ||||
4636 | |||||
4637 | case ICmpInst::ICMP_UGE: | ||||
4638 | // Recognize pattern: | ||||
4639 | // mulval = mul(zext A, zext B) | ||||
4640 | // cmp uge mulval, max+1 | ||||
4641 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | ||||
4642 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | ||||
4643 | if (MaxVal.eq(CI->getValue())) | ||||
4644 | break; // Recognized | ||||
4645 | } | ||||
4646 | return nullptr; | ||||
4647 | |||||
4648 | case ICmpInst::ICMP_ULE: | ||||
4649 | // Recognize pattern: | ||||
4650 | // mulval = mul(zext A, zext B) | ||||
4651 | // cmp ule mulval, max | ||||
4652 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | ||||
4653 | APInt MaxVal = APInt::getMaxValue(MulWidth); | ||||
4654 | MaxVal = MaxVal.zext(CI->getBitWidth()); | ||||
4655 | if (MaxVal.eq(CI->getValue())) | ||||
4656 | break; // Recognized | ||||
4657 | } | ||||
4658 | return nullptr; | ||||
4659 | |||||
4660 | case ICmpInst::ICMP_ULT: | ||||
4661 | // Recognize pattern: | ||||
4662 | // mulval = mul(zext A, zext B) | ||||
4663 | // cmp ule mulval, max + 1 | ||||
4664 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | ||||
4665 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | ||||
4666 | if (MaxVal.eq(CI->getValue())) | ||||
4667 | break; // Recognized | ||||
4668 | } | ||||
4669 | return nullptr; | ||||
4670 | |||||
4671 | default: | ||||
4672 | return nullptr; | ||||
4673 | } | ||||
4674 | |||||
4675 | InstCombiner::BuilderTy &Builder = IC.Builder; | ||||
4676 | Builder.SetInsertPoint(MulInstr); | ||||
4677 | |||||
4678 | // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) | ||||
4679 | Value *MulA = A, *MulB = B; | ||||
4680 | if (WidthA < MulWidth) | ||||
4681 | MulA = Builder.CreateZExt(A, MulType); | ||||
4682 | if (WidthB < MulWidth) | ||||
4683 | MulB = Builder.CreateZExt(B, MulType); | ||||
4684 | Function *F = Intrinsic::getDeclaration( | ||||
4685 | I.getModule(), Intrinsic::umul_with_overflow, MulType); | ||||
4686 | CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); | ||||
4687 | IC.Worklist.Add(MulInstr); | ||||
4688 | |||||
4689 | // If there are uses of mul result other than the comparison, we know that | ||||
4690 | // they are truncation or binary AND. Change them to use result of | ||||
4691 | // mul.with.overflow and adjust properly mask/size. | ||||
4692 | if (MulVal->hasNUsesOrMore(2)) { | ||||
4693 | Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); | ||||
4694 | for (auto UI = MulVal->user_begin(), UE = MulVal->user_end(); UI != UE;) { | ||||
4695 | User *U = *UI++; | ||||
4696 | if (U == &I || U == OtherVal) | ||||
4697 | continue; | ||||
4698 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | ||||
4699 | if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) | ||||
4700 | IC.replaceInstUsesWith(*TI, Mul); | ||||
4701 | else | ||||
4702 | TI->setOperand(0, Mul); | ||||
4703 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | ||||
4704 | assert(BO->getOpcode() == Instruction::And)((BO->getOpcode() == Instruction::And) ? static_cast<void > (0) : __assert_fail ("BO->getOpcode() == Instruction::And" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4704, __PRETTY_FUNCTION__)); | ||||
4705 | // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) | ||||
4706 | ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); | ||||
4707 | APInt ShortMask = CI->getValue().trunc(MulWidth); | ||||
4708 | Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); | ||||
4709 | Instruction *Zext = | ||||
4710 | cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType())); | ||||
4711 | IC.Worklist.Add(Zext); | ||||
4712 | IC.replaceInstUsesWith(*BO, Zext); | ||||
4713 | } else { | ||||
4714 | llvm_unreachable("Unexpected Binary operation")::llvm::llvm_unreachable_internal("Unexpected Binary operation" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4714); | ||||
4715 | } | ||||
4716 | IC.Worklist.Add(cast<Instruction>(U)); | ||||
4717 | } | ||||
4718 | } | ||||
4719 | if (isa<Instruction>(OtherVal)) | ||||
4720 | IC.Worklist.Add(cast<Instruction>(OtherVal)); | ||||
4721 | |||||
4722 | // The original icmp gets replaced with the overflow value, maybe inverted | ||||
4723 | // depending on predicate. | ||||
4724 | bool Inverse = false; | ||||
4725 | switch (I.getPredicate()) { | ||||
4726 | case ICmpInst::ICMP_NE: | ||||
4727 | break; | ||||
4728 | case ICmpInst::ICMP_EQ: | ||||
4729 | Inverse = true; | ||||
4730 | break; | ||||
4731 | case ICmpInst::ICMP_UGT: | ||||
4732 | case ICmpInst::ICMP_UGE: | ||||
4733 | if (I.getOperand(0) == MulVal) | ||||
4734 | break; | ||||
4735 | Inverse = true; | ||||
4736 | break; | ||||
4737 | case ICmpInst::ICMP_ULT: | ||||
4738 | case ICmpInst::ICMP_ULE: | ||||
4739 | if (I.getOperand(1) == MulVal) | ||||
4740 | break; | ||||
4741 | Inverse = true; | ||||
4742 | break; | ||||
4743 | default: | ||||
4744 | llvm_unreachable("Unexpected predicate")::llvm::llvm_unreachable_internal("Unexpected predicate", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4744); | ||||
4745 | } | ||||
4746 | if (Inverse) { | ||||
4747 | Value *Res = Builder.CreateExtractValue(Call, 1); | ||||
4748 | return BinaryOperator::CreateNot(Res); | ||||
4749 | } | ||||
4750 | |||||
4751 | return ExtractValueInst::Create(Call, 1); | ||||
4752 | } | ||||
4753 | |||||
4754 | /// When performing a comparison against a constant, it is possible that not all | ||||
4755 | /// the bits in the LHS are demanded. This helper method computes the mask that | ||||
4756 | /// IS demanded. | ||||
4757 | static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) { | ||||
4758 | const APInt *RHS; | ||||
4759 | if (!match(I.getOperand(1), m_APInt(RHS))) | ||||
4760 | return APInt::getAllOnesValue(BitWidth); | ||||
4761 | |||||
4762 | // If this is a normal comparison, it demands all bits. If it is a sign bit | ||||
4763 | // comparison, it only demands the sign bit. | ||||
4764 | bool UnusedBit; | ||||
4765 | if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit)) | ||||
4766 | return APInt::getSignMask(BitWidth); | ||||
4767 | |||||
4768 | switch (I.getPredicate()) { | ||||
4769 | // For a UGT comparison, we don't care about any bits that | ||||
4770 | // correspond to the trailing ones of the comparand. The value of these | ||||
4771 | // bits doesn't impact the outcome of the comparison, because any value | ||||
4772 | // greater than the RHS must differ in a bit higher than these due to carry. | ||||
4773 | case ICmpInst::ICMP_UGT: | ||||
4774 | return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes()); | ||||
4775 | |||||
4776 | // Similarly, for a ULT comparison, we don't care about the trailing zeros. | ||||
4777 | // Any value less than the RHS must differ in a higher bit because of carries. | ||||
4778 | case ICmpInst::ICMP_ULT: | ||||
4779 | return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros()); | ||||
4780 | |||||
4781 | default: | ||||
4782 | return APInt::getAllOnesValue(BitWidth); | ||||
4783 | } | ||||
4784 | } | ||||
4785 | |||||
4786 | /// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst | ||||
4787 | /// should be swapped. | ||||
4788 | /// The decision is based on how many times these two operands are reused | ||||
4789 | /// as subtract operands and their positions in those instructions. | ||||
4790 | /// The rationale is that several architectures use the same instruction for | ||||
4791 | /// both subtract and cmp. Thus, it is better if the order of those operands | ||||
4792 | /// match. | ||||
4793 | /// \return true if Op0 and Op1 should be swapped. | ||||
4794 | static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) { | ||||
4795 | // Filter out pointer values as those cannot appear directly in subtract. | ||||
4796 | // FIXME: we may want to go through inttoptrs or bitcasts. | ||||
4797 | if (Op0->getType()->isPointerTy()) | ||||
4798 | return false; | ||||
4799 | // If a subtract already has the same operands as a compare, swapping would be | ||||
4800 | // bad. If a subtract has the same operands as a compare but in reverse order, | ||||
4801 | // then swapping is good. | ||||
4802 | int GoodToSwap = 0; | ||||
4803 | for (const User *U : Op0->users()) { | ||||
4804 | if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0)))) | ||||
4805 | GoodToSwap++; | ||||
4806 | else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1)))) | ||||
4807 | GoodToSwap--; | ||||
4808 | } | ||||
4809 | return GoodToSwap > 0; | ||||
4810 | } | ||||
4811 | |||||
4812 | /// Check that one use is in the same block as the definition and all | ||||
4813 | /// other uses are in blocks dominated by a given block. | ||||
4814 | /// | ||||
4815 | /// \param DI Definition | ||||
4816 | /// \param UI Use | ||||
4817 | /// \param DB Block that must dominate all uses of \p DI outside | ||||
4818 | /// the parent block | ||||
4819 | /// \return true when \p UI is the only use of \p DI in the parent block | ||||
4820 | /// and all other uses of \p DI are in blocks dominated by \p DB. | ||||
4821 | /// | ||||
4822 | bool InstCombiner::dominatesAllUses(const Instruction *DI, | ||||
4823 | const Instruction *UI, | ||||
4824 | const BasicBlock *DB) const { | ||||
4825 | assert(DI && UI && "Instruction not defined\n")((DI && UI && "Instruction not defined\n") ? static_cast <void> (0) : __assert_fail ("DI && UI && \"Instruction not defined\\n\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4825, __PRETTY_FUNCTION__)); | ||||
4826 | // Ignore incomplete definitions. | ||||
4827 | if (!DI->getParent()) | ||||
4828 | return false; | ||||
4829 | // DI and UI must be in the same block. | ||||
4830 | if (DI->getParent() != UI->getParent()) | ||||
4831 | return false; | ||||
4832 | // Protect from self-referencing blocks. | ||||
4833 | if (DI->getParent() == DB) | ||||
4834 | return false; | ||||
4835 | for (const User *U : DI->users()) { | ||||
4836 | auto *Usr = cast<Instruction>(U); | ||||
4837 | if (Usr != UI && !DT.dominates(DB, Usr->getParent())) | ||||
4838 | return false; | ||||
4839 | } | ||||
4840 | return true; | ||||
4841 | } | ||||
4842 | |||||
4843 | /// Return true when the instruction sequence within a block is select-cmp-br. | ||||
4844 | static bool isChainSelectCmpBranch(const SelectInst *SI) { | ||||
4845 | const BasicBlock *BB = SI->getParent(); | ||||
4846 | if (!BB) | ||||
4847 | return false; | ||||
4848 | auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator()); | ||||
4849 | if (!BI || BI->getNumSuccessors() != 2) | ||||
4850 | return false; | ||||
4851 | auto *IC = dyn_cast<ICmpInst>(BI->getCondition()); | ||||
4852 | if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI)) | ||||
4853 | return false; | ||||
4854 | return true; | ||||
4855 | } | ||||
4856 | |||||
4857 | /// True when a select result is replaced by one of its operands | ||||
4858 | /// in select-icmp sequence. This will eventually result in the elimination | ||||
4859 | /// of the select. | ||||
4860 | /// | ||||
4861 | /// \param SI Select instruction | ||||
4862 | /// \param Icmp Compare instruction | ||||
4863 | /// \param SIOpd Operand that replaces the select | ||||
4864 | /// | ||||
4865 | /// Notes: | ||||
4866 | /// - The replacement is global and requires dominator information | ||||
4867 | /// - The caller is responsible for the actual replacement | ||||
4868 | /// | ||||
4869 | /// Example: | ||||
4870 | /// | ||||
4871 | /// entry: | ||||
4872 | /// %4 = select i1 %3, %C* %0, %C* null | ||||
4873 | /// %5 = icmp eq %C* %4, null | ||||
4874 | /// br i1 %5, label %9, label %7 | ||||
4875 | /// ... | ||||
4876 | /// ; <label>:7 ; preds = %entry | ||||
4877 | /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0 | ||||
4878 | /// ... | ||||
4879 | /// | ||||
4880 | /// can be transformed to | ||||
4881 | /// | ||||
4882 | /// %5 = icmp eq %C* %0, null | ||||
4883 | /// %6 = select i1 %3, i1 %5, i1 true | ||||
4884 | /// br i1 %6, label %9, label %7 | ||||
4885 | /// ... | ||||
4886 | /// ; <label>:7 ; preds = %entry | ||||
4887 | /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0! | ||||
4888 | /// | ||||
4889 | /// Similar when the first operand of the select is a constant or/and | ||||
4890 | /// the compare is for not equal rather than equal. | ||||
4891 | /// | ||||
4892 | /// NOTE: The function is only called when the select and compare constants | ||||
4893 | /// are equal, the optimization can work only for EQ predicates. This is not a | ||||
4894 | /// major restriction since a NE compare should be 'normalized' to an equal | ||||
4895 | /// compare, which usually happens in the combiner and test case | ||||
4896 | /// select-cmp-br.ll checks for it. | ||||
4897 | bool InstCombiner::replacedSelectWithOperand(SelectInst *SI, | ||||
4898 | const ICmpInst *Icmp, | ||||
4899 | const unsigned SIOpd) { | ||||
4900 | assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!")(((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!" ) ? static_cast<void> (0) : __assert_fail ("(SIOpd == 1 || SIOpd == 2) && \"Invalid select operand!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4900, __PRETTY_FUNCTION__)); | ||||
4901 | if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) { | ||||
4902 | BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1); | ||||
4903 | // The check for the single predecessor is not the best that can be | ||||
4904 | // done. But it protects efficiently against cases like when SI's | ||||
4905 | // home block has two successors, Succ and Succ1, and Succ1 predecessor | ||||
4906 | // of Succ. Then SI can't be replaced by SIOpd because the use that gets | ||||
4907 | // replaced can be reached on either path. So the uniqueness check | ||||
4908 | // guarantees that the path all uses of SI (outside SI's parent) are on | ||||
4909 | // is disjoint from all other paths out of SI. But that information | ||||
4910 | // is more expensive to compute, and the trade-off here is in favor | ||||
4911 | // of compile-time. It should also be noticed that we check for a single | ||||
4912 | // predecessor and not only uniqueness. This to handle the situation when | ||||
4913 | // Succ and Succ1 points to the same basic block. | ||||
4914 | if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) { | ||||
4915 | NumSel++; | ||||
4916 | SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent()); | ||||
4917 | return true; | ||||
4918 | } | ||||
4919 | } | ||||
4920 | return false; | ||||
4921 | } | ||||
4922 | |||||
4923 | /// Try to fold the comparison based on range information we can get by checking | ||||
4924 | /// whether bits are known to be zero or one in the inputs. | ||||
4925 | Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { | ||||
4926 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
4927 | Type *Ty = Op0->getType(); | ||||
4928 | ICmpInst::Predicate Pred = I.getPredicate(); | ||||
4929 | |||||
4930 | // Get scalar or pointer size. | ||||
4931 | unsigned BitWidth = Ty->isIntOrIntVectorTy() | ||||
4932 | ? Ty->getScalarSizeInBits() | ||||
4933 | : DL.getPointerTypeSizeInBits(Ty->getScalarType()); | ||||
4934 | |||||
4935 | if (!BitWidth) | ||||
4936 | return nullptr; | ||||
4937 | |||||
4938 | KnownBits Op0Known(BitWidth); | ||||
4939 | KnownBits Op1Known(BitWidth); | ||||
4940 | |||||
4941 | if (SimplifyDemandedBits(&I, 0, | ||||
4942 | getDemandedBitsLHSMask(I, BitWidth), | ||||
4943 | Op0Known, 0)) | ||||
4944 | return &I; | ||||
4945 | |||||
4946 | if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth), | ||||
4947 | Op1Known, 0)) | ||||
4948 | return &I; | ||||
4949 | |||||
4950 | // Given the known and unknown bits, compute a range that the LHS could be | ||||
4951 | // in. Compute the Min, Max and RHS values based on the known bits. For the | ||||
4952 | // EQ and NE we use unsigned values. | ||||
4953 | APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); | ||||
4954 | APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); | ||||
4955 | if (I.isSigned()) { | ||||
4956 | computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); | ||||
4957 | computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); | ||||
4958 | } else { | ||||
4959 | computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); | ||||
4960 | computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); | ||||
4961 | } | ||||
4962 | |||||
4963 | // If Min and Max are known to be the same, then SimplifyDemandedBits figured | ||||
4964 | // out that the LHS or RHS is a constant. Constant fold this now, so that | ||||
4965 | // code below can assume that Min != Max. | ||||
4966 | if (!isa<Constant>(Op0) && Op0Min == Op0Max) | ||||
4967 | return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1); | ||||
4968 | if (!isa<Constant>(Op1) && Op1Min == Op1Max) | ||||
4969 | return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min)); | ||||
4970 | |||||
4971 | // Based on the range information we know about the LHS, see if we can | ||||
4972 | // simplify this comparison. For example, (x&4) < 8 is always true. | ||||
4973 | switch (Pred) { | ||||
4974 | default: | ||||
4975 | llvm_unreachable("Unknown icmp opcode!")::llvm::llvm_unreachable_internal("Unknown icmp opcode!", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 4975); | ||||
4976 | case ICmpInst::ICMP_EQ: | ||||
4977 | case ICmpInst::ICMP_NE: { | ||||
4978 | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) { | ||||
4979 | return Pred == CmpInst::ICMP_EQ | ||||
4980 | ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())) | ||||
4981 | : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
4982 | } | ||||
4983 | |||||
4984 | // If all bits are known zero except for one, then we know at most one bit | ||||
4985 | // is set. If the comparison is against zero, then this is a check to see if | ||||
4986 | // *that* bit is set. | ||||
4987 | APInt Op0KnownZeroInverted = ~Op0Known.Zero; | ||||
4988 | if (Op1Known.isZero()) { | ||||
4989 | // If the LHS is an AND with the same constant, look through it. | ||||
4990 | Value *LHS = nullptr; | ||||
4991 | const APInt *LHSC; | ||||
4992 | if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) || | ||||
4993 | *LHSC != Op0KnownZeroInverted) | ||||
4994 | LHS = Op0; | ||||
4995 | |||||
4996 | Value *X; | ||||
4997 | if (match(LHS, m_Shl(m_One(), m_Value(X)))) { | ||||
4998 | APInt ValToCheck = Op0KnownZeroInverted; | ||||
4999 | Type *XTy = X->getType(); | ||||
5000 | if (ValToCheck.isPowerOf2()) { | ||||
5001 | // ((1 << X) & 8) == 0 -> X != 3 | ||||
5002 | // ((1 << X) & 8) != 0 -> X == 3 | ||||
5003 | auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros()); | ||||
5004 | auto NewPred = ICmpInst::getInversePredicate(Pred); | ||||
5005 | return new ICmpInst(NewPred, X, CmpC); | ||||
5006 | } else if ((++ValToCheck).isPowerOf2()) { | ||||
5007 | // ((1 << X) & 7) == 0 -> X >= 3 | ||||
5008 | // ((1 << X) & 7) != 0 -> X < 3 | ||||
5009 | auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros()); | ||||
5010 | auto NewPred = | ||||
5011 | Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT; | ||||
5012 | return new ICmpInst(NewPred, X, CmpC); | ||||
5013 | } | ||||
5014 | } | ||||
5015 | |||||
5016 | // Check if the LHS is 8 >>u x and the result is a power of 2 like 1. | ||||
5017 | const APInt *CI; | ||||
5018 | if (Op0KnownZeroInverted.isOneValue() && | ||||
5019 | match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) { | ||||
5020 | // ((8 >>u X) & 1) == 0 -> X != 3 | ||||
5021 | // ((8 >>u X) & 1) != 0 -> X == 3 | ||||
5022 | unsigned CmpVal = CI->countTrailingZeros(); | ||||
5023 | auto NewPred = ICmpInst::getInversePredicate(Pred); | ||||
5024 | return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal)); | ||||
5025 | } | ||||
5026 | } | ||||
5027 | break; | ||||
5028 | } | ||||
5029 | case ICmpInst::ICMP_ULT: { | ||||
5030 | if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) | ||||
5031 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5032 | if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) | ||||
5033 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5034 | if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) | ||||
5035 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | ||||
5036 | |||||
5037 | const APInt *CmpC; | ||||
5038 | if (match(Op1, m_APInt(CmpC))) { | ||||
5039 | // A <u C -> A == C-1 if min(A)+1 == C | ||||
5040 | if (*CmpC == Op0Min + 1) | ||||
5041 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | ||||
5042 | ConstantInt::get(Op1->getType(), *CmpC - 1)); | ||||
5043 | // X <u C --> X == 0, if the number of zero bits in the bottom of X | ||||
5044 | // exceeds the log2 of C. | ||||
5045 | if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) | ||||
5046 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | ||||
5047 | Constant::getNullValue(Op1->getType())); | ||||
5048 | } | ||||
5049 | break; | ||||
5050 | } | ||||
5051 | case ICmpInst::ICMP_UGT: { | ||||
5052 | if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) | ||||
5053 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5054 | if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) | ||||
5055 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5056 | if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) | ||||
5057 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | ||||
5058 | |||||
5059 | const APInt *CmpC; | ||||
5060 | if (match(Op1, m_APInt(CmpC))) { | ||||
5061 | // A >u C -> A == C+1 if max(a)-1 == C | ||||
5062 | if (*CmpC == Op0Max - 1) | ||||
5063 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | ||||
5064 | ConstantInt::get(Op1->getType(), *CmpC + 1)); | ||||
5065 | // X >u C --> X != 0, if the number of zero bits in the bottom of X | ||||
5066 | // exceeds the log2 of C. | ||||
5067 | if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) | ||||
5068 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, | ||||
5069 | Constant::getNullValue(Op1->getType())); | ||||
5070 | } | ||||
5071 | break; | ||||
5072 | } | ||||
5073 | case ICmpInst::ICMP_SLT: { | ||||
5074 | if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) | ||||
5075 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5076 | if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) | ||||
5077 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5078 | if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) | ||||
5079 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | ||||
5080 | const APInt *CmpC; | ||||
5081 | if (match(Op1, m_APInt(CmpC))) { | ||||
5082 | if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C | ||||
5083 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | ||||
5084 | ConstantInt::get(Op1->getType(), *CmpC - 1)); | ||||
5085 | } | ||||
5086 | break; | ||||
5087 | } | ||||
5088 | case ICmpInst::ICMP_SGT: { | ||||
5089 | if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) | ||||
5090 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5091 | if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) | ||||
5092 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5093 | if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) | ||||
5094 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | ||||
5095 | const APInt *CmpC; | ||||
5096 | if (match(Op1, m_APInt(CmpC))) { | ||||
5097 | if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C | ||||
5098 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | ||||
5099 | ConstantInt::get(Op1->getType(), *CmpC + 1)); | ||||
5100 | } | ||||
5101 | break; | ||||
5102 | } | ||||
5103 | case ICmpInst::ICMP_SGE: | ||||
5104 | assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!")((!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!" ) ? static_cast<void> (0) : __assert_fail ("!isa<ConstantInt>(Op1) && \"ICMP_SGE with ConstantInt not folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5104, __PRETTY_FUNCTION__)); | ||||
5105 | if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) | ||||
5106 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5107 | if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) | ||||
5108 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5109 | if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B) | ||||
5110 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); | ||||
5111 | break; | ||||
5112 | case ICmpInst::ICMP_SLE: | ||||
5113 | assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!")((!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!" ) ? static_cast<void> (0) : __assert_fail ("!isa<ConstantInt>(Op1) && \"ICMP_SLE with ConstantInt not folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5113, __PRETTY_FUNCTION__)); | ||||
5114 | if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) | ||||
5115 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5116 | if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) | ||||
5117 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5118 | if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B) | ||||
5119 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); | ||||
5120 | break; | ||||
5121 | case ICmpInst::ICMP_UGE: | ||||
5122 | assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!")((!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!" ) ? static_cast<void> (0) : __assert_fail ("!isa<ConstantInt>(Op1) && \"ICMP_UGE with ConstantInt not folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5122, __PRETTY_FUNCTION__)); | ||||
5123 | if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) | ||||
5124 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5125 | if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) | ||||
5126 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5127 | if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B) | ||||
5128 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); | ||||
5129 | break; | ||||
5130 | case ICmpInst::ICMP_ULE: | ||||
5131 | assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!")((!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!" ) ? static_cast<void> (0) : __assert_fail ("!isa<ConstantInt>(Op1) && \"ICMP_ULE with ConstantInt not folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5131, __PRETTY_FUNCTION__)); | ||||
5132 | if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) | ||||
5133 | return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | ||||
5134 | if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) | ||||
5135 | return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | ||||
5136 | if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B) | ||||
5137 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); | ||||
5138 | break; | ||||
5139 | } | ||||
5140 | |||||
5141 | // Turn a signed comparison into an unsigned one if both operands are known to | ||||
5142 | // have the same sign. | ||||
5143 | if (I.isSigned() && | ||||
5144 | ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) || | ||||
5145 | (Op0Known.One.isNegative() && Op1Known.One.isNegative()))) | ||||
5146 | return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); | ||||
5147 | |||||
5148 | return nullptr; | ||||
5149 | } | ||||
5150 | |||||
5151 | llvm::Optional<std::pair<CmpInst::Predicate, Constant *>> | ||||
5152 | llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, | ||||
5153 | Constant *C) { | ||||
5154 | assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&((ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate (Pred) && "Only for relational integer predicates.") ? static_cast<void> (0) : __assert_fail ("ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && \"Only for relational integer predicates.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5155, __PRETTY_FUNCTION__)) | ||||
5155 | "Only for relational integer predicates.")((ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate (Pred) && "Only for relational integer predicates.") ? static_cast<void> (0) : __assert_fail ("ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && \"Only for relational integer predicates.\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5155, __PRETTY_FUNCTION__)); | ||||
5156 | |||||
5157 | Type *Type = C->getType(); | ||||
5158 | bool IsSigned = ICmpInst::isSigned(Pred); | ||||
5159 | |||||
5160 | CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred); | ||||
5161 | bool WillIncrement = | ||||
5162 | UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT; | ||||
5163 | |||||
5164 | // Check if the constant operand can be safely incremented/decremented | ||||
5165 | // without overflowing/underflowing. | ||||
5166 | auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) { | ||||
5167 | return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned); | ||||
5168 | }; | ||||
5169 | |||||
5170 | Constant *SafeReplacementConstant = nullptr; | ||||
5171 | if (auto *CI = dyn_cast<ConstantInt>(C)) { | ||||
5172 | // Bail out if the constant can't be safely incremented/decremented. | ||||
5173 | if (!ConstantIsOk(CI)) | ||||
5174 | return llvm::None; | ||||
5175 | } else if (Type->isVectorTy()) { | ||||
5176 | unsigned NumElts = Type->getVectorNumElements(); | ||||
5177 | for (unsigned i = 0; i != NumElts; ++i) { | ||||
5178 | Constant *Elt = C->getAggregateElement(i); | ||||
5179 | if (!Elt) | ||||
5180 | return llvm::None; | ||||
5181 | |||||
5182 | if (isa<UndefValue>(Elt)) | ||||
5183 | continue; | ||||
5184 | |||||
5185 | // Bail out if we can't determine if this constant is min/max or if we | ||||
5186 | // know that this constant is min/max. | ||||
5187 | auto *CI = dyn_cast<ConstantInt>(Elt); | ||||
5188 | if (!CI || !ConstantIsOk(CI)) | ||||
5189 | return llvm::None; | ||||
5190 | |||||
5191 | if (!SafeReplacementConstant) | ||||
5192 | SafeReplacementConstant = CI; | ||||
5193 | } | ||||
5194 | } else { | ||||
5195 | // ConstantExpr? | ||||
5196 | return llvm::None; | ||||
5197 | } | ||||
5198 | |||||
5199 | // It may not be safe to change a compare predicate in the presence of | ||||
5200 | // undefined elements, so replace those elements with the first safe constant | ||||
5201 | // that we found. | ||||
5202 | if (C->containsUndefElement()) { | ||||
5203 | assert(SafeReplacementConstant && "Replacement constant not set")((SafeReplacementConstant && "Replacement constant not set" ) ? static_cast<void> (0) : __assert_fail ("SafeReplacementConstant && \"Replacement constant not set\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5203, __PRETTY_FUNCTION__)); | ||||
5204 | C = Constant::replaceUndefsWith(C, SafeReplacementConstant); | ||||
5205 | } | ||||
5206 | |||||
5207 | CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred); | ||||
5208 | |||||
5209 | // Increment or decrement the constant. | ||||
5210 | Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true); | ||||
5211 | Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne); | ||||
5212 | |||||
5213 | return std::make_pair(NewPred, NewC); | ||||
5214 | } | ||||
5215 | |||||
5216 | /// If we have an icmp le or icmp ge instruction with a constant operand, turn | ||||
5217 | /// it into the appropriate icmp lt or icmp gt instruction. This transform | ||||
5218 | /// allows them to be folded in visitICmpInst. | ||||
5219 | static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { | ||||
5220 | ICmpInst::Predicate Pred = I.getPredicate(); | ||||
5221 | if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) || | ||||
5222 | isCanonicalPredicate(Pred)) | ||||
5223 | return nullptr; | ||||
5224 | |||||
5225 | Value *Op0 = I.getOperand(0); | ||||
5226 | Value *Op1 = I.getOperand(1); | ||||
5227 | auto *Op1C = dyn_cast<Constant>(Op1); | ||||
5228 | if (!Op1C) | ||||
5229 | return nullptr; | ||||
5230 | |||||
5231 | auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C); | ||||
5232 | if (!FlippedStrictness) | ||||
5233 | return nullptr; | ||||
5234 | |||||
5235 | return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second); | ||||
5236 | } | ||||
5237 | |||||
5238 | /// Integer compare with boolean values can always be turned into bitwise ops. | ||||
5239 | static Instruction *canonicalizeICmpBool(ICmpInst &I, | ||||
5240 | InstCombiner::BuilderTy &Builder) { | ||||
5241 | Value *A = I.getOperand(0), *B = I.getOperand(1); | ||||
5242 | assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only")((A->getType()->isIntOrIntVectorTy(1) && "Bools only" ) ? static_cast<void> (0) : __assert_fail ("A->getType()->isIntOrIntVectorTy(1) && \"Bools only\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5242, __PRETTY_FUNCTION__)); | ||||
5243 | |||||
5244 | // A boolean compared to true/false can be simplified to Op0/true/false in | ||||
5245 | // 14 out of the 20 (10 predicates * 2 constants) possible combinations. | ||||
5246 | // Cases not handled by InstSimplify are always 'not' of Op0. | ||||
5247 | if (match(B, m_Zero())) { | ||||
5248 | switch (I.getPredicate()) { | ||||
5249 | case CmpInst::ICMP_EQ: // A == 0 -> !A | ||||
5250 | case CmpInst::ICMP_ULE: // A <=u 0 -> !A | ||||
5251 | case CmpInst::ICMP_SGE: // A >=s 0 -> !A | ||||
5252 | return BinaryOperator::CreateNot(A); | ||||
5253 | default: | ||||
5254 | llvm_unreachable("ICmp i1 X, C not simplified as expected.")::llvm::llvm_unreachable_internal("ICmp i1 X, C not simplified as expected." , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5254); | ||||
5255 | } | ||||
5256 | } else if (match(B, m_One())) { | ||||
5257 | switch (I.getPredicate()) { | ||||
5258 | case CmpInst::ICMP_NE: // A != 1 -> !A | ||||
5259 | case CmpInst::ICMP_ULT: // A <u 1 -> !A | ||||
5260 | case CmpInst::ICMP_SGT: // A >s -1 -> !A | ||||
5261 | return BinaryOperator::CreateNot(A); | ||||
5262 | default: | ||||
5263 | llvm_unreachable("ICmp i1 X, C not simplified as expected.")::llvm::llvm_unreachable_internal("ICmp i1 X, C not simplified as expected." , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5263); | ||||
5264 | } | ||||
5265 | } | ||||
5266 | |||||
5267 | switch (I.getPredicate()) { | ||||
5268 | default: | ||||
5269 | llvm_unreachable("Invalid icmp instruction!")::llvm::llvm_unreachable_internal("Invalid icmp instruction!" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5269); | ||||
5270 | case ICmpInst::ICMP_EQ: | ||||
5271 | // icmp eq i1 A, B -> ~(A ^ B) | ||||
5272 | return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); | ||||
5273 | |||||
5274 | case ICmpInst::ICMP_NE: | ||||
5275 | // icmp ne i1 A, B -> A ^ B | ||||
5276 | return BinaryOperator::CreateXor(A, B); | ||||
5277 | |||||
5278 | case ICmpInst::ICMP_UGT: | ||||
5279 | // icmp ugt -> icmp ult | ||||
5280 | std::swap(A, B); | ||||
5281 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
5282 | case ICmpInst::ICMP_ULT: | ||||
5283 | // icmp ult i1 A, B -> ~A & B | ||||
5284 | return BinaryOperator::CreateAnd(Builder.CreateNot(A), B); | ||||
5285 | |||||
5286 | case ICmpInst::ICMP_SGT: | ||||
5287 | // icmp sgt -> icmp slt | ||||
5288 | std::swap(A, B); | ||||
5289 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
5290 | case ICmpInst::ICMP_SLT: | ||||
5291 | // icmp slt i1 A, B -> A & ~B | ||||
5292 | return BinaryOperator::CreateAnd(Builder.CreateNot(B), A); | ||||
5293 | |||||
5294 | case ICmpInst::ICMP_UGE: | ||||
5295 | // icmp uge -> icmp ule | ||||
5296 | std::swap(A, B); | ||||
5297 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
5298 | case ICmpInst::ICMP_ULE: | ||||
5299 | // icmp ule i1 A, B -> ~A | B | ||||
5300 | return BinaryOperator::CreateOr(Builder.CreateNot(A), B); | ||||
5301 | |||||
5302 | case ICmpInst::ICMP_SGE: | ||||
5303 | // icmp sge -> icmp sle | ||||
5304 | std::swap(A, B); | ||||
5305 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||
5306 | case ICmpInst::ICMP_SLE: | ||||
5307 | // icmp sle i1 A, B -> A | ~B | ||||
5308 | return BinaryOperator::CreateOr(Builder.CreateNot(B), A); | ||||
5309 | } | ||||
5310 | } | ||||
5311 | |||||
5312 | // Transform pattern like: | ||||
5313 | // (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X | ||||
5314 | // (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X | ||||
5315 | // Into: | ||||
5316 | // (X l>> Y) != 0 | ||||
5317 | // (X l>> Y) == 0 | ||||
5318 | static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp, | ||||
5319 | InstCombiner::BuilderTy &Builder) { | ||||
5320 | ICmpInst::Predicate Pred, NewPred; | ||||
5321 | Value *X, *Y; | ||||
5322 | if (match(&Cmp, | ||||
| |||||
| |||||
5323 | m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) { | ||||
| |||||
5324 | // We want X to be the icmp's second operand, so swap predicate if it isn't. | ||||
5325 | if (Cmp.getOperand(0) == X) | ||||
5326 | Pred = Cmp.getSwappedPredicate(); | ||||
5327 | |||||
5328 | switch (Pred) { | ||||
5329 | case ICmpInst::ICMP_ULE: | ||||
5330 | NewPred = ICmpInst::ICMP_NE; | ||||
5331 | break; | ||||
5332 | case ICmpInst::ICMP_UGT: | ||||
5333 | NewPred = ICmpInst::ICMP_EQ; | ||||
5334 | break; | ||||
5335 | default: | ||||
5336 | return nullptr; | ||||
5337 | } | ||||
5338 | } else if (match(&Cmp, m_c_ICmp(Pred, | ||||
5339 | m_OneUse(m_CombineOr( | ||||
5340 | m_Not(m_Shl(m_AllOnes(), m_Value(Y))), | ||||
5341 | m_Add(m_Shl(m_One(), m_Value(Y)), | ||||
5342 | m_AllOnes()))), | ||||
5343 | m_Value(X)))) { | ||||
5344 | // The variant with 'add' is not canonical, (the variant with 'not' is) | ||||
5345 | // we only get it because it has extra uses, and can't be canonicalized, | ||||
5346 | |||||
5347 | // We want X to be the icmp's second operand, so swap predicate if it isn't. | ||||
5348 | if (Cmp.getOperand(0) == X) | ||||
5349 | Pred = Cmp.getSwappedPredicate(); | ||||
5350 | |||||
5351 | switch (Pred) { | ||||
5352 | case ICmpInst::ICMP_ULT: | ||||
5353 | NewPred = ICmpInst::ICMP_NE; | ||||
5354 | break; | ||||
5355 | case ICmpInst::ICMP_UGE: | ||||
5356 | NewPred = ICmpInst::ICMP_EQ; | ||||
5357 | break; | ||||
5358 | default: | ||||
5359 | return nullptr; | ||||
5360 | } | ||||
5361 | } else | ||||
5362 | return nullptr; | ||||
5363 | |||||
5364 | Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits"); | ||||
| |||||
5365 | Constant *Zero = Constant::getNullValue(NewX->getType()); | ||||
5366 | return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero); | ||||
5367 | } | ||||
5368 | |||||
5369 | static Instruction *foldVectorCmp(CmpInst &Cmp, | ||||
5370 | InstCombiner::BuilderTy &Builder) { | ||||
5371 | // If both arguments of the cmp are shuffles that use the same mask and | ||||
5372 | // shuffle within a single vector, move the shuffle after the cmp. | ||||
5373 | Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1); | ||||
5374 | Value *V1, *V2; | ||||
5375 | Constant *M; | ||||
5376 | if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) && | ||||
5377 | match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) && | ||||
5378 | V1->getType() == V2->getType() && | ||||
5379 | (LHS->hasOneUse() || RHS->hasOneUse())) { | ||||
5380 | // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M | ||||
5381 | CmpInst::Predicate P = Cmp.getPredicate(); | ||||
5382 | Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2) | ||||
5383 | : Builder.CreateFCmp(P, V1, V2); | ||||
5384 | return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M); | ||||
5385 | } | ||||
5386 | return nullptr; | ||||
5387 | } | ||||
5388 | |||||
5389 | // extract(uadd.with.overflow(A, B), 0) ult A | ||||
5390 | // -> extract(uadd.with.overflow(A, B), 1) | ||||
5391 | static Instruction *foldICmpOfUAddOv(ICmpInst &I) { | ||||
5392 | CmpInst::Predicate Pred = I.getPredicate(); | ||||
5393 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
5394 | |||||
5395 | Value *UAddOv; | ||||
5396 | Value *A, *B; | ||||
5397 | auto UAddOvResultPat = m_ExtractValue<0>( | ||||
5398 | m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B))); | ||||
5399 | if (match(Op0, UAddOvResultPat) && | ||||
5400 | ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) || | ||||
5401 | (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) && | ||||
5402 | (match(A, m_One()) || match(B, m_One()))) || | ||||
5403 | (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) && | ||||
5404 | (match(A, m_AllOnes()) || match(B, m_AllOnes()))))) | ||||
5405 | // extract(uadd.with.overflow(A, B), 0) < A | ||||
5406 | // extract(uadd.with.overflow(A, 1), 0) == 0 | ||||
5407 | // extract(uadd.with.overflow(A, -1), 0) != -1 | ||||
5408 | UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand(); | ||||
5409 | else if (match(Op1, UAddOvResultPat) && | ||||
5410 | Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B)) | ||||
5411 | // A > extract(uadd.with.overflow(A, B), 0) | ||||
5412 | UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand(); | ||||
5413 | else | ||||
5414 | return nullptr; | ||||
5415 | |||||
5416 | return ExtractValueInst::Create(UAddOv, 1); | ||||
5417 | } | ||||
5418 | |||||
/// Visit the integer compare \p I and try a fixed sequence of folds on it.
///
/// The first fold that produces an instruction wins and its result is
/// returned. If nothing fires, the function returns &I when the operand swap
/// at the top modified the compare, and nullptr otherwise. The order of the
/// folds matters: several comments below note that folds are placed after the
/// min/max guard to avoid infinite looping.
5419 | Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { | ||||
5420 | bool Changed = false; | ||||
5421 | const SimplifyQuery Q = SQ.getWithInstruction(&I); | ||||
5422 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
5423 | unsigned Op0Cplxity = getComplexity(Op0); | ||||
5424 | unsigned Op1Cplxity = getComplexity(Op1); | ||||
5425 | |||||
5426 | /// Canonicalize the operand order so that the operand with the higher | ||||
5427 | /// complexity rank comes first. On a tie, swap only when | ||||
5428 | /// swapMayExposeCSEOpportunities() asks for it. | ||||
5429 | if (Op0Cplxity < Op1Cplxity || | ||||
5430 | (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) { | ||||
5431 | I.swapOperands(); | ||||
5432 | std::swap(Op0, Op1); | ||||
5433 | Changed = true; | ||||
5434 | } | ||||
5435 | |||||
5436 | if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q)) | ||||
5437 | return replaceInstUsesWith(I, V); | ||||
5438 | |||||
5439 | // Comparing a select between val and -val against zero is the same as | ||||
5440 | // comparing val against zero, i.e. abs(val) != 0 -> val != 0 | ||||
5441 | if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) { | ||||
5442 | Value *Cond, *SelectTrue, *SelectFalse; | ||||
5443 | if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), | ||||
5444 | m_Value(SelectFalse)))) { | ||||
5445 | if (Value *V = dyn_castNegVal(SelectTrue)) { | ||||
5446 | if (V == SelectFalse) | ||||
5447 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | ||||
5448 | } | ||||
5449 | else if (Value *V = dyn_castNegVal(SelectFalse)) { | ||||
5450 | if (V == SelectTrue) | ||||
5451 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | ||||
5452 | } | ||||
5453 | } | ||||
5454 | } | ||||
5455 | |||||
5456 | if (Op0->getType()->isIntOrIntVectorTy(1)) | ||||
5457 | if (Instruction *Res = canonicalizeICmpBool(I, Builder)) | ||||
5458 | return Res; | ||||
5459 | |||||
5460 | if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I)) | ||||
5461 | return NewICmp; | ||||
5462 | |||||
5463 | if (Instruction *Res = foldICmpWithConstant(I)) | ||||
5464 | return Res; | ||||
5465 | |||||
5466 | if (Instruction *Res = foldICmpWithDominatingICmp(I)) | ||||
5467 | return Res; | ||||
5468 | |||||
5469 | if (Instruction *Res = foldICmpBinOp(I, Q)) | ||||
5470 | return Res; | ||||
5471 | |||||
5472 | if (Instruction *Res = foldICmpUsingKnownBits(I)) | ||||
5473 | return Res; | ||||
5474 | |||||
5475 | // Test if the ICmpInst instruction is used exclusively by a select as | ||||
5476 | // part of a minimum or maximum operation. If so, refrain from doing | ||||
5477 | // any other folding. This helps out other analyses which understand | ||||
5478 | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution | ||||
5479 | // and CodeGen. And in this case, at least one of the comparison | ||||
5480 | // operands has at least one user besides the compare (the select), | ||||
5481 | // which would often largely negate the benefit of folding anyway. | ||||
5482 | // | ||||
5483 | // Do the same for the other patterns recognized by matchSelectPattern. | ||||
// NOTE(review): this early exit returns nullptr even when the operand swap
// above set Changed; confirm callers do not rely on &I being reported here.
5484 | if (I.hasOneUse()) | ||||
5485 | if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) { | ||||
5486 | Value *A, *B; | ||||
5487 | SelectPatternResult SPR = matchSelectPattern(SI, A, B); | ||||
5488 | if (SPR.Flavor != SPF_UNKNOWN) | ||||
5489 | return nullptr; | ||||
5490 | } | ||||
5491 | |||||
5492 | // Do this after checking for min/max to prevent infinite looping. | ||||
5493 | if (Instruction *Res = foldICmpWithZero(I)) | ||||
5494 | return Res; | ||||
5495 | |||||
5496 | // FIXME: We only do this after checking for min/max to prevent infinite | ||||
5497 | // looping caused by a reverse canonicalization of these patterns for min/max. | ||||
5498 | // FIXME: The organization of folds is a mess. These would naturally go into | ||||
5499 | // canonicalizeCmpWithConstant(), but we can't move all of the above folds | ||||
5500 | // down here after the min/max restriction. | ||||
5501 | ICmpInst::Predicate Pred = I.getPredicate(); | ||||
5502 | const APInt *C; | ||||
5503 | if (match(Op1, m_APInt(C))) { | ||||
5504 | // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set | ||||
5505 | if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) { | ||||
5506 | Constant *Zero = Constant::getNullValue(Op0->getType()); | ||||
5507 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero); | ||||
5508 | } | ||||
5509 | |||||
5510 | // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear | ||||
5511 | if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) { | ||||
5512 | Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); | ||||
5513 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); | ||||
5514 | } | ||||
5515 | } | ||||
5516 | |||||
5517 | if (Instruction *Res = foldICmpInstWithConstant(I)) | ||||
5518 | return Res; | ||||
5519 | |||||
5520 | // Try to match comparison as a sign bit test. Intentionally do this after | ||||
5521 | // foldICmpInstWithConstant() to potentially let other folds to happen first. | ||||
5522 | if (Instruction *New = foldSignBitTest(I)) | ||||
5523 | return New; | ||||
5524 | |||||
5525 | if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) | ||||
5526 | return Res; | ||||
5527 | |||||
5528 | // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. | ||||
5529 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) | ||||
5530 | if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I)) | ||||
5531 | return NI; | ||||
5532 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) | ||||
5533 | if (Instruction *NI = foldGEPICmp(GEP, Op0, | ||||
5534 | ICmpInst::getSwappedPredicate(I.getPredicate()), I)) | ||||
5535 | return NI; | ||||
5536 | |||||
5537 | // Try to optimize equality comparisons against alloca-based pointers. | ||||
5538 | if (Op0->getType()->isPointerTy() && I.isEquality()) { | ||||
5539 | assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?")((Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?" ) ? static_cast<void> (0) : __assert_fail ("Op1->getType()->isPointerTy() && \"Comparing pointer with non-pointer?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5539, __PRETTY_FUNCTION__)); | ||||
5540 | if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL))) | ||||
5541 | if (Instruction *New = foldAllocaCmp(I, Alloca, Op1)) | ||||
5542 | return New; | ||||
5543 | if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL))) | ||||
5544 | if (Instruction *New = foldAllocaCmp(I, Alloca, Op0)) | ||||
5545 | return New; | ||||
5546 | } | ||||
5547 | |||||
5548 | if (Instruction *Res = foldICmpBitCast(I, Builder)) | ||||
5549 | return Res; | ||||
5550 | |||||
5551 | if (Instruction *R = foldICmpWithCastOp(I)) | ||||
5552 | return R; | ||||
5553 | |||||
5554 | if (Instruction *Res = foldICmpWithMinMax(I)) | ||||
5555 | return Res; | ||||
5556 | |||||
5557 | { | ||||
5558 | Value *A, *B; | ||||
5559 | // Transform (A & ~B) == 0 --> (A & B) != 0 | ||||
5560 | // and (A & ~B) != 0 --> (A & B) == 0 | ||||
5561 | // if A is a power of 2. | ||||
5562 | if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && | ||||
5563 | match(Op1, m_Zero()) && | ||||
5564 | isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) | ||||
5565 | return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B), | ||||
5566 | Op1); | ||||
5567 | |||||
5568 | // ~X < ~Y --> Y < X | ||||
5569 | // ~X < C --> X > ~C | ||||
5570 | if (match(Op0, m_Not(m_Value(A)))) { | ||||
5571 | if (match(Op1, m_Not(m_Value(B)))) | ||||
5572 | return new ICmpInst(I.getPredicate(), B, A); | ||||
5573 | |||||
// Note: this C shadows the function-level C declared further up.
5574 | const APInt *C; | ||||
5575 | if (match(Op1, m_APInt(C))) | ||||
5576 | return new ICmpInst(I.getSwappedPredicate(), A, | ||||
5577 | ConstantInt::get(Op1->getType(), ~(*C))); | ||||
5578 | } | ||||
5579 | |||||
// If the compare is an unsigned-add overflow check on a scalar integer, let
// OptimizeOverflowCheck() try to replace both the add and the compare.
5580 | Instruction *AddI = nullptr; | ||||
5581 | if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B), | ||||
5582 | m_Instruction(AddI))) && | ||||
5583 | isa<IntegerType>(A->getType())) { | ||||
5584 | Value *Result; | ||||
5585 | Constant *Overflow; | ||||
5586 | if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B, | ||||
5587 | *AddI, Result, Overflow)) { | ||||
5588 | replaceInstUsesWith(*AddI, Result); | ||||
5589 | return replaceInstUsesWith(I, Overflow); | ||||
5590 | } | ||||
5591 | } | ||||
5592 | |||||
5593 | // (zext a) * (zext b) --> llvm.umul.with.overflow. | ||||
5594 | if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | ||||
5595 | if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this)) | ||||
5596 | return R; | ||||
5597 | } | ||||
5598 | if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | ||||
5599 | if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this)) | ||||
5600 | return R; | ||||
5601 | } | ||||
5602 | } | ||||
5603 | |||||
5604 | if (Instruction *Res = foldICmpEquality(I)) | ||||
5605 | return Res; | ||||
5606 | |||||
5607 | if (Instruction *Res = foldICmpOfUAddOv(I)) | ||||
5608 | return Res; | ||||
5609 | |||||
5610 | // The 'cmpxchg' instruction returns an aggregate containing the old value and | ||||
5611 | // an i1 which indicates whether or not we successfully did the swap. | ||||
5612 | // | ||||
5613 | // Replace comparisons between the old value and the expected value with the | ||||
5614 | // indicator that 'cmpxchg' returns. | ||||
5615 | // | ||||
5616 | // N.B. This transform is only valid when the 'cmpxchg' is not permitted to | ||||
5617 | // spuriously fail. In those cases, the old value may equal the expected | ||||
5618 | // value but it is possible for the swap to not occur. | ||||
5619 | if (I.getPredicate() == ICmpInst::ICMP_EQ) | ||||
5620 | if (auto *EVI = dyn_cast<ExtractValueInst>(Op0)) | ||||
5621 | if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand())) | ||||
5622 | if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 && | ||||
5623 | !ACXI->isWeak()) | ||||
5624 | return ExtractValueInst::Create(ACXI, 1); | ||||
5625 | |||||
5626 | { | ||||
5627 | Value *X; | ||||
5628 | const APInt *C; | ||||
5629 | // icmp X+Cst, X | ||||
5630 | if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) | ||||
5631 | return foldICmpAddOpConst(X, *C, I.getPredicate()); | ||||
5632 | |||||
5633 | // icmp X, X+Cst | ||||
5634 | if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X) | ||||
5635 | return foldICmpAddOpConst(X, *C, I.getSwappedPredicate()); | ||||
5636 | } | ||||
5637 | |||||
5638 | if (Instruction *Res = foldICmpWithHighBitMask(I, Builder)) | ||||
5639 | return Res; | ||||
5640 | |||||
5641 | if (I.getType()->isVectorTy()) | ||||
5642 | if (Instruction *Res = foldVectorCmp(I, Builder)) | ||||
5643 | return Res; | ||||
5644 | |||||
// No fold fired: report &I only if the operand swap at the top changed it.
5645 | return Changed ? &I : nullptr; | ||||
5646 | } | ||||
5647 | |||||
5648 | /// Fold fcmp ([us]itofp x, cst) if possible. | ||||
5649 | Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, | ||||
5650 | Constant *RHSC) { | ||||
5651 | if (!isa<ConstantFP>(RHSC)) return nullptr; | ||||
5652 | const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); | ||||
5653 | |||||
5654 | // Get the width of the mantissa. We don't want to hack on conversions that | ||||
5655 | // might lose information from the integer, e.g. "i64 -> float" | ||||
5656 | int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); | ||||
5657 | if (MantissaWidth == -1) return nullptr; // Unknown. | ||||
5658 | |||||
5659 | IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); | ||||
5660 | |||||
5661 | bool LHSUnsigned = isa<UIToFPInst>(LHSI); | ||||
5662 | |||||
5663 | if (I.isEquality()) { | ||||
5664 | FCmpInst::Predicate P = I.getPredicate(); | ||||
5665 | bool IsExact = false; | ||||
5666 | APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned); | ||||
5667 | RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact); | ||||
5668 | |||||
5669 | // If the floating point constant isn't an integer value, we know if we will | ||||
5670 | // ever compare equal / not equal to it. | ||||
5671 | if (!IsExact) { | ||||
5672 | // TODO: Can never be -0.0 and other non-representable values | ||||
5673 | APFloat RHSRoundInt(RHS); | ||||
5674 | RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); | ||||
5675 | if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { | ||||
5676 | if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) | ||||
5677 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5678 | |||||
5679 | assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE)((P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE) ? static_cast <void> (0) : __assert_fail ("P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5679, __PRETTY_FUNCTION__)); | ||||
5680 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5681 | } | ||||
5682 | } | ||||
5683 | |||||
5684 | // TODO: If the constant is exactly representable, is it always OK to do | ||||
5685 | // equality compares as integer? | ||||
5686 | } | ||||
5687 | |||||
5688 | // Check to see that the input is converted from an integer type that is small | ||||
5689 | // enough that preserves all bits. TODO: check here for "known" sign bits. | ||||
5690 | // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. | ||||
5691 | unsigned InputSize = IntTy->getScalarSizeInBits(); | ||||
5692 | |||||
5693 | // Following test does NOT adjust InputSize downwards for signed inputs, | ||||
5694 | // because the most negative value still requires all the mantissa bits | ||||
5695 | // to distinguish it from one less than that value. | ||||
5696 | if ((int)InputSize > MantissaWidth) { | ||||
5697 | // Conversion would lose accuracy. Check if loss can impact comparison. | ||||
5698 | int Exp = ilogb(RHS); | ||||
5699 | if (Exp == APFloat::IEK_Inf) { | ||||
5700 | int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics())); | ||||
5701 | if (MaxExponent < (int)InputSize - !LHSUnsigned) | ||||
5702 | // Conversion could create infinity. | ||||
5703 | return nullptr; | ||||
5704 | } else { | ||||
5705 | // Note that if RHS is zero or NaN, then Exp is negative | ||||
5706 | // and first condition is trivially false. | ||||
5707 | if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned) | ||||
5708 | // Conversion could affect comparison. | ||||
5709 | return nullptr; | ||||
5710 | } | ||||
5711 | } | ||||
5712 | |||||
5713 | // Otherwise, we can potentially simplify the comparison. We know that it | ||||
5714 | // will always come through as an integer value and we know the constant is | ||||
5715 | // not a NAN (it would have been previously simplified). | ||||
5716 | assert(!RHS.isNaN() && "NaN comparison not already folded!")((!RHS.isNaN() && "NaN comparison not already folded!" ) ? static_cast<void> (0) : __assert_fail ("!RHS.isNaN() && \"NaN comparison not already folded!\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5716, __PRETTY_FUNCTION__)); | ||||
5717 | |||||
5718 | ICmpInst::Predicate Pred; | ||||
5719 | switch (I.getPredicate()) { | ||||
5720 | default: llvm_unreachable("Unexpected predicate!")::llvm::llvm_unreachable_internal("Unexpected predicate!", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5720); | ||||
5721 | case FCmpInst::FCMP_UEQ: | ||||
5722 | case FCmpInst::FCMP_OEQ: | ||||
5723 | Pred = ICmpInst::ICMP_EQ; | ||||
5724 | break; | ||||
5725 | case FCmpInst::FCMP_UGT: | ||||
5726 | case FCmpInst::FCMP_OGT: | ||||
5727 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; | ||||
5728 | break; | ||||
5729 | case FCmpInst::FCMP_UGE: | ||||
5730 | case FCmpInst::FCMP_OGE: | ||||
5731 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; | ||||
5732 | break; | ||||
5733 | case FCmpInst::FCMP_ULT: | ||||
5734 | case FCmpInst::FCMP_OLT: | ||||
5735 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; | ||||
5736 | break; | ||||
5737 | case FCmpInst::FCMP_ULE: | ||||
5738 | case FCmpInst::FCMP_OLE: | ||||
5739 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; | ||||
5740 | break; | ||||
5741 | case FCmpInst::FCMP_UNE: | ||||
5742 | case FCmpInst::FCMP_ONE: | ||||
5743 | Pred = ICmpInst::ICMP_NE; | ||||
5744 | break; | ||||
5745 | case FCmpInst::FCMP_ORD: | ||||
5746 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5747 | case FCmpInst::FCMP_UNO: | ||||
5748 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5749 | } | ||||
5750 | |||||
5751 | // Now we know that the APFloat is a normal number, zero or inf. | ||||
5752 | |||||
5753 | // See if the FP constant is too large for the integer. For example, | ||||
5754 | // comparing an i8 to 300.0. | ||||
5755 | unsigned IntWidth = IntTy->getScalarSizeInBits(); | ||||
5756 | |||||
5757 | if (!LHSUnsigned) { | ||||
5758 | // If the RHS value is > SignedMax, fold the comparison. This handles +INF | ||||
5759 | // and large values. | ||||
5760 | APFloat SMax(RHS.getSemantics()); | ||||
5761 | SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, | ||||
5762 | APFloat::rmNearestTiesToEven); | ||||
5763 | if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 | ||||
5764 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || | ||||
5765 | Pred == ICmpInst::ICMP_SLE) | ||||
5766 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5767 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5768 | } | ||||
5769 | } else { | ||||
5770 | // If the RHS value is > UnsignedMax, fold the comparison. This handles | ||||
5771 | // +INF and large values. | ||||
5772 | APFloat UMax(RHS.getSemantics()); | ||||
5773 | UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, | ||||
5774 | APFloat::rmNearestTiesToEven); | ||||
5775 | if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 | ||||
5776 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || | ||||
5777 | Pred == ICmpInst::ICMP_ULE) | ||||
5778 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5779 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5780 | } | ||||
5781 | } | ||||
5782 | |||||
5783 | if (!LHSUnsigned) { | ||||
5784 | // See if the RHS value is < SignedMin. | ||||
5785 | APFloat SMin(RHS.getSemantics()); | ||||
5786 | SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, | ||||
5787 | APFloat::rmNearestTiesToEven); | ||||
5788 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 | ||||
5789 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || | ||||
5790 | Pred == ICmpInst::ICMP_SGE) | ||||
5791 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5792 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5793 | } | ||||
5794 | } else { | ||||
5795 | // See if the RHS value is < UnsignedMin. | ||||
5796 | APFloat SMin(RHS.getSemantics()); | ||||
5797 | SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, | ||||
5798 | APFloat::rmNearestTiesToEven); | ||||
5799 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 | ||||
5800 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || | ||||
5801 | Pred == ICmpInst::ICMP_UGE) | ||||
5802 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5803 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5804 | } | ||||
5805 | } | ||||
5806 | |||||
5807 | // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or | ||||
5808 | // [0, UMAX], but it may still be fractional. See if it is fractional by | ||||
5809 | // casting the FP value to the integer value and back, checking for equality. | ||||
5810 | // Don't do this for zero, because -0.0 is not fractional. | ||||
5811 | Constant *RHSInt = LHSUnsigned | ||||
5812 | ? ConstantExpr::getFPToUI(RHSC, IntTy) | ||||
5813 | : ConstantExpr::getFPToSI(RHSC, IntTy); | ||||
5814 | if (!RHS.isZero()) { | ||||
5815 | bool Equal = LHSUnsigned | ||||
5816 | ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC | ||||
5817 | : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; | ||||
5818 | if (!Equal) { | ||||
5819 | // If we had a comparison against a fractional value, we have to adjust | ||||
5820 | // the compare predicate and sometimes the value. RHSC is rounded towards | ||||
5821 | // zero at this point. | ||||
5822 | switch (Pred) { | ||||
5823 | default: llvm_unreachable("Unexpected integer comparison!")::llvm::llvm_unreachable_internal("Unexpected integer comparison!" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5823); | ||||
5824 | case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true | ||||
5825 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5826 | case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false | ||||
5827 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5828 | case ICmpInst::ICMP_ULE: | ||||
5829 | // (float)int <= 4.4 --> int <= 4 | ||||
5830 | // (float)int <= -4.4 --> false | ||||
5831 | if (RHS.isNegative()) | ||||
5832 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5833 | break; | ||||
5834 | case ICmpInst::ICMP_SLE: | ||||
5835 | // (float)int <= 4.4 --> int <= 4 | ||||
5836 | // (float)int <= -4.4 --> int < -4 | ||||
5837 | if (RHS.isNegative()) | ||||
5838 | Pred = ICmpInst::ICMP_SLT; | ||||
5839 | break; | ||||
5840 | case ICmpInst::ICMP_ULT: | ||||
5841 | // (float)int < -4.4 --> false | ||||
5842 | // (float)int < 4.4 --> int <= 4 | ||||
5843 | if (RHS.isNegative()) | ||||
5844 | return replaceInstUsesWith(I, Builder.getFalse()); | ||||
5845 | Pred = ICmpInst::ICMP_ULE; | ||||
5846 | break; | ||||
5847 | case ICmpInst::ICMP_SLT: | ||||
5848 | // (float)int < -4.4 --> int < -4 | ||||
5849 | // (float)int < 4.4 --> int <= 4 | ||||
5850 | if (!RHS.isNegative()) | ||||
5851 | Pred = ICmpInst::ICMP_SLE; | ||||
5852 | break; | ||||
5853 | case ICmpInst::ICMP_UGT: | ||||
5854 | // (float)int > 4.4 --> int > 4 | ||||
5855 | // (float)int > -4.4 --> true | ||||
5856 | if (RHS.isNegative()) | ||||
5857 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5858 | break; | ||||
5859 | case ICmpInst::ICMP_SGT: | ||||
5860 | // (float)int > 4.4 --> int > 4 | ||||
5861 | // (float)int > -4.4 --> int >= -4 | ||||
5862 | if (RHS.isNegative()) | ||||
5863 | Pred = ICmpInst::ICMP_SGE; | ||||
5864 | break; | ||||
5865 | case ICmpInst::ICMP_UGE: | ||||
5866 | // (float)int >= -4.4 --> true | ||||
5867 | // (float)int >= 4.4 --> int > 4 | ||||
5868 | if (RHS.isNegative()) | ||||
5869 | return replaceInstUsesWith(I, Builder.getTrue()); | ||||
5870 | Pred = ICmpInst::ICMP_UGT; | ||||
5871 | break; | ||||
5872 | case ICmpInst::ICMP_SGE: | ||||
5873 | // (float)int >= -4.4 --> int >= -4 | ||||
5874 | // (float)int >= 4.4 --> int > 4 | ||||
5875 | if (!RHS.isNegative()) | ||||
5876 | Pred = ICmpInst::ICMP_SGT; | ||||
5877 | break; | ||||
5878 | } | ||||
5879 | } | ||||
5880 | } | ||||
5881 | |||||
5882 | // Lower this FP comparison into an appropriate integer version of the | ||||
5883 | // comparison. | ||||
5884 | return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); | ||||
5885 | } | ||||
5886 | |||||
5887 | /// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary. | ||||
5888 | static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, | ||||
5889 | Constant *RHSC) { | ||||
5890 | // When C is not 0.0 and infinities are not allowed: | ||||
5891 | // (C / X) < 0.0 is a sign-bit test of X | ||||
5892 | // (C / X) < 0.0 --> X < 0.0 (if C is positive) | ||||
5893 | // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate) | ||||
5894 | // | ||||
5895 | // Proof: | ||||
5896 | // Multiply (C / X) < 0.0 by X * X / C. | ||||
5897 | // - X is non zero, if it is the flag 'ninf' is violated. | ||||
5898 | // - C defines the sign of X * X * C. Thus it also defines whether to swap | ||||
5899 | // the predicate. C is also non zero by definition. | ||||
5900 | // | ||||
5901 | // Thus X * X / C is non zero and the transformation is valid. [qed] | ||||
5902 | |||||
5903 | FCmpInst::Predicate Pred = I.getPredicate(); | ||||
5904 | |||||
5905 | // Check that predicates are valid. | ||||
5906 | if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) && | ||||
5907 | (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE)) | ||||
5908 | return nullptr; | ||||
5909 | |||||
5910 | // Check that RHS operand is zero. | ||||
5911 | if (!match(RHSC, m_AnyZeroFP())) | ||||
5912 | return nullptr; | ||||
5913 | |||||
5914 | // Check fastmath flags ('ninf'). | ||||
5915 | if (!LHSI->hasNoInfs() || !I.hasNoInfs()) | ||||
5916 | return nullptr; | ||||
5917 | |||||
5918 | // Check the properties of the dividend. It must not be zero to avoid a | ||||
5919 | // division by zero (see Proof). | ||||
5920 | const APFloat *C; | ||||
5921 | if (!match(LHSI->getOperand(0), m_APFloat(C))) | ||||
5922 | return nullptr; | ||||
5923 | |||||
5924 | if (C->isZero()) | ||||
5925 | return nullptr; | ||||
5926 | |||||
5927 | // Get swapped predicate if necessary. | ||||
5928 | if (C->isNegative()) | ||||
5929 | Pred = I.getSwappedPredicate(); | ||||
5930 | |||||
5931 | return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I); | ||||
5932 | } | ||||
5933 | |||||
5934 | /// Optimize fabs(X) compared with zero. | ||||
5935 | static Instruction *foldFabsWithFcmpZero(FCmpInst &I) { | ||||
5936 | Value *X; | ||||
5937 | if (!match(I.getOperand(0), m_Intrinsic<Intrinsic::fabs>(m_Value(X))) || | ||||
5938 | !match(I.getOperand(1), m_PosZeroFP())) | ||||
5939 | return nullptr; | ||||
5940 | |||||
5941 | auto replacePredAndOp0 = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) { | ||||
5942 | I->setPredicate(P); | ||||
5943 | I->setOperand(0, X); | ||||
5944 | return I; | ||||
5945 | }; | ||||
5946 | |||||
5947 | switch (I.getPredicate()) { | ||||
5948 | case FCmpInst::FCMP_UGE: | ||||
5949 | case FCmpInst::FCMP_OLT: | ||||
5950 | // fabs(X) >= 0.0 --> true | ||||
5951 | // fabs(X) < 0.0 --> false | ||||
5952 | llvm_unreachable("fcmp should have simplified")::llvm::llvm_unreachable_internal("fcmp should have simplified" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5952); | ||||
5953 | |||||
5954 | case FCmpInst::FCMP_OGT: | ||||
5955 | // fabs(X) > 0.0 --> X != 0.0 | ||||
5956 | return replacePredAndOp0(&I, FCmpInst::FCMP_ONE, X); | ||||
5957 | |||||
5958 | case FCmpInst::FCMP_UGT: | ||||
5959 | // fabs(X) u> 0.0 --> X u!= 0.0 | ||||
5960 | return replacePredAndOp0(&I, FCmpInst::FCMP_UNE, X); | ||||
5961 | |||||
5962 | case FCmpInst::FCMP_OLE: | ||||
5963 | // fabs(X) <= 0.0 --> X == 0.0 | ||||
5964 | return replacePredAndOp0(&I, FCmpInst::FCMP_OEQ, X); | ||||
5965 | |||||
5966 | case FCmpInst::FCMP_ULE: | ||||
5967 | // fabs(X) u<= 0.0 --> X u== 0.0 | ||||
5968 | return replacePredAndOp0(&I, FCmpInst::FCMP_UEQ, X); | ||||
5969 | |||||
5970 | case FCmpInst::FCMP_OGE: | ||||
5971 | // fabs(X) >= 0.0 --> !isnan(X) | ||||
5972 | assert(!I.hasNoNaNs() && "fcmp should have simplified")((!I.hasNoNaNs() && "fcmp should have simplified") ? static_cast <void> (0) : __assert_fail ("!I.hasNoNaNs() && \"fcmp should have simplified\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5972, __PRETTY_FUNCTION__)); | ||||
5973 | return replacePredAndOp0(&I, FCmpInst::FCMP_ORD, X); | ||||
5974 | |||||
5975 | case FCmpInst::FCMP_ULT: | ||||
5976 | // fabs(X) u< 0.0 --> isnan(X) | ||||
5977 | assert(!I.hasNoNaNs() && "fcmp should have simplified")((!I.hasNoNaNs() && "fcmp should have simplified") ? static_cast <void> (0) : __assert_fail ("!I.hasNoNaNs() && \"fcmp should have simplified\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 5977, __PRETTY_FUNCTION__)); | ||||
5978 | return replacePredAndOp0(&I, FCmpInst::FCMP_UNO, X); | ||||
5979 | |||||
5980 | case FCmpInst::FCMP_OEQ: | ||||
5981 | case FCmpInst::FCMP_UEQ: | ||||
5982 | case FCmpInst::FCMP_ONE: | ||||
5983 | case FCmpInst::FCMP_UNE: | ||||
5984 | case FCmpInst::FCMP_ORD: | ||||
5985 | case FCmpInst::FCMP_UNO: | ||||
5986 | // Look through the fabs() because it doesn't change anything but the sign. | ||||
5987 | // fabs(X) == 0.0 --> X == 0.0, | ||||
5988 | // fabs(X) != 0.0 --> X != 0.0 | ||||
5989 | // isnan(fabs(X)) --> isnan(X) | ||||
5990 | // !isnan(fabs(X) --> !isnan(X) | ||||
5991 | return replacePredAndOp0(&I, I.getPredicate(), X); | ||||
5992 | |||||
5993 | default: | ||||
5994 | return nullptr; | ||||
5995 | } | ||||
5996 | } | ||||
5997 | |||||
5998 | Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { | ||||
5999 | bool Changed = false; | ||||
6000 | |||||
6001 | /// Orders the operands of the compare so that they are listed from most | ||||
6002 | /// complex to least complex. This puts constants before unary operators, | ||||
6003 | /// before binary operators. | ||||
6004 | if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { | ||||
6005 | I.swapOperands(); | ||||
6006 | Changed = true; | ||||
6007 | } | ||||
6008 | |||||
6009 | const CmpInst::Predicate Pred = I.getPredicate(); | ||||
6010 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||
6011 | if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(), | ||||
6012 | SQ.getWithInstruction(&I))) | ||||
6013 | return replaceInstUsesWith(I, V); | ||||
6014 | |||||
6015 | // Simplify 'fcmp pred X, X' | ||||
6016 | Type *OpType = Op0->getType(); | ||||
6017 | assert(OpType == Op1->getType() && "fcmp with different-typed operands?")((OpType == Op1->getType() && "fcmp with different-typed operands?" ) ? static_cast<void> (0) : __assert_fail ("OpType == Op1->getType() && \"fcmp with different-typed operands?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp" , 6017, __PRETTY_FUNCTION__)); | ||||
6018 | if (Op0 == Op1) { | ||||
6019 | switch (Pred) { | ||||
6020 | default: break; | ||||
6021 | case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) | ||||
6022 | case FCmpInst::FCMP_ULT: // True if unordered or less than | ||||
6023 | case FCmpInst::FCMP_UGT: // True if unordered or greater than | ||||
6024 | case FCmpInst::FCMP_UNE: // True if unordered or not equal | ||||
6025 | // Canonicalize these to be 'fcmp uno %X, 0.0'. | ||||
6026 | I.setPredicate(FCmpInst::FCMP_UNO); | ||||
6027 | I.setOperand(1, Constant::getNullValue(OpType)); | ||||
6028 | return &I; | ||||
6029 | |||||
6030 | case FCmpInst::FCMP_ORD: // True if ordered (no nans) | ||||
6031 | case FCmpInst::FCMP_OEQ: // True if ordered and equal | ||||
6032 | case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal | ||||
6033 | case FCmpInst::FCMP_OLE: // True if ordered and less than or equal | ||||
6034 | // Canonicalize these to be 'fcmp ord %X, 0.0'. | ||||
6035 | I.setPredicate(FCmpInst::FCMP_ORD); | ||||
6036 | I.setOperand(1, Constant::getNullValue(OpType)); | ||||
6037 | return &I; | ||||
6038 | } | ||||
6039 | } | ||||
6040 | |||||
6041 | // If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand, | ||||
6042 | // then canonicalize the operand to 0.0. | ||||
6043 | if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) { | ||||
6044 | if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) { | ||||
6045 | I.setOperand(0, ConstantFP::getNullValue(OpType)); | ||||
6046 | return &I; | ||||
6047 | } | ||||
6048 | if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) { | ||||
6049 | I.setOperand(1, ConstantFP::getNullValue(OpType)); | ||||
6050 | return &I; | ||||
6051 | } | ||||
6052 | } | ||||
6053 | |||||
6054 | // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y | ||||
6055 | Value *X, *Y; | ||||
6056 | if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) | ||||
6057 | return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I); | ||||
6058 | |||||
6059 | // Test if the FCmpInst instruction is used exclusively by a select as | ||||
6060 | // part of a minimum or maximum operation. If so, refrain from doing | ||||
6061 | // any other folding. This helps out other analyses which understand | ||||
6062 | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution | ||||
6063 | // and CodeGen. And in this case, at least one of the comparison | ||||
6064 | // operands has at least one user besides the compare (the select), | ||||
6065 | // which would often largely negate the benefit of folding anyway. | ||||
6066 | if (I.hasOneUse()) | ||||
6067 | if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) { | ||||
6068 | Value *A, *B; | ||||
6069 | SelectPatternResult SPR = matchSelectPattern(SI, A, B); | ||||
6070 | if (SPR.Flavor != SPF_UNKNOWN) | ||||
6071 | return nullptr; | ||||
6072 | } | ||||
6073 | |||||
6074 | // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0: | ||||
6075 | // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0 | ||||
6076 | if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) { | ||||
6077 | I.setOperand(1, ConstantFP::getNullValue(OpType)); | ||||
6078 | return &I; | ||||
6079 | } | ||||
6080 | |||||
6081 | // Handle fcmp with instruction LHS and constant RHS. | ||||
6082 | Instruction *LHSI; | ||||
6083 | Constant *RHSC; | ||||
6084 | if (match(Op0, m_Instruction(LHSI)) && match(Op1, m_Constant(RHSC))) { | ||||
6085 | switch (LHSI->getOpcode()) { | ||||
6086 | case Instruction::PHI: | ||||
6087 | // Only fold fcmp into the PHI if the phi and fcmp are in the same | ||||
6088 | // block. If in the same block, we're encouraging jump threading. If | ||||
6089 | // not, we are just pessimizing the code by making an i1 phi. | ||||
6090 | if (LHSI->getParent() == I.getParent()) | ||||
6091 | if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI))) | ||||
6092 | return NV; | ||||
6093 | break; | ||||
6094 | case Instruction::SIToFP: | ||||
6095 | case Instruction::UIToFP: | ||||
6096 | if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC)) | ||||
6097 | return NV; | ||||
6098 | break; | ||||
6099 | case Instruction::FDiv: | ||||
6100 | if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC)) | ||||
6101 | return NV; | ||||
6102 | break; | ||||
6103 | case Instruction::Load: | ||||
6104 | if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) | ||||
6105 | if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | ||||
6106 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | ||||
6107 | !cast<LoadInst>(LHSI)->isVolatile()) | ||||
6108 | if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I)) | ||||
6109 | return Res; | ||||
6110 | break; | ||||
6111 | } | ||||
6112 | } | ||||
6113 | |||||
6114 | if (Instruction *R = foldFabsWithFcmpZero(I)) | ||||
6115 | return R; | ||||
6116 | |||||
6117 | if (match(Op0, m_FNeg(m_Value(X)))) { | ||||
6118 | // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C | ||||
6119 | Constant *C; | ||||
6120 | if (match(Op1, m_Constant(C))) { | ||||
6121 | Constant *NegC = ConstantExpr::getFNeg(C); | ||||
6122 | return new FCmpInst(I.getSwappedPredicate(), X, NegC, "", &I); | ||||
6123 | } | ||||
6124 | } | ||||
6125 | |||||
6126 | if (match(Op0, m_FPExt(m_Value(X)))) { | ||||
6127 | // fcmp (fpext X), (fpext Y) -> fcmp X, Y | ||||
6128 | if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType()) | ||||
6129 | return new FCmpInst(Pred, X, Y, "", &I); | ||||
6130 | |||||
6131 | // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless | ||||
6132 | const APFloat *C; | ||||
6133 | if (match(Op1, m_APFloat(C))) { | ||||
6134 | const fltSemantics &FPSem = | ||||
6135 | X->getType()->getScalarType()->getFltSemantics(); | ||||
6136 | bool Lossy; | ||||
6137 | APFloat TruncC = *C; | ||||
6138 | TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy); | ||||
6139 | |||||
6140 | // Avoid lossy conversions and denormals. | ||||
6141 | // Zero is a special case that's OK to convert. | ||||
6142 | APFloat Fabs = TruncC; | ||||
6143 | Fabs.clearSign(); | ||||
6144 | if (!Lossy && | ||||
6145 | ((Fabs.compare(APFloat::getSmallestNormalized(FPSem)) != | ||||
6146 | APFloat::cmpLessThan) || Fabs.isZero())) { | ||||
6147 | Constant *NewC = ConstantFP::get(X->getType(), TruncC); | ||||
6148 | return new FCmpInst(Pred, X, NewC, "", &I); | ||||
6149 | } | ||||
6150 | } | ||||
6151 | } | ||||
6152 | |||||
6153 | if (I.getType()->isVectorTy()) | ||||
6154 | if (Instruction *Res = foldVectorCmp(I, Builder)) | ||||
6155 | return Res; | ||||
6156 | |||||
6157 | return Changed ? &I : nullptr; | ||||
6158 | } |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file provides a simple and efficient mechanism for performing general | ||||
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is | ||||
11 | // that it allows you to write concise patterns that are expressive and easy to | ||||
12 | // understand. The other major advantage of this is that it allows you to | ||||
13 | // trivially capture/bind elements in the pattern to variables. For example, | ||||
14 | // you can do something like this: | ||||
15 | // | ||||
16 | // Value *Exp = ... | ||||
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) | ||||
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), | ||||
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { | ||||
20 | // ... Pattern is matched and variables are bound ... | ||||
21 | // } | ||||
22 | // | ||||
23 | // This is primarily useful to things like the instruction combiner, but can | ||||
24 | // also be useful for static analysis tools or code generators. | ||||
25 | // | ||||
26 | //===----------------------------------------------------------------------===// | ||||
27 | |||||
28 | #ifndef LLVM_IR_PATTERNMATCH_H | ||||
29 | #define LLVM_IR_PATTERNMATCH_H | ||||
30 | |||||
31 | #include "llvm/ADT/APFloat.h" | ||||
32 | #include "llvm/ADT/APInt.h" | ||||
33 | #include "llvm/IR/Constant.h" | ||||
34 | #include "llvm/IR/Constants.h" | ||||
35 | #include "llvm/IR/InstrTypes.h" | ||||
36 | #include "llvm/IR/Instruction.h" | ||||
37 | #include "llvm/IR/Instructions.h" | ||||
38 | #include "llvm/IR/IntrinsicInst.h" | ||||
39 | #include "llvm/IR/Intrinsics.h" | ||||
40 | #include "llvm/IR/Operator.h" | ||||
41 | #include "llvm/IR/Value.h" | ||||
42 | #include "llvm/Support/Casting.h" | ||||
43 | #include <cstdint> | ||||
44 | |||||
45 | namespace llvm { | ||||
46 | namespace PatternMatch { | ||||
47 | |||||
/// Entry point of the matching framework: run pattern \p P against \p V and
/// report whether it matched.
///
/// The pattern is accepted by const reference so that temporaries can be
/// passed directly; constness is cast away because the matchers' match()
/// member functions are not const-qualified.
template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
  auto &Matcher = const_cast<Pattern &>(P);
  return Matcher.match(V);
}
51 | |||||
/// Matcher wrapper that succeeds only if the candidate reports hasOneUse()
/// and the wrapped sub-pattern also matches it.
template <typename SubPattern_t> struct OneUse_match {
  SubPattern_t SubPattern;

  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}

  template <typename OpTy> bool match(OpTy *V) {
    // The use check comes first: the sub-pattern is never consulted when the
    // candidate fails it.
    if (!V->hasOneUse())
      return false;
    return SubPattern.match(V);
  }
};
61 | |||||
62 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { | ||||
63 | return SubPattern; | ||||
64 | } | ||||
65 | |||||
66 | template <typename Class> struct class_match { | ||||
67 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } | ||||
68 | }; | ||||
69 | |||||
70 | /// Match an arbitrary value and ignore it. | ||||
71 | inline class_match<Value> m_Value() { return class_match<Value>(); } | ||||
72 | |||||
73 | /// Match an arbitrary binary operation and ignore it. | ||||
74 | inline class_match<BinaryOperator> m_BinOp() { | ||||
75 | return class_match<BinaryOperator>(); | ||||
76 | } | ||||
77 | |||||
78 | /// Matches any compare instruction and ignore it. | ||||
79 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } | ||||
80 | |||||
81 | /// Match an arbitrary ConstantInt and ignore it. | ||||
82 | inline class_match<ConstantInt> m_ConstantInt() { | ||||
83 | return class_match<ConstantInt>(); | ||||
84 | } | ||||
85 | |||||
86 | /// Match an arbitrary undef constant. | ||||
87 | inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } | ||||
88 | |||||
89 | /// Match an arbitrary Constant and ignore it. | ||||
90 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } | ||||
91 | |||||
92 | /// Match an arbitrary basic block value and ignore it. | ||||
93 | inline class_match<BasicBlock> m_BasicBlock() { | ||||
94 | return class_match<BasicBlock>(); | ||||
95 | } | ||||
96 | |||||
/// Inverting matcher: succeeds exactly when the wrapped matcher fails.
template <typename Ty> struct match_unless {
  Ty M;

  match_unless(const Ty &Matcher) : M(Matcher) {}

  template <typename ITy> bool match(ITy *V) {
    const bool InnerMatched = M.match(V);
    return !InnerMatched;
  }
};
105 | |||||
106 | /// Match if the inner matcher does *NOT* match. | ||||
107 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { | ||||
108 | return match_unless<Ty>(M); | ||||
109 | } | ||||
110 | |||||
/// Matching combinator: succeeds if either sub-matcher succeeds. The left
/// matcher is tried first; the right one runs only if the left one failed.
template <typename LTy, typename RTy> struct match_combine_or {
  LTy L;
  RTy R;

  match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) || R.match(V);
  }
};
126 | |||||
/// Matching combinator: succeeds only if both sub-matchers succeed. The
/// right matcher runs only after the left one has matched.
template <typename LTy, typename RTy> struct match_combine_and {
  LTy L;
  RTy R;

  match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) && R.match(V);
  }
};
140 | |||||
141 | /// Combine two pattern matchers matching L || R | ||||
142 | template <typename LTy, typename RTy> | ||||
143 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { | ||||
144 | return match_combine_or<LTy, RTy>(L, R); | ||||
145 | } | ||||
146 | |||||
147 | /// Combine two pattern matchers matching L && R | ||||
148 | template <typename LTy, typename RTy> | ||||
149 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { | ||||
150 | return match_combine_and<LTy, RTy>(L, R); | ||||
151 | } | ||||
152 | |||||
153 | struct apint_match { | ||||
154 | const APInt *&Res; | ||||
155 | |||||
156 | apint_match(const APInt *&R) : Res(R) {} | ||||
157 | |||||
158 | template <typename ITy> bool match(ITy *V) { | ||||
159 | if (auto *CI = dyn_cast<ConstantInt>(V)) { | ||||
160 | Res = &CI->getValue(); | ||||
161 | return true; | ||||
162 | } | ||||
163 | if (V->getType()->isVectorTy()) | ||||
164 | if (const auto *C = dyn_cast<Constant>(V)) | ||||
165 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) { | ||||
166 | Res = &CI->getValue(); | ||||
167 | return true; | ||||
168 | } | ||||
169 | return false; | ||||
170 | } | ||||
171 | }; | ||||
172 | // Either constexpr if or renaming ConstantFP::getValueAPF to | ||||
173 | // ConstantFP::getValue is needed to do it via single template | ||||
174 | // function for both apint/apfloat. | ||||
175 | struct apfloat_match { | ||||
176 | const APFloat *&Res; | ||||
177 | apfloat_match(const APFloat *&R) : Res(R) {} | ||||
178 | template <typename ITy> bool match(ITy *V) { | ||||
179 | if (auto *CI = dyn_cast<ConstantFP>(V)) { | ||||
180 | Res = &CI->getValueAPF(); | ||||
181 | return true; | ||||
182 | } | ||||
183 | if (V->getType()->isVectorTy()) | ||||
184 | if (const auto *C = dyn_cast<Constant>(V)) | ||||
185 | if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) { | ||||
186 | Res = &CI->getValueAPF(); | ||||
187 | return true; | ||||
188 | } | ||||
189 | return false; | ||||
190 | } | ||||
191 | }; | ||||
192 | |||||
193 | /// Match a ConstantInt or splatted ConstantVector, binding the | ||||
194 | /// specified pointer to the contained APInt. | ||||
195 | inline apint_match m_APInt(const APInt *&Res) { return Res; } | ||||
196 | |||||
197 | /// Match a ConstantFP or splatted ConstantVector, binding the | ||||
198 | /// specified pointer to the contained APFloat. | ||||
199 | inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; } | ||||
200 | |||||
201 | template <int64_t Val> struct constantint_match { | ||||
202 | template <typename ITy> bool match(ITy *V) { | ||||
203 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { | ||||
204 | const APInt &CIV = CI->getValue(); | ||||
205 | if (Val >= 0) | ||||
206 | return CIV == static_cast<uint64_t>(Val); | ||||
207 | // If Val is negative, and CI is shorter than it, truncate to the right | ||||
208 | // number of bits. If it is larger, then we have to sign extend. Just | ||||
209 | // compare their negated values. | ||||
210 | return -CIV == -Val; | ||||
211 | } | ||||
212 | return false; | ||||
213 | } | ||||
214 | }; | ||||
215 | |||||
216 | /// Match a ConstantInt with a specific value. | ||||
217 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { | ||||
218 | return constantint_match<Val>(); | ||||
219 | } | ||||
220 | |||||
221 | /// This helper class is used to match scalar and vector integer constants that | ||||
222 | /// satisfy a specified predicate. | ||||
223 | /// For vector constants, undefined elements are ignored. | ||||
224 | template <typename Predicate> struct cst_pred_ty : public Predicate { | ||||
225 | template <typename ITy> bool match(ITy *V) { | ||||
226 | if (const auto *CI = dyn_cast<ConstantInt>(V)) | ||||
227 | return this->isValue(CI->getValue()); | ||||
228 | if (V->getType()->isVectorTy()) { | ||||
229 | if (const auto *C = dyn_cast<Constant>(V)) { | ||||
230 | if (const auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) | ||||
231 | return this->isValue(CI->getValue()); | ||||
232 | |||||
233 | // Non-splat vector constant: check each element for a match. | ||||
234 | unsigned NumElts = V->getType()->getVectorNumElements(); | ||||
235 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/include/llvm/IR/PatternMatch.h" , 235, __PRETTY_FUNCTION__)); | ||||
236 | bool HasNonUndefElements = false; | ||||
237 | for (unsigned i = 0; i != NumElts; ++i) { | ||||
238 | Constant *Elt = C->getAggregateElement(i); | ||||
239 | if (!Elt) | ||||
240 | return false; | ||||
241 | if (isa<UndefValue>(Elt)) | ||||
242 | continue; | ||||
243 | auto *CI = dyn_cast<ConstantInt>(Elt); | ||||
244 | if (!CI || !this->isValue(CI->getValue())) | ||||
245 | return false; | ||||
246 | HasNonUndefElements = true; | ||||
247 | } | ||||
248 | return HasNonUndefElements; | ||||
249 | } | ||||
250 | } | ||||
251 | return false; | ||||
252 | } | ||||
253 | }; | ||||
254 | |||||
255 | /// This helper class is used to match scalar and vector constants that | ||||
256 | /// satisfy a specified predicate, and bind them to an APInt. | ||||
257 | template <typename Predicate> struct api_pred_ty : public Predicate { | ||||
258 | const APInt *&Res; | ||||
259 | |||||
260 | api_pred_ty(const APInt *&R) : Res(R) {} | ||||
261 | |||||
262 | template <typename ITy> bool match(ITy *V) { | ||||
263 | if (const auto *CI = dyn_cast<ConstantInt>(V)) | ||||
264 | if (this->isValue(CI->getValue())) { | ||||
265 | Res = &CI->getValue(); | ||||
266 | return true; | ||||
267 | } | ||||
268 | if (V->getType()->isVectorTy()) | ||||
269 | if (const auto *C = dyn_cast<Constant>(V)) | ||||
270 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) | ||||
271 | if (this->isValue(CI->getValue())) { | ||||
272 | Res = &CI->getValue(); | ||||
273 | return true; | ||||
274 | } | ||||
275 | |||||
276 | return false; | ||||
277 | } | ||||
278 | }; | ||||
279 | |||||
280 | /// This helper class is used to match scalar and vector floating-point | ||||
281 | /// constants that satisfy a specified predicate. | ||||
282 | /// For vector constants, undefined elements are ignored. | ||||
283 | template <typename Predicate> struct cstfp_pred_ty : public Predicate { | ||||
284 | template <typename ITy> bool match(ITy *V) { | ||||
285 | if (const auto *CF = dyn_cast<ConstantFP>(V)) | ||||
286 | return this->isValue(CF->getValueAPF()); | ||||
287 | if (V->getType()->isVectorTy()) { | ||||
288 | if (const auto *C = dyn_cast<Constant>(V)) { | ||||
289 | if (const auto *CF = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) | ||||
290 | return this->isValue(CF->getValueAPF()); | ||||
291 | |||||
292 | // Non-splat vector constant: check each element for a match. | ||||
293 | unsigned NumElts = V->getType()->getVectorNumElements(); | ||||
294 | assert(NumElts != 0 && "Constant vector with no elements?")((NumElts != 0 && "Constant vector with no elements?" ) ? static_cast<void> (0) : __assert_fail ("NumElts != 0 && \"Constant vector with no elements?\"" , "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/include/llvm/IR/PatternMatch.h" , 294, __PRETTY_FUNCTION__)); | ||||
295 | bool HasNonUndefElements = false; | ||||
296 | for (unsigned i = 0; i != NumElts; ++i) { | ||||
297 | Constant *Elt = C->getAggregateElement(i); | ||||
298 | if (!Elt) | ||||
299 | return false; | ||||
300 | if (isa<UndefValue>(Elt)) | ||||
301 | continue; | ||||
302 | auto *CF = dyn_cast<ConstantFP>(Elt); | ||||
303 | if (!CF || !this->isValue(CF->getValueAPF())) | ||||
304 | return false; | ||||
305 | HasNonUndefElements = true; | ||||
306 | } | ||||
307 | return HasNonUndefElements; | ||||
308 | } | ||||
309 | } | ||||
310 | return false; | ||||
311 | } | ||||
312 | }; | ||||
313 | |||||
314 | /////////////////////////////////////////////////////////////////////////////// | ||||
315 | // | ||||
316 | // Encapsulate constant value queries for use in templated predicate matchers. | ||||
317 | // This allows checking if constants match using compound predicates and works | ||||
318 | // with vector constants, possibly with relaxed constraints. For example, ignore | ||||
319 | // undef values. | ||||
320 | // | ||||
321 | /////////////////////////////////////////////////////////////////////////////// | ||||
322 | |||||
323 | struct is_any_apint { | ||||
324 | bool isValue(const APInt &C) { return true; } | ||||
325 | }; | ||||
326 | /// Match an integer or vector with any integral constant. | ||||
327 | /// For vectors, this includes constants with undefined elements. | ||||
328 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { | ||||
329 | return cst_pred_ty<is_any_apint>(); | ||||
330 | } | ||||
331 | |||||
332 | struct is_all_ones { | ||||
333 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } | ||||
334 | }; | ||||
335 | /// Match an integer or vector with all bits set. | ||||
336 | /// For vectors, this includes constants with undefined elements. | ||||
337 | inline cst_pred_ty<is_all_ones> m_AllOnes() { | ||||
338 | return cst_pred_ty<is_all_ones>(); | ||||
339 | } | ||||
340 | |||||
341 | struct is_maxsignedvalue { | ||||
342 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } | ||||
343 | }; | ||||
344 | /// Match an integer or vector with values having all bits except for the high | ||||
345 | /// bit set (0x7f...). | ||||
346 | /// For vectors, this includes constants with undefined elements. | ||||
347 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { | ||||
348 | return cst_pred_ty<is_maxsignedvalue>(); | ||||
349 | } | ||||
350 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { | ||||
351 | return V; | ||||
352 | } | ||||
353 | |||||
354 | struct is_negative { | ||||
355 | bool isValue(const APInt &C) { return C.isNegative(); } | ||||
356 | }; | ||||
357 | /// Match an integer or vector of negative values. | ||||
358 | /// For vectors, this includes constants with undefined elements. | ||||
359 | inline cst_pred_ty<is_negative> m_Negative() { | ||||
360 | return cst_pred_ty<is_negative>(); | ||||
361 | } | ||||
362 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { | ||||
363 | return V; | ||||
364 | } | ||||
365 | |||||
366 | struct is_nonnegative { | ||||
367 | bool isValue(const APInt &C) { return C.isNonNegative(); } | ||||
368 | }; | ||||
369 | /// Match an integer or vector of non-negative values. | ||||
370 | /// For vectors, this includes constants with undefined elements. | ||||
371 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { | ||||
372 | return cst_pred_ty<is_nonnegative>(); | ||||
373 | } | ||||
374 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { | ||||
375 | return V; | ||||
376 | } | ||||
377 | |||||
378 | struct is_strictlypositive { | ||||
379 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } | ||||
380 | }; | ||||
381 | /// Match an integer or vector of strictly positive values. | ||||
382 | /// For vectors, this includes constants with undefined elements. | ||||
383 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { | ||||
384 | return cst_pred_ty<is_strictlypositive>(); | ||||
385 | } | ||||
386 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { | ||||
387 | return V; | ||||
388 | } | ||||
389 | |||||
390 | struct is_nonpositive { | ||||
391 | bool isValue(const APInt &C) { return C.isNonPositive(); } | ||||
392 | }; | ||||
393 | /// Match an integer or vector of non-positive values. | ||||
394 | /// For vectors, this includes constants with undefined elements. | ||||
395 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { | ||||
396 | return cst_pred_ty<is_nonpositive>(); | ||||
397 | } | ||||
398 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } | ||||
399 | |||||
400 | struct is_one { | ||||
401 | bool isValue(const APInt &C) { return C.isOneValue(); } | ||||
402 | }; | ||||
403 | /// Match an integer 1 or a vector with all elements equal to 1. | ||||
404 | /// For vectors, this includes constants with undefined elements. | ||||
405 | inline cst_pred_ty<is_one> m_One() { | ||||
406 | return cst_pred_ty<is_one>(); | ||||
407 | } | ||||
408 | |||||
409 | struct is_zero_int { | ||||
410 | bool isValue(const APInt &C) { return C.isNullValue(); } | ||||
411 | }; | ||||
412 | /// Match an integer 0 or a vector with all elements equal to 0. | ||||
413 | /// For vectors, this includes constants with undefined elements. | ||||
414 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { | ||||
415 | return cst_pred_ty<is_zero_int>(); | ||||
416 | } | ||||
417 | |||||
418 | struct is_zero { | ||||
419 | template <typename ITy> bool match(ITy *V) { | ||||
420 | auto *C = dyn_cast<Constant>(V); | ||||
421 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); | ||||
422 | } | ||||
423 | }; | ||||
424 | /// Match any null constant or a vector with all elements equal to 0. | ||||
425 | /// For vectors, this includes constants with undefined elements. | ||||
426 | inline is_zero m_Zero() { | ||||
427 | return is_zero(); | ||||
428 | } | ||||
429 | |||||
430 | struct is_power2 { | ||||
431 | bool isValue(const APInt &C) { return C.isPowerOf2(); } | ||||
432 | }; | ||||
433 | /// Match an integer or vector power-of-2. | ||||
434 | /// For vectors, this includes constants with undefined elements. | ||||
435 | inline cst_pred_ty<is_power2> m_Power2() { | ||||
436 | return cst_pred_ty<is_power2>(); | ||||
437 | } | ||||
438 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { | ||||
439 | return V; | ||||
440 | } | ||||
441 | |||||
442 | struct is_negated_power2 { | ||||
443 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } | ||||
444 | }; | ||||
445 | /// Match a integer or vector negated power-of-2. | ||||
446 | /// For vectors, this includes constants with undefined elements. | ||||
447 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { | ||||
448 | return cst_pred_ty<is_negated_power2>(); | ||||
449 | } | ||||
450 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { | ||||
451 | return V; | ||||
452 | } | ||||
453 | |||||
454 | struct is_power2_or_zero { | ||||
455 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } | ||||
456 | }; | ||||
457 | /// Match an integer or vector of 0 or power-of-2 values. | ||||
458 | /// For vectors, this includes constants with undefined elements. | ||||
459 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { | ||||
460 | return cst_pred_ty<is_power2_or_zero>(); | ||||
461 | } | ||||
462 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { | ||||
463 | return V; | ||||
464 | } | ||||
465 | |||||
466 | struct is_sign_mask { | ||||
467 | bool isValue(const APInt &C) { return C.isSignMask(); } | ||||
468 | }; | ||||
469 | /// Match an integer or vector with only the sign bit(s) set. | ||||
470 | /// For vectors, this includes constants with undefined elements. | ||||
471 | inline cst_pred_ty<is_sign_mask> m_SignMask() { | ||||
472 | return cst_pred_ty<is_sign_mask>(); | ||||
473 | } | ||||
474 | |||||
475 | struct is_lowbit_mask { | ||||
476 | bool isValue(const APInt &C) { return C.isMask(); } | ||||
477 | }; | ||||
478 | /// Match an integer or vector with only the low bit(s) set. | ||||
479 | /// For vectors, this includes constants with undefined elements. | ||||
480 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { | ||||
481 | return cst_pred_ty<is_lowbit_mask>(); | ||||
482 | } | ||||
483 | |||||
484 | struct icmp_pred_with_threshold { | ||||
485 | ICmpInst::Predicate Pred; | ||||
486 | const APInt *Thr; | ||||
487 | bool isValue(const APInt &C) { | ||||
488 | switch (Pred) { | ||||
489 | case ICmpInst::Predicate::ICMP_EQ: | ||||
490 | return C.eq(*Thr); | ||||
491 | case ICmpInst::Predicate::ICMP_NE: | ||||
492 | return C.ne(*Thr); | ||||
493 | case ICmpInst::Predicate::ICMP_UGT: | ||||
494 | return C.ugt(*Thr); | ||||
495 | case ICmpInst::Predicate::ICMP_UGE: | ||||
496 | return C.uge(*Thr); | ||||
497 | case ICmpInst::Predicate::ICMP_ULT: | ||||
498 | return C.ult(*Thr); | ||||
499 | case ICmpInst::Predicate::ICMP_ULE: | ||||
500 | return C.ule(*Thr); | ||||
501 | case ICmpInst::Predicate::ICMP_SGT: | ||||
502 | return C.sgt(*Thr); | ||||
503 | case ICmpInst::Predicate::ICMP_SGE: | ||||
504 | return C.sge(*Thr); | ||||
505 | case ICmpInst::Predicate::ICMP_SLT: | ||||
506 | return C.slt(*Thr); | ||||
507 | case ICmpInst::Predicate::ICMP_SLE: | ||||
508 | return C.sle(*Thr); | ||||
509 | default: | ||||
510 | llvm_unreachable("Unhandled ICmp predicate")::llvm::llvm_unreachable_internal("Unhandled ICmp predicate", "/build/llvm-toolchain-snapshot-10~+20191219111111+200cce345dc/llvm/include/llvm/IR/PatternMatch.h" , 510); | ||||
511 | } | ||||
512 | } | ||||
513 | }; | ||||
514 | /// Match an integer or vector with every element comparing 'pred' (eg/ne/...) | ||||
515 | /// to Threshold. For vectors, this includes constants with undefined elements. | ||||
516 | inline cst_pred_ty<icmp_pred_with_threshold> | ||||
517 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { | ||||
518 | cst_pred_ty<icmp_pred_with_threshold> P; | ||||
519 | P.Pred = Predicate; | ||||
520 | P.Thr = &Threshold; | ||||
521 | return P; | ||||
522 | } | ||||
523 | |||||
524 | struct is_nan { | ||||
525 | bool isValue(const APFloat &C) { return C.isNaN(); } | ||||
526 | }; | ||||
527 | /// Match an arbitrary NaN constant. This includes quiet and signalling nans. | ||||
528 | /// For vectors, this includes constants with undefined elements. | ||||
529 | inline cstfp_pred_ty<is_nan> m_NaN() { | ||||
530 | return cstfp_pred_ty<is_nan>(); | ||||
531 | } | ||||
532 | |||||
533 | struct is_any_zero_fp { | ||||
534 | bool isValue(const APFloat &C) { return C.isZero(); } | ||||
535 | }; | ||||
536 | /// Match a floating-point negative zero or positive zero. | ||||
537 | /// For vectors, this includes constants with undefined elements. | ||||
538 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { | ||||
539 | return cstfp_pred_ty<is_any_zero_fp>(); | ||||
540 | } | ||||
541 | |||||
542 | struct is_pos_zero_fp { | ||||
543 | bool isValue(const APFloat &C) { return C.isPosZero(); } | ||||
544 | }; | ||||
545 | /// Match a floating-point positive zero. | ||||
546 | /// For vectors, this includes constants with undefined elements. | ||||
547 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { | ||||
548 | return cstfp_pred_ty<is_pos_zero_fp>(); | ||||
549 | } | ||||
550 | |||||
551 | struct is_neg_zero_fp { | ||||
552 | bool isValue(const APFloat &C) { return C.isNegZero(); } | ||||
553 | }; | ||||
554 | /// Match a floating-point negative zero. | ||||
555 | /// For vectors, this includes constants with undefined elements. | ||||
556 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { | ||||
557 | return cstfp_pred_ty<is_neg_zero_fp>(); | ||||
558 | } | ||||
559 | |||||
560 | /////////////////////////////////////////////////////////////////////////////// | ||||
561 | |||||
562 | template <typename Class> struct bind_ty { | ||||
563 | Class *&VR; | ||||
564 | |||||
565 | bind_ty(Class *&V) : VR(V) {} | ||||
566 | |||||
567 | template <typename ITy> bool match(ITy *V) { | ||||
568 | if (auto *CV = dyn_cast<Class>(V)) { | ||||
569 | VR = CV; | ||||
570 | return true; | ||||
571 | } | ||||
572 | return false; | ||||
573 | } | ||||
574 | }; | ||||
575 | |||||
576 | /// Match a value, capturing it if we match. | ||||
577 | inline bind_ty<Value> m_Value(Value *&V) { return V; } | ||||
578 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } | ||||
579 | |||||
580 | /// Match an instruction, capturing it if we match. | ||||
581 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } | ||||
582 | /// Match a binary operator, capturing it if we match. | ||||
583 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } | ||||
584 | /// Match a with overflow intrinsic, capturing it if we match. | ||||
585 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } | ||||
586 | |||||
587 | /// Match a ConstantInt, capturing the value if we match. | ||||
588 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } | ||||
589 | |||||
590 | /// Match a Constant, capturing the value if we match. | ||||
591 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } | ||||
592 | |||||
593 | /// Match a ConstantFP, capturing the value if we match. | ||||
594 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } | ||||
595 | |||||
596 | /// Match a basic block value, capturing it if we match. | ||||
597 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } | ||||
598 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { | ||||
599 | return V; | ||||
600 | } | ||||
601 | |||||
602 | /// Match a specified Value*. | ||||
603 | struct specificval_ty { | ||||
604 | const Value *Val; | ||||
605 | |||||
606 | specificval_ty(const Value *V) : Val(V) {} | ||||
607 | |||||
608 | template <typename ITy> bool match(ITy *V) { return V == Val; } | ||||
609 | }; | ||||
610 | |||||
611 | /// Match only the one specified value (compared by pointer identity). | ||||
612 | inline specificval_ty m_Specific(const Value *V) { return V; } | ||||
613 | |||||
614 | /// Stores a reference to the Value *, not the Value * itself, | ||||
615 | /// thus can be used in commutative matchers. | ||||
616 | template <typename Class> struct deferredval_ty { | ||||
617 | Class *const &Val; | ||||
618 | |||||
619 | deferredval_ty(Class *const &V) : Val(V) {} | ||||
620 | |||||
621 | template <typename ITy> bool match(ITy *const V) { return V == Val; } | ||||
622 | }; | ||||
623 | |||||
624 | /// A commutative-friendly version of m_Specific(). | ||||
625 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } | ||||
626 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { | ||||
627 | return V; | ||||
628 | } | ||||
629 | |||||
630 | /// Match a specified floating point value or vector of all elements of | ||||
631 | /// that value. | ||||
632 | struct specific_fpval { | ||||
633 | double Val; | ||||
634 | |||||
635 | specific_fpval(double V) : Val(V) {} | ||||
636 | |||||
637 | template <typename ITy> bool match(ITy *V) { | ||||
638 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) | ||||
639 | return CFP->isExactlyValue(Val); | ||||
640 | if (V->getType()->isVectorTy()) | ||||
641 | if (const auto *C = dyn_cast<Constant>(V)) | ||||
642 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) | ||||
643 | return CFP->isExactlyValue(Val); | ||||
644 | return false; | ||||
645 | } | ||||
646 | }; | ||||
647 | |||||
648 | /// Match a specific floating point value or vector with all elements | ||||
649 | /// equal to the value. | ||||
650 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } | ||||
651 | |||||
652 | /// Match a float 1.0 or vector with all elements equal to 1.0. | ||||
653 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } | ||||
654 | |||||
655 | struct bind_const_intval_ty { | ||||
656 | uint64_t &VR; | ||||
657 | |||||
658 | bind_const_intval_ty(uint64_t &V) : VR(V) {} | ||||
659 | |||||
660 | template <typename ITy> bool match(ITy *V) { | ||||
661 | if (const auto *CV = dyn_cast<ConstantInt>(V)) | ||||
662 | if (CV->getValue().ule(UINT64_MAX(18446744073709551615UL))) { | ||||
663 | VR = CV->getZExtValue(); | ||||
664 | return true; | ||||
665 | } | ||||
666 | return false; | ||||
667 | } | ||||
668 | }; | ||||
669 | |||||
670 | /// Match a specified integer value or vector of all elements of that | ||||
671 | /// value. | ||||
672 | struct specific_intval { | ||||
673 | APInt Val; | ||||
674 | |||||
675 | specific_intval(APInt V) : Val(std::move(V)) {} | ||||
676 | |||||
677 | template <typename ITy> bool match(ITy *V) { | ||||
678 | const auto *CI = dyn_cast<ConstantInt>(V); | ||||
679 | if (!CI && V->getType()->isVectorTy()) | ||||
680 | if (const auto *C = dyn_cast<Constant>(V)) | ||||
681 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()); | ||||
682 | |||||
683 | return CI && APInt::isSameValue(CI->getValue(), Val); | ||||
684 | } | ||||
685 | }; | ||||
686 | |||||
687 | /// Match a specific integer value or vector with all elements equal to | ||||
688 | /// the value. | ||||
689 | inline specific_intval m_SpecificInt(APInt V) { | ||||
690 | return specific_intval(std::move(V)); | ||||
691 | } | ||||
692 | |||||
693 | inline specific_intval m_SpecificInt(uint64_t V) { | ||||
694 | return m_SpecificInt(APInt(64, V)); | ||||
695 | } | ||||
696 | |||||
697 | /// Match a ConstantInt and bind to its value. This does not match | ||||
698 | /// ConstantInts wider than 64-bits. | ||||
699 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } | ||||
700 | |||||
701 | /// Match a specified basic block value. | ||||
702 | struct specific_bbval { | ||||
703 | BasicBlock *Val; | ||||
704 | |||||
705 | specific_bbval(BasicBlock *Val) : Val(Val) {} | ||||
706 | |||||
707 | template <typename ITy> bool match(ITy *V) { | ||||
708 | const auto *BB = dyn_cast<BasicBlock>(V); | ||||
709 | return BB && BB == Val; | ||||
710 | } | ||||
711 | }; | ||||
712 | |||||
713 | /// Match a specific basic block value. | ||||
714 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { | ||||
715 | return specific_bbval(BB); | ||||
716 | } | ||||
717 | |||||
718 | /// A commutative-friendly version of m_SpecificBB(). | ||||
719 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { | ||||
720 | return BB; | ||||
721 | } | ||||
722 | inline deferredval_ty<const BasicBlock> | ||||
723 | m_Deferred(const BasicBlock *const &BB) { | ||||
724 | return BB; | ||||
725 | } | ||||
726 | |||||
727 | //===----------------------------------------------------------------------===// | ||||
728 | // Matcher for any binary operator. | ||||
729 | // | ||||
730 | template <typename LHS_t, typename RHS_t, bool Commutable = false> | ||||
731 | struct AnyBinaryOp_match { | ||||
732 | LHS_t L; | ||||
733 | RHS_t R; | ||||
734 | |||||
735 | // The evaluation order is always stable, regardless of Commutability. | ||||
736 | // The LHS is always matched first. | ||||
737 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||
738 | |||||
739 | template <typename OpTy> bool match(OpTy *V) { | ||||
740 | if (auto *I = dyn_cast<BinaryOperator>(V)) | ||||
741 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||
742 | (Commutable && L.match(I->getOperand(1)) && | ||||
743 | R.match(I->getOperand(0))); | ||||
744 | return false; | ||||
745 | } | ||||
746 | }; | ||||
747 | |||||
748 | template <typename LHS, typename RHS> | ||||
749 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { | ||||
750 | return AnyBinaryOp_match<LHS, RHS>(L, R); | ||||
751 | } | ||||
752 | |||||
753 | //===----------------------------------------------------------------------===// | ||||
754 | // Matchers for specific binary operators. | ||||
755 | // | ||||
756 | |||||
757 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||
758 | bool Commutable = false> | ||||
759 | struct BinaryOp_match { | ||||
760 | LHS_t L; | ||||
761 | RHS_t R; | ||||
762 | |||||
763 | // The evaluation order is always stable, regardless of Commutability. | ||||
764 | // The LHS is always matched first. | ||||
765 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||
766 | |||||
767 | template <typename OpTy> bool match(OpTy *V) { | ||||
768 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||
769 | auto *I = cast<BinaryOperator>(V); | ||||
770 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||
771 | (Commutable && L.match(I->getOperand(1)) && | ||||
772 | R.match(I->getOperand(0))); | ||||
773 | } | ||||
774 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||
775 | return CE->getOpcode() == Opcode && | ||||
776 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || | ||||
777 | (Commutable && L.match(CE->getOperand(1)) && | ||||
778 | R.match(CE->getOperand(0)))); | ||||
779 | return false; | ||||
780 | } | ||||
781 | }; | ||||
782 | |||||
783 | template <typename LHS, typename RHS> | ||||
784 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, | ||||
785 | const RHS &R) { | ||||
786 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); | ||||
787 | } | ||||
788 | |||||
789 | template <typename LHS, typename RHS> | ||||
790 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, | ||||
791 | const RHS &R) { | ||||
792 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); | ||||
793 | } | ||||
794 | |||||
795 | template <typename LHS, typename RHS> | ||||
796 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, | ||||
797 | const RHS &R) { | ||||
798 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); | ||||
799 | } | ||||
800 | |||||
801 | template <typename LHS, typename RHS> | ||||
802 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, | ||||
803 | const RHS &R) { | ||||
804 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); | ||||
805 | } | ||||
806 | |||||
807 | template <typename Op_t> struct FNeg_match { | ||||
808 | Op_t X; | ||||
809 | |||||
810 | FNeg_match(const Op_t &Op) : X(Op) {} | ||||
811 | template <typename OpTy> bool match(OpTy *V) { | ||||
812 | auto *FPMO = dyn_cast<FPMathOperator>(V); | ||||
813 | if (!FPMO) return false; | ||||
814 | |||||
815 | if (FPMO->getOpcode() == Instruction::FNeg) | ||||
816 | return X.match(FPMO->getOperand(0)); | ||||
817 | |||||
818 | if (FPMO->getOpcode() == Instruction::FSub) { | ||||
819 | if (FPMO->hasNoSignedZeros()) { | ||||
820 | // With 'nsz', any zero goes. | ||||
821 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) | ||||
822 | return false; | ||||
823 | } else { | ||||
824 | // Without 'nsz', we need fsub -0.0, X exactly. | ||||
825 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) | ||||
826 | return false; | ||||
827 | } | ||||
828 | |||||
829 | return X.match(FPMO->getOperand(1)); | ||||
830 | } | ||||
831 | |||||
832 | return false; | ||||
833 | } | ||||
834 | }; | ||||
835 | |||||
836 | /// Match 'fneg X', or the equivalent 'fsub -0.0, X' ('fsub +-0.0, X' when the fsub has 'nsz'). | ||||
837 | template <typename OpTy> | ||||
838 | inline FNeg_match<OpTy> | ||||
839 | m_FNeg(const OpTy &X) { | ||||
840 | return FNeg_match<OpTy>(X); | ||||
841 | } | ||||
842 | |||||
843 | /// Match 'fneg X' as 'fsub +-0.0, X'. | ||||
844 | template <typename RHS> | ||||
845 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> | ||||
846 | m_FNegNSZ(const RHS &X) { | ||||
847 | return m_FSub(m_AnyZeroFP(), X); | ||||
848 | } | ||||
849 | |||||
850 | template <typename LHS, typename RHS> | ||||
851 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, | ||||
852 | const RHS &R) { | ||||
853 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); | ||||
854 | } | ||||
855 | |||||
856 | template <typename LHS, typename RHS> | ||||
857 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, | ||||
858 | const RHS &R) { | ||||
859 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); | ||||
860 | } | ||||
861 | |||||
862 | template <typename LHS, typename RHS> | ||||
863 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, | ||||
864 | const RHS &R) { | ||||
865 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); | ||||
866 | } | ||||
867 | |||||
868 | template <typename LHS, typename RHS> | ||||
869 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, | ||||
870 | const RHS &R) { | ||||
871 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); | ||||
872 | } | ||||
873 | |||||
874 | template <typename LHS, typename RHS> | ||||
875 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, | ||||
876 | const RHS &R) { | ||||
877 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); | ||||
878 | } | ||||
879 | |||||
880 | template <typename LHS, typename RHS> | ||||
881 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, | ||||
882 | const RHS &R) { | ||||
883 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); | ||||
884 | } | ||||
885 | |||||
886 | template <typename LHS, typename RHS> | ||||
887 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, | ||||
888 | const RHS &R) { | ||||
889 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); | ||||
890 | } | ||||
891 | |||||
892 | template <typename LHS, typename RHS> | ||||
893 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, | ||||
894 | const RHS &R) { | ||||
895 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); | ||||
896 | } | ||||
897 | |||||
898 | template <typename LHS, typename RHS> | ||||
899 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, | ||||
900 | const RHS &R) { | ||||
901 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); | ||||
902 | } | ||||
903 | |||||
904 | template <typename LHS, typename RHS> | ||||
905 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, | ||||
906 | const RHS &R) { | ||||
907 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); | ||||
908 | } | ||||
909 | |||||
910 | template <typename LHS, typename RHS> | ||||
911 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, | ||||
912 | const RHS &R) { | ||||
913 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); | ||||
914 | } | ||||
915 | |||||
916 | template <typename LHS, typename RHS> | ||||
917 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, | ||||
918 | const RHS &R) { | ||||
919 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); | ||||
920 | } | ||||
921 | |||||
922 | template <typename LHS, typename RHS> | ||||
923 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, | ||||
924 | const RHS &R) { | ||||
925 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); | ||||
926 | } | ||||
927 | |||||
928 | template <typename LHS, typename RHS> | ||||
929 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, | ||||
930 | const RHS &R) { | ||||
931 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); | ||||
932 | } | ||||
933 | |||||
934 | template <typename LHS_t, typename RHS_t, unsigned Opcode, | ||||
935 | unsigned WrapFlags = 0> | ||||
936 | struct OverflowingBinaryOp_match { | ||||
937 | LHS_t L; | ||||
938 | RHS_t R; | ||||
939 | |||||
940 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) | ||||
941 | : L(LHS), R(RHS) {} | ||||
942 | |||||
943 | template <typename OpTy> bool match(OpTy *V) { | ||||
944 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { | ||||
945 | if (Op->getOpcode() != Opcode) | ||||
946 | return false; | ||||
947 | if (WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap && | ||||
948 | !Op->hasNoUnsignedWrap()) | ||||
949 | return false; | ||||
950 | if (WrapFlags & OverflowingBinaryOperator::NoSignedWrap && | ||||
951 | !Op->hasNoSignedWrap()) | ||||
952 | return false; | ||||
953 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); | ||||
954 | } | ||||
955 | return false; | ||||
956 | } | ||||
957 | }; | ||||
958 | |||||
959 | template <typename LHS, typename RHS> | ||||
960 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||
961 | OverflowingBinaryOperator::NoSignedWrap> | ||||
962 | m_NSWAdd(const LHS &L, const RHS &R) { | ||||
963 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||
964 | OverflowingBinaryOperator::NoSignedWrap>( | ||||
965 | L, R); | ||||
966 | } | ||||
967 | template <typename LHS, typename RHS> | ||||
968 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||
969 | OverflowingBinaryOperator::NoSignedWrap> | ||||
970 | m_NSWSub(const LHS &L, const RHS &R) { | ||||
971 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||
972 | OverflowingBinaryOperator::NoSignedWrap>( | ||||
973 | L, R); | ||||
974 | } | ||||
975 | template <typename LHS, typename RHS> | ||||
976 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||
977 | OverflowingBinaryOperator::NoSignedWrap> | ||||
978 | m_NSWMul(const LHS &L, const RHS &R) { | ||||
979 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||
980 | OverflowingBinaryOperator::NoSignedWrap>( | ||||
981 | L, R); | ||||
982 | } | ||||
983 | template <typename LHS, typename RHS> | ||||
984 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||
985 | OverflowingBinaryOperator::NoSignedWrap> | ||||
986 | m_NSWShl(const LHS &L, const RHS &R) { | ||||
987 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||
988 | OverflowingBinaryOperator::NoSignedWrap>( | ||||
989 | L, R); | ||||
990 | } | ||||
991 | |||||
992 | template <typename LHS, typename RHS> | ||||
993 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||
994 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||
995 | m_NUWAdd(const LHS &L, const RHS &R) { | ||||
996 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, | ||||
997 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||
998 | L, R); | ||||
999 | } | ||||
1000 | template <typename LHS, typename RHS> | ||||
1001 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||
1002 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||
1003 | m_NUWSub(const LHS &L, const RHS &R) { | ||||
1004 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, | ||||
1005 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||
1006 | L, R); | ||||
1007 | } | ||||
1008 | template <typename LHS, typename RHS> | ||||
1009 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||
1010 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||
1011 | m_NUWMul(const LHS &L, const RHS &R) { | ||||
1012 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, | ||||
1013 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||
1014 | L, R); | ||||
1015 | } | ||||
1016 | template <typename LHS, typename RHS> | ||||
1017 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||
1018 | OverflowingBinaryOperator::NoUnsignedWrap> | ||||
1019 | m_NUWShl(const LHS &L, const RHS &R) { | ||||
1020 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, | ||||
1021 | OverflowingBinaryOperator::NoUnsignedWrap>( | ||||
1022 | L, R); | ||||
1023 | } | ||||
1024 | |||||
1025 | //===----------------------------------------------------------------------===// | ||||
1026 | // Class that matches a group of binary opcodes. | ||||
1027 | // | ||||
1028 | template <typename LHS_t, typename RHS_t, typename Predicate> | ||||
1029 | struct BinOpPred_match : Predicate { | ||||
1030 | LHS_t L; | ||||
1031 | RHS_t R; | ||||
1032 | |||||
1033 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||
1034 | |||||
1035 | template <typename OpTy> bool match(OpTy *V) { | ||||
1036 | if (auto *I = dyn_cast<Instruction>(V)) | ||||
1037 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && | ||||
1038 | R.match(I->getOperand(1)); | ||||
1039 | if (auto *CE = dyn_cast<ConstantExpr>(V)) | ||||
1040 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && | ||||
1041 | R.match(CE->getOperand(1)); | ||||
1042 | return false; | ||||
1043 | } | ||||
1044 | }; | ||||
1045 | |||||
1046 | struct is_shift_op { | ||||
1047 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } | ||||
1048 | }; | ||||
1049 | |||||
1050 | struct is_right_shift_op { | ||||
1051 | bool isOpType(unsigned Opcode) { | ||||
1052 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; | ||||
1053 | } | ||||
1054 | }; | ||||
1055 | |||||
1056 | struct is_logical_shift_op { | ||||
1057 | bool isOpType(unsigned Opcode) { | ||||
1058 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; | ||||
1059 | } | ||||
1060 | }; | ||||
1061 | |||||
1062 | struct is_bitwiselogic_op { | ||||
1063 | bool isOpType(unsigned Opcode) { | ||||
1064 | return Instruction::isBitwiseLogicOp(Opcode); | ||||
1065 | } | ||||
1066 | }; | ||||
1067 | |||||
1068 | struct is_idiv_op { | ||||
1069 | bool isOpType(unsigned Opcode) { | ||||
1070 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; | ||||
1071 | } | ||||
1072 | }; | ||||
1073 | |||||
1074 | struct is_irem_op { | ||||
1075 | bool isOpType(unsigned Opcode) { | ||||
1076 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; | ||||
1077 | } | ||||
1078 | }; | ||||
1079 | |||||
1080 | /// Matches shift operations. | ||||
1081 | template <typename LHS, typename RHS> | ||||
1082 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, | ||||
1083 | const RHS &R) { | ||||
1084 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); | ||||
1085 | } | ||||
1086 | |||||
1087 | /// Matches right shift operations (lshr or ashr). | ||||
1088 | template <typename LHS, typename RHS> | ||||
1089 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, | ||||
1090 | const RHS &R) { | ||||
1091 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); | ||||
1092 | } | ||||
1093 | |||||
1094 | /// Matches logical shift operations. | ||||
1095 | template <typename LHS, typename RHS> | ||||
1096 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> | ||||
1097 | m_LogicalShift(const LHS &L, const RHS &R) { | ||||
1098 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); | ||||
1099 | } | ||||
1100 | |||||
1101 | /// Matches bitwise logic operations. | ||||
1102 | template <typename LHS, typename RHS> | ||||
1103 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> | ||||
1104 | m_BitwiseLogic(const LHS &L, const RHS &R) { | ||||
1105 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); | ||||
1106 | } | ||||
1107 | |||||
1108 | /// Matches integer division operations. | ||||
1109 | template <typename LHS, typename RHS> | ||||
1110 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, | ||||
1111 | const RHS &R) { | ||||
1112 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); | ||||
1113 | } | ||||
1114 | |||||
1115 | /// Matches integer remainder operations. | ||||
1116 | template <typename LHS, typename RHS> | ||||
1117 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, | ||||
1118 | const RHS &R) { | ||||
1119 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); | ||||
1120 | } | ||||
1121 | |||||
1122 | //===----------------------------------------------------------------------===// | ||||
1123 | // Class that matches exact binary ops. | ||||
1124 | // | ||||
1125 | template <typename SubPattern_t> struct Exact_match { | ||||
1126 | SubPattern_t SubPattern; | ||||
1127 | |||||
1128 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} | ||||
1129 | |||||
1130 | template <typename OpTy> bool match(OpTy *V) { | ||||
1131 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) | ||||
1132 | return PEO->isExact() && SubPattern.match(V); | ||||
1133 | return false; | ||||
1134 | } | ||||
1135 | }; | ||||
1136 | |||||
1137 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { | ||||
1138 | return SubPattern; | ||||
1139 | } | ||||
1140 | |||||
1141 | //===----------------------------------------------------------------------===// | ||||
1142 | // Matchers for CmpInst classes | ||||
1143 | // | ||||
1144 | |||||
1145 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, | ||||
1146 | bool Commutable = false> | ||||
1147 | struct CmpClass_match { | ||||
1148 | PredicateTy &Predicate; | ||||
1149 | LHS_t L; | ||||
1150 | RHS_t R; | ||||
1151 | |||||
1152 | // The evaluation order is always stable, regardless of Commutability. | ||||
1153 | // The LHS is always matched first. | ||||
1154 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) | ||||
1155 | : Predicate(Pred), L(LHS), R(RHS) {} | ||||
1156 | |||||
1157 | template <typename OpTy> bool match(OpTy *V) { | ||||
1158 | if (auto *I = dyn_cast<Class>(V)) | ||||
1159 | if ((L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || | ||||
1160 | (Commutable && L.match(I->getOperand(1)) && | ||||
1161 | R.match(I->getOperand(0)))) { | ||||
1162 | Predicate = I->getPredicate(); | ||||
1163 | return true; | ||||
1164 | } | ||||
1165 | return false; | ||||
1166 | } | ||||
1167 | }; | ||||
1168 | |||||
1169 | template <typename LHS, typename RHS> | ||||
1170 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> | ||||
1171 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||
1172 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); | ||||
1173 | } | ||||
1174 | |||||
1175 | template <typename LHS, typename RHS> | ||||
1176 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> | ||||
1177 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||
1178 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); | ||||
1179 | } | ||||
1180 | |||||
1181 | template <typename LHS, typename RHS> | ||||
1182 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> | ||||
1183 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||
1184 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); | ||||
1185 | } | ||||
1186 | |||||
1187 | //===----------------------------------------------------------------------===// | ||||
1188 | // Matchers for instructions with a given opcode and number of operands. | ||||
1189 | // | ||||
1190 | |||||
1191 | /// Matches instructions with Opcode and one operand. | ||||
1192 | template <typename T0, unsigned Opcode> struct OneOps_match { | ||||
1193 | T0 Op1; | ||||
1194 | |||||
1195 | OneOps_match(const T0 &Op1) : Op1(Op1) {} | ||||
1196 | |||||
1197 | template <typename OpTy> bool match(OpTy *V) { | ||||
1198 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||
1199 | auto *I = cast<Instruction>(V); | ||||
1200 | return Op1.match(I->getOperand(0)); | ||||
1201 | } | ||||
1202 | return false; | ||||
1203 | } | ||||
1204 | }; | ||||
1205 | |||||
1206 | /// Matches instructions with Opcode and two operands. | ||||
1207 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { | ||||
1208 | T0 Op1; | ||||
1209 | T1 Op2; | ||||
1210 | |||||
1211 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} | ||||
1212 | |||||
1213 | template <typename OpTy> bool match(OpTy *V) { | ||||
1214 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||
1215 | auto *I = cast<Instruction>(V); | ||||
1216 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); | ||||
1217 | } | ||||
1218 | return false; | ||||
1219 | } | ||||
1220 | }; | ||||
1221 | |||||
1222 | /// Matches instructions with Opcode and three operands. | ||||
1223 | template <typename T0, typename T1, typename T2, unsigned Opcode> | ||||
1224 | struct ThreeOps_match { | ||||
1225 | T0 Op1; | ||||
1226 | T1 Op2; | ||||
1227 | T2 Op3; | ||||
1228 | |||||
1229 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) | ||||
1230 | : Op1(Op1), Op2(Op2), Op3(Op3) {} | ||||
1231 | |||||
1232 | template <typename OpTy> bool match(OpTy *V) { | ||||
1233 | if (V->getValueID() == Value::InstructionVal + Opcode) { | ||||
1234 | auto *I = cast<Instruction>(V); | ||||
1235 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && | ||||
1236 | Op3.match(I->getOperand(2)); | ||||
1237 | } | ||||
1238 | return false; | ||||
1239 | } | ||||
1240 | }; | ||||
1241 | |||||
1242 | /// Matches SelectInst. | ||||
1243 | template <typename Cond, typename LHS, typename RHS> | ||||
1244 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> | ||||
1245 | m_Select(const Cond &C, const LHS &L, const RHS &R) { | ||||
1246 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); | ||||
1247 | } | ||||
1248 | |||||
1249 | /// This matches a select of two constants, e.g.: | ||||
1250 | /// m_SelectCst<-1, 0>(m_Value(V)) | ||||
1251 | template <int64_t L, int64_t R, typename Cond> | ||||
1252 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, | ||||
1253 | Instruction::Select> | ||||
1254 | m_SelectCst(const Cond &C) { | ||||
1255 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); | ||||
1256 | } | ||||
1257 | |||||
1258 | /// Matches FreezeInst. | ||||
1259 | template <typename OpTy> | ||||
1260 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { | ||||
1261 | return OneOps_match<OpTy, Instruction::Freeze>(Op); | ||||
1262 | } | ||||
1263 | |||||
1264 | /// Matches InsertElementInst. | ||||
1265 | template <typename Val_t, typename Elt_t, typename Idx_t> | ||||
1266 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> | ||||
1267 | m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { | ||||
1268 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( | ||||
1269 | Val, Elt, Idx); | ||||
1270 | } | ||||
1271 | |||||
1272 | /// Matches ExtractElementInst. | ||||
1273 | template <typename Val_t, typename Idx_t> | ||||
1274 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> | ||||
1275 | m_ExtractElement(const Val_t &Val, const Idx_t &Idx) { | ||||
1276 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); | ||||
1277 | } | ||||
1278 | |||||
1279 | /// Matches ShuffleVectorInst. | ||||
1280 | template <typename V1_t, typename V2_t, typename Mask_t> | ||||
1281 | inline ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector> | ||||
1282 | m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m) { | ||||
1283 | return ThreeOps_match<V1_t, V2_t, Mask_t, Instruction::ShuffleVector>(v1, v2, | ||||
1284 | m); | ||||
1285 | } | ||||
1286 | |||||
1287 | /// Matches LoadInst. | ||||
1288 | template <typename OpTy> | ||||
1289 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { | ||||
1290 | return OneOps_match<OpTy, Instruction::Load>(Op); | ||||
1291 | } | ||||
1292 | |||||
1293 | /// Matches StoreInst. | ||||
1294 | template <typename ValueOpTy, typename PointerOpTy> | ||||
1295 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> | ||||
1296 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { | ||||
1297 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, | ||||
1298 | PointerOp); | ||||
1299 | } | ||||
1300 | |||||
1301 | //===----------------------------------------------------------------------===// | ||||
1302 | // Matchers for CastInst classes | ||||
1303 | // | ||||
1304 | |||||
1305 | template <typename Op_t, unsigned Opcode> struct CastClass_match { | ||||
1306 | Op_t Op; | ||||
1307 | |||||
1308 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} | ||||
1309 | |||||
1310 | template <typename OpTy> bool match(OpTy *V) { | ||||
1311 | if (auto *O = dyn_cast<Operator>(V)) | ||||
1312 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); | ||||
1313 | return false; | ||||
1314 | } | ||||
1315 | }; | ||||
1316 | |||||
1317 | /// Matches BitCast. | ||||
1318 | template <typename OpTy> | ||||
1319 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { | ||||
1320 | return CastClass_match<OpTy, Instruction::BitCast>(Op); | ||||
1321 | } | ||||
1322 | |||||
1323 | /// Matches PtrToInt. | ||||
1324 | template <typename OpTy> | ||||
1325 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { | ||||
1326 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); | ||||
1327 | } | ||||
1328 | |||||
1329 | /// Matches Trunc. | ||||
1330 | template <typename OpTy> | ||||
1331 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { | ||||
1332 | return CastClass_match<OpTy, Instruction::Trunc>(Op); | ||||
1333 | } | ||||
1334 | |||||
1335 | template <typename OpTy> | ||||
1336 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> | ||||
1337 | m_TruncOrSelf(const OpTy &Op) { | ||||
1338 | return m_CombineOr(m_Trunc(Op), Op); | ||||
1339 | } | ||||
1340 | |||||
1341 | /// Matches SExt. | ||||
1342 | template <typename OpTy> | ||||
1343 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { | ||||
1344 | return CastClass_match<OpTy, Instruction::SExt>(Op); | ||||
1345 | } | ||||
1346 | |||||
1347 | /// Matches ZExt. | ||||
1348 | template <typename OpTy> | ||||
1349 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { | ||||
1350 | return CastClass_match<OpTy, Instruction::ZExt>(Op); | ||||
1351 | } | ||||
1352 | |||||
1353 | template <typename OpTy> | ||||
1354 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> | ||||
1355 | m_ZExtOrSelf(const OpTy &Op) { | ||||
1356 | return m_CombineOr(m_ZExt(Op), Op); | ||||
1357 | } | ||||
1358 | |||||
1359 | template <typename OpTy> | ||||
1360 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> | ||||
1361 | m_SExtOrSelf(const OpTy &Op) { | ||||
1362 | return m_CombineOr(m_SExt(Op), Op); | ||||
1363 | } | ||||
1364 | |||||
1365 | template <typename OpTy> | ||||
1366 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||
1367 | CastClass_match<OpTy, Instruction::SExt>> | ||||
1368 | m_ZExtOrSExt(const OpTy &Op) { | ||||
1369 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); | ||||
1370 | } | ||||
1371 | |||||
1372 | template <typename OpTy> | ||||
1373 | inline match_combine_or< | ||||
1374 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, | ||||
1375 | CastClass_match<OpTy, Instruction::SExt>>, | ||||
1376 | OpTy> | ||||
1377 | m_ZExtOrSExtOrSelf(const OpTy &Op) { | ||||
1378 | return m_CombineOr(m_ZExtOrSExt(Op), Op); | ||||
1379 | } | ||||
1380 | |||||
1381 | /// Matches UIToFP. | ||||
1382 | template <typename OpTy> | ||||
1383 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { | ||||
1384 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); | ||||
1385 | } | ||||
1386 | |||||
1387 | /// Matches SIToFP. | ||||
1388 | template <typename OpTy> | ||||
1389 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { | ||||
1390 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); | ||||
1391 | } | ||||
1392 | |||||
1393 | /// Matches FPTrunc | ||||
1394 | template <typename OpTy> | ||||
1395 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { | ||||
1396 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); | ||||
1397 | } | ||||
1398 | |||||
1399 | /// Matches FPExt | ||||
1400 | template <typename OpTy> | ||||
1401 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { | ||||
1402 | return CastClass_match<OpTy, Instruction::FPExt>(Op); | ||||
1403 | } | ||||
1404 | |||||
1405 | //===----------------------------------------------------------------------===// | ||||
1406 | // Matchers for control flow. | ||||
1407 | // | ||||
1408 | |||||
1409 | struct br_match { | ||||
1410 | BasicBlock *&Succ; | ||||
1411 | |||||
1412 | br_match(BasicBlock *&Succ) : Succ(Succ) {} | ||||
1413 | |||||
1414 | template <typename OpTy> bool match(OpTy *V) { | ||||
1415 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||
1416 | if (BI->isUnconditional()) { | ||||
1417 | Succ = BI->getSuccessor(0); | ||||
1418 | return true; | ||||
1419 | } | ||||
1420 | return false; | ||||
1421 | } | ||||
1422 | }; | ||||
1423 | |||||
1424 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } | ||||
1425 | |||||
1426 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||
1427 | struct brc_match { | ||||
1428 | Cond_t Cond; | ||||
1429 | TrueBlock_t T; | ||||
1430 | FalseBlock_t F; | ||||
1431 | |||||
1432 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) | ||||
1433 | : Cond(C), T(t), F(f) {} | ||||
1434 | |||||
1435 | template <typename OpTy> bool match(OpTy *V) { | ||||
1436 | if (auto *BI = dyn_cast<BranchInst>(V)) | ||||
1437 | if (BI->isConditional() && Cond.match(BI->getCondition())) | ||||
1438 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); | ||||
1439 | return false; | ||||
1440 | } | ||||
1441 | }; | ||||
1442 | |||||
1443 | template <typename Cond_t> | ||||
1444 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> | ||||
1445 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { | ||||
1446 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( | ||||
1447 | C, m_BasicBlock(T), m_BasicBlock(F)); | ||||
1448 | } | ||||
1449 | |||||
1450 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> | ||||
1451 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> | ||||
1452 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { | ||||
1453 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); | ||||
1454 | } | ||||
1455 | |||||
1456 | //===----------------------------------------------------------------------===// | ||||
1457 | // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y). | ||||
1458 | // | ||||
1459 | |||||
1460 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, | ||||
1461 | bool Commutable = false> | ||||
1462 | struct MaxMin_match { | ||||
1463 | LHS_t L; | ||||
1464 | RHS_t R; | ||||
1465 | |||||
1466 | // The evaluation order is always stable, regardless of Commutability. | ||||
1467 | // The LHS is always matched first. | ||||
1468 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} | ||||
1469 | |||||
1470 | template <typename OpTy> bool match(OpTy *V) { | ||||
1471 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". | ||||
1472 | auto *SI = dyn_cast<SelectInst>(V); | ||||
1473 | if (!SI) | ||||
1474 | return false; | ||||
1475 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); | ||||
1476 | if (!Cmp) | ||||
1477 | return false; | ||||
1478 | // At this point we have a select conditioned on a comparison. Check that | ||||
1479 | // it is the values returned by the select that are being compared. | ||||
1480 | Value *TrueVal = SI->getTrueValue(); | ||||
1481 | Value *FalseVal = SI->getFalseValue(); | ||||
1482 | Value *LHS = Cmp->getOperand(0); | ||||
1483 | Value *RHS = Cmp->getOperand(1); | ||||
1484 | if ((TrueVal != LHS || FalseVal != RHS) && | ||||
1485 | (TrueVal != RHS || FalseVal != LHS)) | ||||
1486 | return false; | ||||
1487 | typename CmpInst_t::Predicate Pred = | ||||
1488 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); | ||||
1489 | // Does "(x pred y) ? x : y" represent the desired max/min operation? | ||||
1490 | if (!Pred_t::match(Pred)) | ||||
1491 | return false; | ||||
1492 | // It does! Bind the operands. | ||||
1493 | return (L.match(LHS) && R.match(RHS)) || | ||||
1494 | (Commutable && L.match(RHS) && R.match(LHS)); | ||||
1495 | } | ||||
1496 | }; | ||||
1497 | |||||
1498 | /// Helper class for identifying signed max predicates. | ||||
1499 | struct smax_pred_ty { | ||||
1500 | static bool match(ICmpInst::Predicate Pred) { | ||||
1501 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; | ||||
1502 | } | ||||
1503 | }; | ||||
1504 | |||||
1505 | /// Helper class for identifying signed min predicates. | ||||
1506 | struct smin_pred_ty { | ||||
1507 | static bool match(ICmpInst::Predicate Pred) { | ||||
1508 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; | ||||
1509 | } | ||||
1510 | }; | ||||
1511 | |||||
1512 | /// Helper class for identifying unsigned max predicates. | ||||
1513 | struct umax_pred_ty { | ||||
1514 | static bool match(ICmpInst::Predicate Pred) { | ||||
1515 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; | ||||
1516 | } | ||||
1517 | }; | ||||
1518 | |||||
1519 | /// Helper class for identifying unsigned min predicates. | ||||
1520 | struct umin_pred_ty { | ||||
1521 | static bool match(ICmpInst::Predicate Pred) { | ||||
1522 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; | ||||
1523 | } | ||||
1524 | }; | ||||
1525 | |||||
1526 | /// Helper class for identifying ordered max predicates. | ||||
1527 | struct ofmax_pred_ty { | ||||
1528 | static bool match(FCmpInst::Predicate Pred) { | ||||
1529 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; | ||||
1530 | } | ||||
1531 | }; | ||||
1532 | |||||
1533 | /// Helper class for identifying ordered min predicates. | ||||
1534 | struct ofmin_pred_ty { | ||||
1535 | static bool match(FCmpInst::Predicate Pred) { | ||||
1536 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; | ||||
1537 | } | ||||
1538 | }; | ||||
1539 | |||||
1540 | /// Helper class for identifying unordered max predicates. | ||||
1541 | struct ufmax_pred_ty { | ||||
1542 | static bool match(FCmpInst::Predicate Pred) { | ||||
1543 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; | ||||
1544 | } | ||||
1545 | }; | ||||
1546 | |||||
1547 | /// Helper class for identifying unordered min predicates. | ||||
1548 | struct ufmin_pred_ty { | ||||
1549 | static bool match(FCmpInst::Predicate Pred) { | ||||
1550 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; | ||||
1551 | } | ||||
1552 | }; | ||||
1553 | |||||
1554 | template <typename LHS, typename RHS> | ||||
1555 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, | ||||
1556 | const RHS &R) { | ||||
1557 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); | ||||
1558 | } | ||||
1559 | |||||
1560 | template <typename LHS, typename RHS> | ||||
1561 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, | ||||
1562 | const RHS &R) { | ||||
1563 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); | ||||
1564 | } | ||||
1565 | |||||
1566 | template <typename LHS, typename RHS> | ||||
1567 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, | ||||
1568 | const RHS &R) { | ||||
1569 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); | ||||
1570 | } | ||||
1571 | |||||
1572 | template <typename LHS, typename RHS> | ||||
1573 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, | ||||
1574 | const RHS &R) { | ||||
1575 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); | ||||
1576 | } | ||||
1577 | |||||
1578 | /// Match an 'ordered' floating point maximum function. | ||||
1579 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||
1580 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||
1581 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||
1582 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||
1583 | /// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate. | ||||
1584 | /// | ||||
1585 | /// max(L, R) iff L and R are not NaN | ||||
1586 | /// m_OrdFMax(L, R) = R iff L or R are NaN | ||||
1587 | template <typename LHS, typename RHS> | ||||
1588 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, | ||||
1589 | const RHS &R) { | ||||
1590 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); | ||||
1591 | } | ||||
1592 | |||||
1593 | /// Match an 'ordered' floating point minimum function. | ||||
1594 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||
1595 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||
1596 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||
1597 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||
1598 | /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. | ||||
1599 | /// | ||||
1600 | /// min(L, R) iff L and R are not NaN | ||||
1601 | /// m_OrdFMin(L, R) = R iff L or R are NaN | ||||
1602 | template <typename LHS, typename RHS> | ||||
1603 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, | ||||
1604 | const RHS &R) { | ||||
1605 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); | ||||
1606 | } | ||||
1607 | |||||
1608 | /// Match an 'unordered' floating point maximum function. | ||||
1609 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||
1610 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||
1611 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' | ||||
1612 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||
1613 | /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. | ||||
1614 | /// | ||||
1615 | /// max(L, R) iff L and R are not NaN | ||||
1616 | /// m_UnordFMax(L, R) = L iff L or R are NaN | ||||
1617 | template <typename LHS, typename RHS> | ||||
1618 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> | ||||
1619 | m_UnordFMax(const LHS &L, const RHS &R) { | ||||
1620 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); | ||||
1621 | } | ||||
1622 | |||||
1623 | /// Match an 'unordered' floating point minimum function. | ||||
1624 | /// Floating point has one special value 'NaN'. Therefore, there is no total | ||||
1625 | /// order. However, if we can ignore the 'NaN' value (for example, because of a | ||||
1626 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' | ||||
1627 | /// semantics. In the presence of 'NaN' we have to preserve the original | ||||
1628 | /// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. | ||||
1629 | /// | ||||
1630 | /// min(L, R) iff L and R are not NaN | ||||
1631 | /// m_UnordFMin(L, R) = L iff L or R are NaN | ||||
1632 | template <typename LHS, typename RHS> | ||||
1633 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> | ||||
1634 | m_UnordFMin(const LHS &L, const RHS &R) { | ||||
1635 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); | ||||
1636 | } | ||||
1637 | |||||
1638 | //===----------------------------------------------------------------------===// | ||||
1639 | // Matchers for overflow check patterns: e.g. (a + b) u< a | ||||
1640 | // | ||||
1641 | |||||
1642 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||
1643 | struct UAddWithOverflow_match { | ||||
1644 | LHS_t L; | ||||
1645 | RHS_t R; | ||||
1646 | Sum_t S; | ||||
1647 | |||||
1648 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) | ||||
1649 | : L(L), R(R), S(S) {} | ||||
1650 | |||||
1651 | template <typename OpTy> bool match(OpTy *V) { | ||||
1652 | Value *ICmpLHS, *ICmpRHS; | ||||
1653 | ICmpInst::Predicate Pred; | ||||
1654 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) | ||||
1655 | return false; | ||||
1656 | |||||
1657 | Value *AddLHS, *AddRHS; | ||||
1658 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); | ||||
1659 | |||||
1660 | // (a + b) u< a, (a + b) u< b | ||||
1661 | if (Pred == ICmpInst::ICMP_ULT) | ||||
1662 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) | ||||
1663 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||
1664 | |||||
1665 | // a >u (a + b), b >u (a + b) | ||||
1666 | if (Pred == ICmpInst::ICMP_UGT) | ||||
1667 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) | ||||
1668 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||
1669 | |||||
1670 | // Match special-case for increment-by-1. | ||||
1671 | if (Pred == ICmpInst::ICMP_EQ) { | ||||
1672 | // (a + 1) == 0 | ||||
1673 | // (1 + a) == 0 | ||||
1674 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && | ||||
1675 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||
1676 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); | ||||
1677 | // 0 == (a + 1) | ||||
1678 | // 0 == (1 + a) | ||||
1679 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && | ||||
1680 | (m_One().match(AddLHS) || m_One().match(AddRHS))) | ||||
1681 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); | ||||
1682 | } | ||||
1683 | |||||
1684 | return false; | ||||
1685 | } | ||||
1686 | }; | ||||
1687 | |||||
1688 | /// Match an icmp instruction checking for unsigned overflow on addition. | ||||
1689 | /// | ||||
1690 | /// S is matched to the addition whose result is being checked for overflow, and | ||||
1691 | /// L and R are matched to the LHS and RHS of S. | ||||
1692 | template <typename LHS_t, typename RHS_t, typename Sum_t> | ||||
1693 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> | ||||
1694 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { | ||||
1695 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); | ||||
1696 | } | ||||
1697 | |||||
1698 | template <typename Opnd_t> struct Argument_match { | ||||
1699 | unsigned OpI; | ||||
1700 | Opnd_t Val; | ||||
1701 | |||||
1702 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} | ||||
1703 | |||||
1704 | template <typename OpTy> bool match(OpTy *V) { | ||||
1705 | // FIXME: Should likely be switched to use `CallBase`. | ||||
1706 | if (const auto *CI = dyn_cast<CallInst>(V)) | ||||
1707 | return Val.match(CI->getArgOperand(OpI)); | ||||
1708 | return false; | ||||
1709 | } | ||||
1710 | }; | ||||
1711 | |||||
1712 | /// Match an argument. | ||||
1713 | template <unsigned OpI, typename Opnd_t> | ||||
1714 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { | ||||
1715 | return Argument_match<Opnd_t>(OpI, Op); | ||||
1716 | } | ||||
1717 | |||||
1718 | /// Intrinsic matchers. | ||||
1719 | struct IntrinsicID_match { | ||||
1720 | unsigned ID; | ||||
1721 | |||||
1722 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} | ||||
1723 | |||||
1724 | template <typename OpTy> bool match(OpTy *V) { | ||||
1725 | if (const auto *CI = dyn_cast<CallInst>(V)) | ||||
1726 | if (const auto *F = CI->getCalledFunction()) | ||||
1727 | return F->getIntrinsicID() == ID; | ||||
1728 | return false; | ||||
1729 | } | ||||
1730 | }; | ||||
1731 | |||||
1732 | /// Intrinsic matches are combinations of ID matchers, and argument | ||||
1733 | /// matchers. Higher arity matcher are defined recursively in terms of and-ing | ||||
1734 | /// them with lower arity matchers. Here's some convenient typedefs for up to | ||||
1735 | /// several arguments, and more can be added as needed | ||||
1736 | template <typename T0 = void, typename T1 = void, typename T2 = void, | ||||
1737 | typename T3 = void, typename T4 = void, typename T5 = void, | ||||
1738 | typename T6 = void, typename T7 = void, typename T8 = void, | ||||
1739 | typename T9 = void, typename T10 = void> | ||||
1740 | struct m_Intrinsic_Ty; | ||||
1741 | template <typename T0> struct m_Intrinsic_Ty<T0> { | ||||
1742 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; | ||||
1743 | }; | ||||
1744 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { | ||||
1745 | using Ty = | ||||
1746 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; | ||||
1747 | }; | ||||
1748 | template <typename T0, typename T1, typename T2> | ||||
1749 | struct m_Intrinsic_Ty<T0, T1, T2> { | ||||
1750 | using Ty = | ||||
1751 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, | ||||
1752 | Argument_match<T2>>; | ||||
1753 | }; | ||||
1754 | template <typename T0, typename T1, typename T2, typename T3> | ||||
1755 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { | ||||
1756 | using Ty = | ||||
1757 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, | ||||
1758 | Argument_match<T3>>; | ||||
1759 | }; | ||||
1760 | |||||
1761 | template <typename T0, typename T1, typename T2, typename T3, typename T4> | ||||
1762 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { | ||||
1763 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, | ||||
1764 | Argument_match<T4>>; | ||||
1765 | }; | ||||
1766 | |||||
1767 | /// Match intrinsic calls like this: | ||||
1768 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) | ||||
1769 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { | ||||
1770 | return IntrinsicID_match(IntrID); | ||||
1771 | } | ||||
1772 | |||||
1773 | template <Intrinsic::ID IntrID, typename T0> | ||||
1774 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { | ||||
1775 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); | ||||
1776 | } | ||||
1777 | |||||
1778 | template <Intrinsic::ID IntrID, typename T0, typename T1> | ||||
1779 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, | ||||
1780 | const T1 &Op1) { | ||||
1781 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); | ||||
1782 | } | ||||
1783 | |||||
1784 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> | ||||
1785 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty | ||||
1786 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { | ||||
1787 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); | ||||
1788 | } | ||||
1789 | |||||
1790 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||
1791 | typename T3> | ||||
1792 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty | ||||
1793 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { | ||||
1794 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); | ||||
1795 | } | ||||
1796 | |||||
1797 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, | ||||
1798 | typename T3, typename T4> | ||||
1799 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty | ||||
1800 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, | ||||
1801 | const T4 &Op4) { | ||||
1802 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), | ||||
1803 | m_Argument<4>(Op4)); | ||||
1804 | } | ||||
1805 | |||||
1806 | // Helper intrinsic matching specializations. | ||||
1807 | template <typename Opnd0> | ||||
1808 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { | ||||
1809 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); | ||||
1810 | } | ||||
1811 | |||||
1812 | template <typename Opnd0> | ||||
1813 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { | ||||
1814 | return m_Intrinsic<Intrinsic::bswap>(Op0); | ||||
1815 | } | ||||
1816 | |||||
1817 | template <typename Opnd0> | ||||
1818 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { | ||||
1819 | return m_Intrinsic<Intrinsic::fabs>(Op0); | ||||
1820 | } | ||||
1821 | |||||
1822 | template <typename Opnd0> | ||||
1823 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { | ||||
1824 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); | ||||
1825 | } | ||||
1826 | |||||
1827 | template <typename Opnd0, typename Opnd1> | ||||
1828 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, | ||||
1829 | const Opnd1 &Op1) { | ||||
1830 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); | ||||
1831 | } | ||||
1832 | |||||
1833 | template <typename Opnd0, typename Opnd1> | ||||
1834 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, | ||||
1835 | const Opnd1 &Op1) { | ||||
1836 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); | ||||
1837 | } | ||||
1838 | |||||
1839 | //===----------------------------------------------------------------------===// | ||||
1840 | // Matchers for two-operands operators with the operators in either order | ||||
1841 | // | ||||
1842 | |||||
1843 | /// Matches a BinaryOperator with LHS and RHS in either order. | ||||
1844 | template <typename LHS, typename RHS> | ||||
1845 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { | ||||
1846 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); | ||||
1847 | } | ||||
1848 | |||||
1849 | /// Matches an ICmp with a predicate over LHS and RHS in either order. | ||||
1850 | /// Does not swap the predicate. | ||||
1851 | template <typename LHS, typename RHS> | ||||
1852 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> | ||||
1853 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { | ||||
1854 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, | ||||
| |||||
| |||||
1855 | R); | ||||
1856 | } | ||||
1857 | |||||
1858 | /// Matches a Add with LHS and RHS in either order. | ||||
1859 | template <typename LHS, typename RHS> | ||||
1860 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, | ||||
1861 | const RHS &R) { | ||||
1862 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); | ||||
1863 | } | ||||
1864 | |||||
1865 | /// Matches a Mul with LHS and RHS in either order. | ||||
1866 | template <typename LHS, typename RHS> | ||||
1867 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, | ||||
1868 | const RHS &R) { | ||||
1869 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); | ||||
1870 | } | ||||
1871 | |||||
1872 | /// Matches an And with LHS and RHS in either order. | ||||
1873 | template <typename LHS, typename RHS> | ||||
1874 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, | ||||
1875 | const RHS &R) { | ||||
1876 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); | ||||
1877 | } | ||||
1878 | |||||
1879 | /// Matches an Or with LHS and RHS in either order. | ||||
1880 | template <typename LHS, typename RHS> | ||||
1881 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, | ||||
1882 | const RHS &R) { | ||||
1883 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); | ||||
1884 | } | ||||
1885 | |||||
1886 | /// Matches an Xor with LHS and RHS in either order. | ||||
1887 | template <typename LHS, typename RHS> | ||||
1888 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, | ||||
1889 | const RHS &R) { | ||||
1890 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); | ||||
1891 | } | ||||
1892 | |||||
1893 | /// Matches a 'Neg' as 'sub 0, V'. | ||||
1894 | template <typename ValTy> | ||||
1895 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> | ||||
1896 | m_Neg(const ValTy &V) { | ||||
1897 | return m_Sub(m_ZeroInt(), V); | ||||
1898 | } | ||||
1899 | |||||
1900 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. | ||||
1901 | template <typename ValTy> | ||||
1902 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> | ||||
1903 | m_Not(const ValTy &V) { | ||||
1904 | return m_c_Xor(V, m_AllOnes()); | ||||
1905 | } | ||||
1906 | |||||
1907 | /// Matches an SMin with LHS and RHS in either order. | ||||
1908 | template <typename LHS, typename RHS> | ||||
1909 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> | ||||
1910 | m_c_SMin(const LHS &L, const RHS &R) { | ||||
1911 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); | ||||
1912 | } | ||||
1913 | /// Matches an SMax with LHS and RHS in either order. | ||||
1914 | template <typename LHS, typename RHS> | ||||
1915 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> | ||||
1916 | m_c_SMax(const LHS &L, const RHS &R) { | ||||
1917 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); | ||||
1918 | } | ||||
1919 | /// Matches a UMin with LHS and RHS in either order. | ||||
1920 | template <typename LHS, typename RHS> | ||||
1921 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> | ||||
1922 | m_c_UMin(const LHS &L, const RHS &R) { | ||||
1923 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); | ||||
1924 | } | ||||
1925 | /// Matches a UMax with LHS and RHS in either order. | ||||
1926 | template <typename LHS, typename RHS> | ||||
1927 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> | ||||
1928 | m_c_UMax(const LHS &L, const RHS &R) { | ||||
1929 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); | ||||
1930 | } | ||||
1931 | |||||
1932 | /// Matches FAdd with LHS and RHS in either order. | ||||
1933 | template <typename LHS, typename RHS> | ||||
1934 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> | ||||
1935 | m_c_FAdd(const LHS &L, const RHS &R) { | ||||
1936 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); | ||||
1937 | } | ||||
1938 | |||||
1939 | /// Matches FMul with LHS and RHS in either order. | ||||
1940 | template <typename LHS, typename RHS> | ||||
1941 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> | ||||
1942 | m_c_FMul(const LHS &L, const RHS &R) { | ||||
1943 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); | ||||
1944 | } | ||||
1945 | |||||
1946 | template <typename Opnd_t> struct Signum_match { | ||||
1947 | Opnd_t Val; | ||||
1948 | Signum_match(const Opnd_t &V) : Val(V) {} | ||||
1949 | |||||
1950 | template <typename OpTy> bool match(OpTy *V) { | ||||
1951 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); | ||||
1952 | if (TypeSize == 0) | ||||
1953 | return false; | ||||
1954 | |||||
1955 | unsigned ShiftWidth = TypeSize - 1; | ||||
1956 | Value *OpL = nullptr, *OpR = nullptr; | ||||
1957 | |||||
1958 | // This is the representation of signum we match: | ||||
1959 | // | ||||
1960 | // signum(x) == (x >> 63) | (-x >>u 63) | ||||
1961 | // | ||||
1962 | // An i1 value is its own signum, so it's correct to match | ||||
1963 | // | ||||
1964 | // signum(x) == (x >> 0) | (-x >>u 0) | ||||
1965 | // | ||||
1966 | // for i1 values. | ||||
1967 | |||||
1968 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); | ||||
1969 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); | ||||
1970 | auto Signum = m_Or(LHS, RHS); | ||||
1971 | |||||
1972 | return Signum.match(V) && OpL == OpR && Val.match(OpL); | ||||
1973 | } | ||||
1974 | }; | ||||
1975 | |||||
1976 | /// Matches a signum pattern. | ||||
1977 | /// | ||||
1978 | /// signum(x) = | ||||
1979 | /// x > 0 -> 1 | ||||
1980 | /// x == 0 -> 0 | ||||
1981 | /// x < 0 -> -1 | ||||
1982 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { | ||||
1983 | return Signum_match<Val_t>(V); | ||||
1984 | } | ||||
1985 | |||||
1986 | template <int Ind, typename Opnd_t> struct ExtractValue_match { | ||||
1987 | Opnd_t Val; | ||||
1988 | ExtractValue_match(const Opnd_t &V) : Val(V) {} | ||||
1989 | |||||
1990 | template <typename OpTy> bool match(OpTy *V) { | ||||
1991 | if (auto *I = dyn_cast<ExtractValueInst>(V)) | ||||
1992 | return I->getNumIndices() == 1 && I->getIndices()[0] == Ind && | ||||
1993 | Val.match(I->getAggregateOperand()); | ||||
1994 | return false; | ||||
1995 | } | ||||
1996 | }; | ||||
1997 | |||||
1998 | /// Match a single index ExtractValue instruction. | ||||
1999 | /// For example m_ExtractValue<1>(...) | ||||
2000 | template <int Ind, typename Val_t> | ||||
2001 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { | ||||
2002 | return ExtractValue_match<Ind, Val_t>(V); | ||||
2003 | } | ||||
2004 | |||||
2005 | } // end namespace PatternMatch | ||||
2006 | } // end namespace llvm | ||||
2007 | |||||
2008 | #endif // LLVM_IR_PATTERNMATCH_H |